Diffstat (limited to 'virt')

 -rw-r--r--   virt/kvm/assigned-dev.c      15
 -rw-r--r--   virt/kvm/coalesced_mmio.c     7
 -rw-r--r--   virt/kvm/eventfd.c            4
 -rw-r--r--   virt/kvm/ioapic.c             6
 -rw-r--r--   virt/kvm/iommu.c            129
 -rw-r--r--   virt/kvm/irq_comm.c          15
 -rw-r--r--   virt/kvm/kvm_main.c         178

 7 files changed, 246 insertions, 108 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 02ff2b19dbe2..7c98928b09d9 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -1,7 +1,7 @@
 /*
  * Kernel-based Virtual Machine - device assignment support
  *
- * Copyright (C) 2006-9 Red Hat, Inc
+ * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
  *
  * This work is licensed under the terms of the GNU GPL, version 2. See
  * the COPYING file in the top-level directory.
@@ -58,12 +58,10 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
         struct kvm_assigned_dev_kernel *assigned_dev;
-        struct kvm *kvm;
         int i;
 
         assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
                                     interrupt_work);
-        kvm = assigned_dev->kvm;
 
         spin_lock_irq(&assigned_dev->assigned_dev_lock);
         if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
@@ -316,12 +314,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
                                   kvm_assigned_dev_intr, 0,
                                   "kvm_assigned_msix_device",
                                   (void *)dev);
-                /* FIXME: free requested_irq's on failure */
                 if (r)
-                        return r;
+                        goto err;
         }
 
         return 0;
+err:
+        for (i -= 1; i >= 0; i--)
+                free_irq(dev->host_msix_entries[i].vector, (void *)dev);
+        pci_disable_msix(dev->dev);
+        return r;
 }
 
 #endif
@@ -444,9 +446,6 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
         struct kvm_assigned_dev_kernel *match;
         unsigned long host_irq_type, guest_irq_type;
 
-        if (!capable(CAP_SYS_RAWIO))
-                return -EPERM;
-
         if (!irqchip_in_kernel(kvm))
                 return r;
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 36e258029649..fc8487564d1f 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -2,6 +2,7 @@
  * KVM coalesced MMIO
  *
  * Copyright (c) 2008 Bull S.A.S.
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  *
  * Author: Laurent Vivier <Laurent.Vivier@bull.net>
  *
@@ -120,8 +121,10 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
         return ret;
 
 out_free_dev:
+        kvm->coalesced_mmio_dev = NULL;
         kfree(dev);
 out_free_page:
+        kvm->coalesced_mmio_ring = NULL;
         __free_page(page);
 out_err:
         return ret;
@@ -139,7 +142,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
         struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
 
         if (dev == NULL)
-                return -EINVAL;
+                return -ENXIO;
 
         mutex_lock(&kvm->slots_lock);
         if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
@@ -162,7 +165,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
         struct kvm_coalesced_mmio_zone *z;
 
         if (dev == NULL)
-                return -EINVAL;
+                return -ENXIO;
 
         mutex_lock(&kvm->slots_lock);
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b81f0ebbaaad..c1f1e3c62984 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -2,6 +2,7 @@
  * kvm eventfd support - use eventfd objects to signal various KVM events
  *
  * Copyright 2009 Novell. All Rights Reserved.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Author:
  *   Gregory Haskins <ghaskins@novell.com>
@@ -217,7 +218,6 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
         events = file->f_op->poll(file, &irqfd->pt);
 
         list_add_tail(&irqfd->list, &kvm->irqfds.items);
-        spin_unlock_irq(&kvm->irqfds.lock);
 
         /*
          * Check if there was an event already pending on the eventfd
@@ -226,6 +226,8 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
         if (events & POLLIN)
                 schedule_work(&irqfd->inject);
 
+        spin_unlock_irq(&kvm->irqfds.lock);
+
         /*
          * do not drop the file until the irqfd is fully initialized, otherwise
          * we might race against the POLLHUP
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 7c79c1d76d0c..0b9df8303dcf 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2001 MandrakeSoft S.A.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * MandrakeSoft S.A.
  * 43, rue d'Aboukir
@@ -151,7 +152,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
                 update_handled_vectors(ioapic);
                 mask_after = e->fields.mask;
                 if (mask_before != mask_after)
-                        kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
+                        kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
                 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
                     && ioapic->irr & (1 << index))
                         ioapic_service(ioapic, index);
@@ -192,12 +193,13 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
-        u32 old_irr = ioapic->irr;
+        u32 old_irr;
         u32 mask = 1 << irq;
         union kvm_ioapic_redirect_entry entry;
         int ret = 1;
 
         spin_lock(&ioapic->lock);
+        old_irr = ioapic->irr;
         if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
                 entry = ioapic->redirtbl[irq];
                 level ^= entry.fields.polarity;
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 80fd3ad3b2de..62a9caf0563c 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -16,6 +16,8 @@
  *
  * Copyright (C) 2006-2008 Intel Corporation
  * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
  * Author: Allen M. Kay <allen.m.kay@intel.com>
  * Author: Weidong Han <weidong.han@intel.com>
  * Author: Ben-Ami Yassour <benami@il.ibm.com>
@@ -32,12 +34,30 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
                                 gfn_t base_gfn, unsigned long npages);
 
+static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           gfn_t gfn, unsigned long size)
+{
+        gfn_t end_gfn;
+        pfn_t pfn;
+
+        pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
+        end_gfn = gfn + (size >> PAGE_SHIFT);
+        gfn += 1;
+
+        if (is_error_pfn(pfn))
+                return pfn;
+
+        while (gfn < end_gfn)
+                gfn_to_pfn_memslot(kvm, slot, gfn++);
+
+        return pfn;
+}
+
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-        gfn_t gfn = slot->base_gfn;
-        unsigned long npages = slot->npages;
+        gfn_t gfn, end_gfn;
         pfn_t pfn;
-        int i, r = 0;
+        int r = 0;
         struct iommu_domain *domain = kvm->arch.iommu_domain;
         int flags;
 
@@ -45,46 +65,79 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
         if (!domain)
                 return 0;
 
+        gfn = slot->base_gfn;
+        end_gfn = gfn + slot->npages;
+
         flags = IOMMU_READ | IOMMU_WRITE;
         if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
                 flags |= IOMMU_CACHE;
 
-        for (i = 0; i < npages; i++) {
-                /* check if already mapped */
-                if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
+
+        while (gfn < end_gfn) {
+                unsigned long page_size;
+
+                /* Check if already mapped */
+                if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
+                        gfn += 1;
+                        continue;
+                }
+
+                /* Get the page size we could use to map */
+                page_size = kvm_host_page_size(kvm, gfn);
+
+                /* Make sure the page_size does not exceed the memslot */
+                while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
+                        page_size >>= 1;
+
+                /* Make sure gfn is aligned to the page size we want to map */
+                while ((gfn << PAGE_SHIFT) & (page_size - 1))
+                        page_size >>= 1;
+
+                /*
+                 * Pin all pages we are about to map in memory. This is
+                 * important because we unmap and unpin in 4kb steps later.
+                 */
+                pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+                if (is_error_pfn(pfn)) {
+                        gfn += 1;
                         continue;
+                }
 
-                pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
-                r = iommu_map_range(domain,
-                                    gfn_to_gpa(gfn),
-                                    pfn_to_hpa(pfn),
-                                    PAGE_SIZE, flags);
+                /* Map into IO address space */
+                r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
+                              get_order(page_size), flags);
                 if (r) {
                         printk(KERN_ERR "kvm_iommu_map_address:"
-                               "iommu failed to map pfn=%lx\n", pfn);
+                               "iommu failed to map pfn=%llx\n", pfn);
                         goto unmap_pages;
                 }
-                gfn++;
+
+                gfn += page_size >> PAGE_SHIFT;
+
+
         }
+
         return 0;
 
 unmap_pages:
-        kvm_iommu_put_pages(kvm, slot->base_gfn, i);
+        kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
         return r;
 }
 
 static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
-        int i, r = 0;
+        int i, idx, r = 0;
         struct kvm_memslots *slots;
 
-        slots = rcu_dereference(kvm->memslots);
+        idx = srcu_read_lock(&kvm->srcu);
+        slots = kvm_memslots(kvm);
 
         for (i = 0; i < slots->nmemslots; i++) {
                 r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
                 if (r)
                         break;
         }
+        srcu_read_unlock(&kvm->srcu, idx);
 
         return r;
 }
@@ -189,40 +242,62 @@ out_unmap:
         return r;
 }
 
+static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
+{
+        unsigned long i;
+
+        for (i = 0; i < npages; ++i)
+                kvm_release_pfn_clean(pfn + i);
+}
+
 static void kvm_iommu_put_pages(struct kvm *kvm,
                                 gfn_t base_gfn, unsigned long npages)
 {
-        gfn_t gfn = base_gfn;
+        struct iommu_domain *domain;
+        gfn_t end_gfn, gfn;
         pfn_t pfn;
-        struct iommu_domain *domain = kvm->arch.iommu_domain;
-        unsigned long i;
         u64 phys;
 
+        domain = kvm->arch.iommu_domain;
+        end_gfn = base_gfn + npages;
+        gfn = base_gfn;
+
         /* check if iommu exists and in use */
         if (!domain)
                 return;
 
-        for (i = 0; i < npages; i++) {
+        while (gfn < end_gfn) {
+                unsigned long unmap_pages;
+                int order;
+
+                /* Get physical address */
                 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
                 pfn = phys >> PAGE_SHIFT;
-                kvm_release_pfn_clean(pfn);
-                gfn++;
-        }
+
+                /* Unmap address from IO address space */
+                order = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
+                unmap_pages = 1ULL << order;
 
-        iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
+                /* Unpin all pages we just unmapped to not leak any memory */
+                kvm_unpin_pages(kvm, pfn, unmap_pages);
+
+                gfn += unmap_pages;
+        }
 }
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
-        int i;
+        int i, idx;
         struct kvm_memslots *slots;
 
-        slots = rcu_dereference(kvm->memslots);
+        idx = srcu_read_lock(&kvm->srcu);
+        slots = kvm_memslots(kvm);
 
         for (i = 0; i < slots->nmemslots; i++) {
                 kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
                                     slots->memslots[i].npages);
         }
+        srcu_read_unlock(&kvm->srcu, idx);
 
         return 0;
 }
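The reworked kvm_iommu_map_pages() loop above chooses a mapping size by starting from the host page size and halving it until the mapping fits. A minimal standalone sketch of just that clamping arithmetic, with names and example values invented for illustration (not part of the commit):

```c
#include <stdio.h>

#define PAGE_SHIFT 12

/* Halve page_size until the mapping stays inside the memslot and the starting
 * gfn is aligned for that size, mirroring the two while loops in the patch. */
static unsigned long clamp_page_size(unsigned long gfn, unsigned long end_gfn,
                                     unsigned long host_page_size)
{
        unsigned long page_size = host_page_size;

        /* must not spill past the end of the memslot */
        while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
                page_size >>= 1;

        /* start address must be aligned to the chosen size */
        while ((gfn << PAGE_SHIFT) & (page_size - 1))
                page_size >>= 1;

        return page_size;
}

int main(void)
{
        /* A 2MB host page at an unaligned gfn collapses all the way to 4KB. */
        printf("%lu\n", clamp_page_size(0x201, 0x400, 2UL << 20));
        return 0;
}
```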
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a0e88809e45e..369e38010ad5 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -17,6 +17,7 @@
  * Authors:
  *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
  *
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
  */
 
 #include <linux/kvm_host.h>
@@ -99,7 +100,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
                         if (r < 0)
                                 r = 0;
                         r += kvm_apic_set_irq(vcpu, irq);
-                } else {
+                } else if (kvm_lapic_enabled(vcpu)) {
                         if (!lowest)
                                 lowest = vcpu;
                         else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
@@ -278,15 +279,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
         synchronize_rcu();
 }
 
-void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+                             bool mask)
 {
         struct kvm_irq_mask_notifier *kimn;
         struct hlist_node *n;
+        int gsi;
 
         rcu_read_lock();
-        hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
-                if (kimn->irq == irq)
-                        kimn->func(kimn, mask);
+        gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+        if (gsi != -1)
+                hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
+                        if (kimn->irq == gsi)
+                                kimn->func(kimn, mask);
         rcu_read_unlock();
 }
 
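The kvm_fire_mask_notifiers() change above reflects that mask notifiers are keyed by GSI while the ioapic only knows which pin changed, so the routing table is consulted to translate (irqchip, pin) into a GSI before matching notifiers. A rough, self-contained illustration of that lookup; the constants, table contents, and helper below are invented for the example:

```c
#include <stdio.h>

#define KVM_IRQCHIP_PIC    0
#define KVM_IRQCHIP_IOAPIC 1
#define MAX_PINS           24

/* chip[irqchip][pin] holds the GSI routed to that pin, or -1 if unrouted. */
static int chip[2][MAX_PINS];

static void fire_mask_notifiers(unsigned irqchip, unsigned pin, int mask)
{
        int gsi = chip[irqchip][pin];

        if (gsi != -1)
                printf("mask notifier for gsi %d: %s\n",
                       gsi, mask ? "masked" : "unmasked");
}

int main(void)
{
        int i;

        for (i = 0; i < MAX_PINS; i++) {
                chip[KVM_IRQCHIP_PIC][i] = i < 16 ? i : -1; /* PIC pins 0..15 */
                chip[KVM_IRQCHIP_IOAPIC][i] = i;            /* identity routing */
        }
        fire_mask_notifiers(KVM_IRQCHIP_IOAPIC, 10, 1);
        return 0;
}
```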
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c82ae2492634..5186e728c53e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5,6 +5,7 @@
  * machines without emulation or binary translation.
  *
  * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
  *
  * Authors:
  *   Avi Kivity <avi@qumranet.com>
@@ -92,6 +93,12 @@ static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
 
+static struct page *hwpoison_page;
+static pfn_t hwpoison_pfn;
+
+static struct page *fault_page;
+static pfn_t fault_pfn;
+
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
         if (pfn_valid(pfn)) {
@@ -141,7 +148,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
         raw_spin_lock(&kvm->requests_lock);
         me = smp_processor_id();
         kvm_for_each_vcpu(i, vcpu, kvm) {
-                if (test_and_set_bit(req, &vcpu->requests))
+                if (kvm_make_check_request(req, vcpu))
                         continue;
                 cpu = vcpu->cpu;
                 if (cpus != NULL && cpu != -1 && cpu != me)
@@ -422,9 +429,6 @@ static struct kvm *kvm_create_vm(void)
         spin_lock(&kvm_lock);
         list_add(&kvm->vm_list, &vm_list);
         spin_unlock(&kvm_lock);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-        kvm_coalesced_mmio_init(kvm);
-#endif
 out:
         return kvm;
 
@@ -560,11 +564,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
         base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
         npages = mem->memory_size >> PAGE_SHIFT;
 
+        r = -EINVAL;
+        if (npages > KVM_MEM_MAX_NR_PAGES)
+                goto out;
+
         if (!npages)
                 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 
         new = old = *memslot;
 
+        new.id = mem->slot;
         new.base_gfn = base_gfn;
         new.npages = npages;
         new.flags = mem->flags;
@@ -595,7 +604,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
         /* Allocate if a slot is being created */
 #ifndef CONFIG_S390
         if (npages && !new.rmap) {
-                new.rmap = vmalloc(npages * sizeof(struct page *));
+                new.rmap = vmalloc(npages * sizeof(*new.rmap));
 
                 if (!new.rmap)
                         goto out_free;
@@ -620,9 +629,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
                 if (new.lpage_info[i])
                         continue;
 
-                lpages = 1 + (base_gfn + npages - 1) /
-                             KVM_PAGES_PER_HPAGE(level);
-                lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
+                lpages = 1 + ((base_gfn + npages - 1)
+                             >> KVM_HPAGE_GFN_SHIFT(level));
+                lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
 
                 new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
 
@@ -632,9 +641,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
                 memset(new.lpage_info[i], 0,
                        lpages * sizeof(*new.lpage_info[i]));
 
-                if (base_gfn % KVM_PAGES_PER_HPAGE(level))
+                if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
                         new.lpage_info[i][0].write_count = 1;
-                if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
+                if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
                         new.lpage_info[i][lpages - 1].write_count = 1;
                 ugfn = new.userspace_addr >> PAGE_SHIFT;
                 /*
@@ -809,16 +818,28 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
 
 int is_error_page(struct page *page)
 {
-        return page == bad_page;
+        return page == bad_page || page == hwpoison_page || page == fault_page;
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
 int is_error_pfn(pfn_t pfn)
 {
-        return pfn == bad_pfn;
+        return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
 }
 EXPORT_SYMBOL_GPL(is_error_pfn);
 
+int is_hwpoison_pfn(pfn_t pfn)
+{
+        return pfn == hwpoison_pfn;
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
+
+int is_fault_pfn(pfn_t pfn)
+{
+        return pfn == fault_pfn;
+}
+EXPORT_SYMBOL_GPL(is_fault_pfn);
+
 static inline unsigned long bad_hva(void)
 {
         return PAGE_OFFSET;
@@ -830,10 +851,10 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
         int i;
-        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+        struct kvm_memslots *slots = kvm_memslots(kvm);
 
         for (i = 0; i < slots->nmemslots; ++i) {
                 struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -844,20 +865,13 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
         }
         return NULL;
 }
-EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
-
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
-{
-        gfn = unalias_gfn(kvm, gfn);
-        return gfn_to_memslot_unaliased(kvm, gfn);
-}
+EXPORT_SYMBOL_GPL(gfn_to_memslot);
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
         int i;
-        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+        struct kvm_memslots *slots = kvm_memslots(kvm);
 
-        gfn = unalias_gfn_instantiation(kvm, gfn);
         for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
                 struct kvm_memory_slot *memslot = &slots->memslots[i];
 
@@ -899,10 +913,9 @@ out:
 int memslot_id(struct kvm *kvm, gfn_t gfn)
 {
         int i;
-        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+        struct kvm_memslots *slots = kvm_memslots(kvm);
         struct kvm_memory_slot *memslot = NULL;
 
-        gfn = unalias_gfn(kvm, gfn);
         for (i = 0; i < slots->nmemslots; ++i) {
                 memslot = &slots->memslots[i];
 
@@ -914,15 +927,19 @@ int memslot_id(struct kvm *kvm, gfn_t gfn)
         return memslot - slots->memslots;
 }
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+        return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
         struct kvm_memory_slot *slot;
 
-        gfn = unalias_gfn_instantiation(kvm, gfn);
-        slot = gfn_to_memslot_unaliased(kvm, gfn);
+        slot = gfn_to_memslot(kvm, gfn);
         if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
                 return bad_hva();
-        return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+        return gfn_to_hva_memslot(slot, gfn);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
@@ -940,13 +957,19 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
                 struct vm_area_struct *vma;
 
                 down_read(&current->mm->mmap_sem);
+                if (is_hwpoison_address(addr)) {
+                        up_read(&current->mm->mmap_sem);
+                        get_page(hwpoison_page);
+                        return page_to_pfn(hwpoison_page);
+                }
+
                 vma = find_vma(current->mm, addr);
 
                 if (vma == NULL || addr < vma->vm_start ||
                     !(vma->vm_flags & VM_PFNMAP)) {
                         up_read(&current->mm->mmap_sem);
-                        get_page(bad_page);
-                        return page_to_pfn(bad_page);
+                        get_page(fault_page);
+                        return page_to_pfn(fault_page);
                 }
 
                 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
@@ -972,11 +995,6 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
-static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
-{
-        return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
-}
-
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
                          struct kvm_memory_slot *slot, gfn_t gfn)
 {
@@ -1186,17 +1204,11 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 {
         struct kvm_memory_slot *memslot;
 
-        gfn = unalias_gfn(kvm, gfn);
-        memslot = gfn_to_memslot_unaliased(kvm, gfn);
+        memslot = gfn_to_memslot(kvm, gfn);
         if (memslot && memslot->dirty_bitmap) {
                 unsigned long rel_gfn = gfn - memslot->base_gfn;
-                unsigned long *p = memslot->dirty_bitmap +
-                                        rel_gfn / BITS_PER_LONG;
-                int offset = rel_gfn % BITS_PER_LONG;
 
-                /* avoid RMW */
-                if (!generic_test_le_bit(offset, p))
-                        generic___set_le_bit(offset, p);
+                generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
         }
 }
 
@@ -1211,7 +1223,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
                 if (kvm_arch_vcpu_runnable(vcpu)) {
-                        set_bit(KVM_REQ_UNHALT, &vcpu->requests);
+                        kvm_make_request(KVM_REQ_UNHALT, vcpu);
                         break;
                 }
                 if (kvm_cpu_has_pending_timer(vcpu))
@@ -1382,6 +1394,18 @@ static long kvm_vcpu_ioctl(struct file *filp,
 
         if (vcpu->kvm->mm != current->mm)
                 return -EIO;
+
+#if defined(CONFIG_S390) || defined(CONFIG_PPC)
+        /*
+         * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
+         * so vcpu_load() would break it.
+         */
+        if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
+                return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
+#endif
+
+
+        vcpu_load(vcpu);
         switch (ioctl) {
         case KVM_RUN:
                 r = -EINVAL;
@@ -1524,7 +1548,7 @@ out_free2:
                         goto out;
                 p = &sigset;
         }
-        r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+        r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
         break;
         }
         case KVM_GET_FPU: {
@@ -1559,6 +1583,7 @@ out_free2:
                 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
         }
 out:
+        vcpu_put(vcpu);
         kfree(fpu);
         kfree(kvm_sregs);
         return r;
@@ -1609,7 +1634,6 @@ static long kvm_vm_ioctl(struct file *filp,
                 r = -EFAULT;
                 if (copy_from_user(&zone, argp, sizeof zone))
                         goto out;
-                r = -ENXIO;
                 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
                 if (r)
                         goto out;
@@ -1621,7 +1645,6 @@ static long kvm_vm_ioctl(struct file *filp,
                 r = -EFAULT;
                 if (copy_from_user(&zone, argp, sizeof zone))
                         goto out;
-                r = -ENXIO;
                 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
                 if (r)
                         goto out;
@@ -1755,12 +1778,19 @@ static struct file_operations kvm_vm_fops = {
 
 static int kvm_dev_ioctl_create_vm(void)
 {
-        int fd;
+        int fd, r;
         struct kvm *kvm;
 
         kvm = kvm_create_vm();
         if (IS_ERR(kvm))
                 return PTR_ERR(kvm);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+        r = kvm_coalesced_mmio_init(kvm);
+        if (r < 0) {
+                kvm_put_kvm(kvm);
+                return r;
+        }
+#endif
         fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
         if (fd < 0)
                 kvm_put_kvm(kvm);
@@ -1928,15 +1958,10 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
                        cpu);
                 hardware_disable(NULL);
                 break;
-        case CPU_UP_CANCELED:
-                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
-                       cpu);
-                smp_call_function_single(cpu, hardware_disable, NULL, 1);
-                break;
-        case CPU_ONLINE:
+        case CPU_STARTING:
                 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                        cpu);
-                smp_call_function_single(cpu, hardware_enable, NULL, 1);
+                hardware_enable(NULL);
                 break;
         }
         return NOTIFY_OK;
@@ -1945,10 +1970,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 
 asmlinkage void kvm_handle_fault_on_reboot(void)
 {
-        if (kvm_rebooting)
+        if (kvm_rebooting) {
                 /* spin while reset goes on */
+                local_irq_enable();
                 while (true)
                         ;
+        }
         /* Fault while not rebooting. We want the trace. */
         BUG();
 }
@@ -1991,7 +2018,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                      int len, const void *val)
 {
         int i;
-        struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+        struct kvm_io_bus *bus;
+
+        bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
         for (i = 0; i < bus->dev_count; i++)
                 if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
                         return 0;
@@ -2003,8 +2032,9 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                     int len, void *val)
 {
         int i;
-        struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+        struct kvm_io_bus *bus;
 
+        bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
         for (i = 0; i < bus->dev_count; i++)
                 if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
                         return 0;
@@ -2068,7 +2098,6 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 
 static struct notifier_block kvm_cpu_notifier = {
         .notifier_call = kvm_cpu_hotplug,
-        .priority = 20, /* must be > scheduler priority */
 };
 
 static int vm_stat_get(void *_offset, u64 *val)
@@ -2179,7 +2208,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
         kvm_arch_vcpu_put(vcpu);
 }
 
-int kvm_init(void *opaque, unsigned int vcpu_size,
+int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
              struct module *module)
 {
         int r;
@@ -2198,6 +2227,24 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
         bad_pfn = page_to_pfn(bad_page);
 
+        hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+        if (hwpoison_page == NULL) {
+                r = -ENOMEM;
+                goto out_free_0;
+        }
+
+        hwpoison_pfn = page_to_pfn(hwpoison_page);
+
+        fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+        if (fault_page == NULL) {
+                r = -ENOMEM;
+                goto out_free_0;
+        }
+
+        fault_pfn = page_to_pfn(fault_page);
+
         if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
                 r = -ENOMEM;
                 goto out_free_0;
@@ -2229,8 +2276,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
                 goto out_free_4;
 
         /* A kmem cache lets us meet the alignment requirements of fx_save. */
-        kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
-                                           __alignof__(struct kvm_vcpu),
+        if (!vcpu_align)
+                vcpu_align = __alignof__(struct kvm_vcpu);
+        kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
                                            0, NULL);
         if (!kvm_vcpu_cache) {
                 r = -ENOMEM;
@@ -2269,6 +2317,10 @@ out_free_1:
 out_free_0a:
         free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
+        if (fault_page)
+                __free_page(fault_page);
+        if (hwpoison_page)
+                __free_page(hwpoison_page);
         __free_page(bad_page);
 out:
         kvm_arch_exit();
@@ -2279,7 +2331,6 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
-        tracepoint_synchronize_unregister();
         kvm_exit_debug();
         misc_deregister(&kvm_dev);
         kmem_cache_destroy(kvm_vcpu_cache);
@@ -2291,6 +2342,7 @@ void kvm_exit(void)
         kvm_arch_hardware_unsetup();
         kvm_arch_exit();
         free_cpumask_var(cpus_hardware_enabled);
+        __free_page(hwpoison_page);
         __free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
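For context on the kvm_init() signature change above: callers now pass the vcpu structure's alignment explicitly, with 0 falling back to __alignof__(struct kvm_vcpu). A hypothetical arch-module fragment showing roughly how such a caller would look; the vcpu_vmx and vmx_x86_ops names are illustrative only and this snippet is not part of the commit, nor compilable on its own:

```c
/* Sketch of a caller of the new four-argument kvm_init(); assumed names. */
static int __init vmx_init(void)
{
        return kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
                        __alignof__(struct vcpu_vmx), THIS_MODULE);
}
module_init(vmx_init);
```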