author		Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-17 16:15:55 -0500
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-17 16:15:55 -0500
commit		8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree		a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /virt
parent		406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/Kconfig           |    6
-rw-r--r--  virt/kvm/assigned-dev.c    |  296
-rw-r--r--  virt/kvm/async_pf.c        |   11
-rw-r--r--  virt/kvm/coalesced_mmio.c  |  137
-rw-r--r--  virt/kvm/coalesced_mmio.h  |    7
-rw-r--r--  virt/kvm/eventfd.c         |  182
-rw-r--r--  virt/kvm/ioapic.c          |   78
-rw-r--r--  virt/kvm/ioapic.h          |    5
-rw-r--r--  virt/kvm/iommu.c           |   79
-rw-r--r--  virt/kvm/irq_comm.c        |  146
-rw-r--r--  virt/kvm/kvm_main.c        | 1158
11 files changed, 717 insertions, 1388 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index d01b24b72c6..f63ccb0a598 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -18,9 +18,3 @@ config KVM_MMIO
18 | 18 | ||
19 | config KVM_ASYNC_PF | 19 | config KVM_ASYNC_PF |
20 | bool | 20 | bool |
21 | |||
22 | config HAVE_KVM_MSI | ||
23 | bool | ||
24 | |||
25 | config HAVE_KVM_CPU_RELAX_INTERCEPT | ||
26 | bool | ||
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 3642239252b..af7910228fb 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -49,157 +49,71 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
49 | index = i; | 49 | index = i; |
50 | break; | 50 | break; |
51 | } | 51 | } |
52 | if (index < 0) | 52 | if (index < 0) { |
53 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | 53 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); |
54 | return 0; | ||
55 | } | ||
54 | 56 | ||
55 | return index; | 57 | return index; |
56 | } | 58 | } |
57 | 59 | ||
58 | static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) | 60 | static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) |
59 | { | 61 | { |
60 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | 62 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
61 | int ret; | 63 | u32 vector; |
64 | int index; | ||
62 | 65 | ||
63 | spin_lock(&assigned_dev->intx_lock); | 66 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { |
64 | if (pci_check_and_mask_intx(assigned_dev->dev)) { | 67 | spin_lock(&assigned_dev->intx_lock); |
68 | disable_irq_nosync(irq); | ||
65 | assigned_dev->host_irq_disabled = true; | 69 | assigned_dev->host_irq_disabled = true; |
66 | ret = IRQ_WAKE_THREAD; | 70 | spin_unlock(&assigned_dev->intx_lock); |
67 | } else | 71 | } |
68 | ret = IRQ_NONE; | ||
69 | spin_unlock(&assigned_dev->intx_lock); | ||
70 | |||
71 | return ret; | ||
72 | } | ||
73 | 72 | ||
74 | static void | 73 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
75 | kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, | 74 | index = find_index_from_host_irq(assigned_dev, irq); |
76 | int vector) | 75 | if (index >= 0) { |
77 | { | 76 | vector = assigned_dev-> |
78 | if (unlikely(assigned_dev->irq_requested_type & | 77 | guest_msix_entries[index].vector; |
79 | KVM_DEV_IRQ_GUEST_INTX)) { | ||
80 | spin_lock(&assigned_dev->intx_mask_lock); | ||
81 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) | ||
82 | kvm_set_irq(assigned_dev->kvm, | 78 | kvm_set_irq(assigned_dev->kvm, |
83 | assigned_dev->irq_source_id, vector, 1); | 79 | assigned_dev->irq_source_id, vector, 1); |
84 | spin_unlock(&assigned_dev->intx_mask_lock); | 80 | } |
85 | } else | 81 | } else |
86 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 82 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
87 | vector, 1); | 83 | assigned_dev->guest_irq, 1); |
88 | } | ||
89 | |||
90 | static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) | ||
91 | { | ||
92 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
93 | |||
94 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | ||
95 | spin_lock_irq(&assigned_dev->intx_lock); | ||
96 | disable_irq_nosync(irq); | ||
97 | assigned_dev->host_irq_disabled = true; | ||
98 | spin_unlock_irq(&assigned_dev->intx_lock); | ||
99 | } | ||
100 | |||
101 | kvm_assigned_dev_raise_guest_irq(assigned_dev, | ||
102 | assigned_dev->guest_irq); | ||
103 | 84 | ||
104 | return IRQ_HANDLED; | 85 | return IRQ_HANDLED; |
105 | } | 86 | } |
106 | 87 | ||
107 | #ifdef __KVM_HAVE_MSI | ||
108 | static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) | ||
109 | { | ||
110 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
111 | int ret = kvm_set_irq_inatomic(assigned_dev->kvm, | ||
112 | assigned_dev->irq_source_id, | ||
113 | assigned_dev->guest_irq, 1); | ||
114 | return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; | ||
115 | } | ||
116 | |||
117 | static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) | ||
118 | { | ||
119 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
120 | |||
121 | kvm_assigned_dev_raise_guest_irq(assigned_dev, | ||
122 | assigned_dev->guest_irq); | ||
123 | |||
124 | return IRQ_HANDLED; | ||
125 | } | ||
126 | #endif | ||
127 | |||
128 | #ifdef __KVM_HAVE_MSIX | ||
129 | static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) | ||
130 | { | ||
131 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
132 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
133 | u32 vector; | ||
134 | int ret = 0; | ||
135 | |||
136 | if (index >= 0) { | ||
137 | vector = assigned_dev->guest_msix_entries[index].vector; | ||
138 | ret = kvm_set_irq_inatomic(assigned_dev->kvm, | ||
139 | assigned_dev->irq_source_id, | ||
140 | vector, 1); | ||
141 | } | ||
142 | |||
143 | return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; | ||
144 | } | ||
145 | |||
146 | static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) | ||
147 | { | ||
148 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
149 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
150 | u32 vector; | ||
151 | |||
152 | if (index >= 0) { | ||
153 | vector = assigned_dev->guest_msix_entries[index].vector; | ||
154 | kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); | ||
155 | } | ||
156 | |||
157 | return IRQ_HANDLED; | ||
158 | } | ||
159 | #endif | ||
160 | |||
161 | /* Ack the irq line for an assigned device */ | 88 | /* Ack the irq line for an assigned device */ |
162 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | 89 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) |
163 | { | 90 | { |
164 | struct kvm_assigned_dev_kernel *dev = | 91 | struct kvm_assigned_dev_kernel *dev; |
165 | container_of(kian, struct kvm_assigned_dev_kernel, | ||
166 | ack_notifier); | ||
167 | 92 | ||
168 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | 93 | if (kian->gsi == -1) |
94 | return; | ||
169 | 95 | ||
170 | spin_lock(&dev->intx_mask_lock); | 96 | dev = container_of(kian, struct kvm_assigned_dev_kernel, |
171 | 97 | ack_notifier); | |
172 | if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { | ||
173 | bool reassert = false; | ||
174 | |||
175 | spin_lock_irq(&dev->intx_lock); | ||
176 | /* | ||
177 | * The guest IRQ may be shared so this ack can come from an | ||
178 | * IRQ for another guest device. | ||
179 | */ | ||
180 | if (dev->host_irq_disabled) { | ||
181 | if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) | ||
182 | enable_irq(dev->host_irq); | ||
183 | else if (!pci_check_and_unmask_intx(dev->dev)) | ||
184 | reassert = true; | ||
185 | dev->host_irq_disabled = reassert; | ||
186 | } | ||
187 | spin_unlock_irq(&dev->intx_lock); | ||
188 | 98 | ||
189 | if (reassert) | 99 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); |
190 | kvm_set_irq(dev->kvm, dev->irq_source_id, | ||
191 | dev->guest_irq, 1); | ||
192 | } | ||
193 | 100 | ||
194 | spin_unlock(&dev->intx_mask_lock); | 101 | /* The guest irq may be shared so this ack may be |
102 | * from another device. | ||
103 | */ | ||
104 | spin_lock(&dev->intx_lock); | ||
105 | if (dev->host_irq_disabled) { | ||
106 | enable_irq(dev->host_irq); | ||
107 | dev->host_irq_disabled = false; | ||
108 | } | ||
109 | spin_unlock(&dev->intx_lock); | ||
195 | } | 110 | } |
196 | 111 | ||
197 | static void deassign_guest_irq(struct kvm *kvm, | 112 | static void deassign_guest_irq(struct kvm *kvm, |
198 | struct kvm_assigned_dev_kernel *assigned_dev) | 113 | struct kvm_assigned_dev_kernel *assigned_dev) |
199 | { | 114 | { |
200 | if (assigned_dev->ack_notifier.gsi != -1) | 115 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); |
201 | kvm_unregister_irq_ack_notifier(kvm, | 116 | assigned_dev->ack_notifier.gsi = -1; |
202 | &assigned_dev->ack_notifier); | ||
203 | 117 | ||
204 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 118 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
205 | assigned_dev->guest_irq, 0); | 119 | assigned_dev->guest_irq, 0); |
@@ -231,7 +145,7 @@ static void deassign_host_irq(struct kvm *kvm,
231 | 145 | ||
232 | for (i = 0; i < assigned_dev->entries_nr; i++) | 146 | for (i = 0; i < assigned_dev->entries_nr; i++) |
233 | free_irq(assigned_dev->host_msix_entries[i].vector, | 147 | free_irq(assigned_dev->host_msix_entries[i].vector, |
234 | assigned_dev); | 148 | (void *)assigned_dev); |
235 | 149 | ||
236 | assigned_dev->entries_nr = 0; | 150 | assigned_dev->entries_nr = 0; |
237 | kfree(assigned_dev->host_msix_entries); | 151 | kfree(assigned_dev->host_msix_entries); |
@@ -239,17 +153,9 @@ static void deassign_host_irq(struct kvm *kvm,
239 | pci_disable_msix(assigned_dev->dev); | 153 | pci_disable_msix(assigned_dev->dev); |
240 | } else { | 154 | } else { |
241 | /* Deal with MSI and INTx */ | 155 | /* Deal with MSI and INTx */ |
242 | if ((assigned_dev->irq_requested_type & | 156 | disable_irq(assigned_dev->host_irq); |
243 | KVM_DEV_IRQ_HOST_INTX) && | 157 | |
244 | (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | 158 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); |
245 | spin_lock_irq(&assigned_dev->intx_lock); | ||
246 | pci_intx(assigned_dev->dev, false); | ||
247 | spin_unlock_irq(&assigned_dev->intx_lock); | ||
248 | synchronize_irq(assigned_dev->host_irq); | ||
249 | } else | ||
250 | disable_irq(assigned_dev->host_irq); | ||
251 | |||
252 | free_irq(assigned_dev->host_irq, assigned_dev); | ||
253 | 159 | ||
254 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | 160 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) |
255 | pci_disable_msi(assigned_dev->dev); | 161 | pci_disable_msi(assigned_dev->dev); |
@@ -301,8 +207,6 @@ static void kvm_free_assigned_device(struct kvm *kvm,
301 | else | 207 | else |
302 | pci_restore_state(assigned_dev->dev); | 208 | pci_restore_state(assigned_dev->dev); |
303 | 209 | ||
304 | assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; | ||
305 | |||
306 | pci_release_regions(assigned_dev->dev); | 210 | pci_release_regions(assigned_dev->dev); |
307 | pci_disable_device(assigned_dev->dev); | 211 | pci_disable_device(assigned_dev->dev); |
308 | pci_dev_put(assigned_dev->dev); | 212 | pci_dev_put(assigned_dev->dev); |
@@ -328,34 +232,15 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
328 | static int assigned_device_enable_host_intx(struct kvm *kvm, | 232 | static int assigned_device_enable_host_intx(struct kvm *kvm, |
329 | struct kvm_assigned_dev_kernel *dev) | 233 | struct kvm_assigned_dev_kernel *dev) |
330 | { | 234 | { |
331 | irq_handler_t irq_handler; | ||
332 | unsigned long flags; | ||
333 | |||
334 | dev->host_irq = dev->dev->irq; | 235 | dev->host_irq = dev->dev->irq; |
335 | 236 | /* Even though this is PCI, we don't want to use shared | |
336 | /* | 237 | * interrupts. Sharing host devices with guest-assigned devices |
337 | * We can only share the IRQ line with other host devices if we are | 238 | * on the same interrupt line is not a happy situation: there |
338 | * able to disable the IRQ source at device-level - independently of | 239 | * are going to be long delays in accepting, acking, etc. |
339 | * the guest driver. Otherwise host devices may suffer from unbounded | ||
340 | * IRQ latencies when the guest keeps the line asserted. | ||
341 | */ | 240 | */ |
342 | if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { | 241 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
343 | irq_handler = kvm_assigned_dev_intx; | 242 | IRQF_ONESHOT, dev->irq_name, (void *)dev)) |
344 | flags = IRQF_SHARED; | ||
345 | } else { | ||
346 | irq_handler = NULL; | ||
347 | flags = IRQF_ONESHOT; | ||
348 | } | ||
349 | if (request_threaded_irq(dev->host_irq, irq_handler, | ||
350 | kvm_assigned_dev_thread_intx, flags, | ||
351 | dev->irq_name, dev)) | ||
352 | return -EIO; | 243 | return -EIO; |
353 | |||
354 | if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { | ||
355 | spin_lock_irq(&dev->intx_lock); | ||
356 | pci_intx(dev->dev, true); | ||
357 | spin_unlock_irq(&dev->intx_lock); | ||
358 | } | ||
359 | return 0; | 244 | return 0; |
360 | } | 245 | } |
361 | 246 | ||
@@ -372,9 +257,8 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
372 | } | 257 | } |
373 | 258 | ||
374 | dev->host_irq = dev->dev->irq; | 259 | dev->host_irq = dev->dev->irq; |
375 | if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi, | 260 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
376 | kvm_assigned_dev_thread_msi, 0, | 261 | 0, dev->irq_name, (void *)dev)) { |
377 | dev->irq_name, dev)) { | ||
378 | pci_disable_msi(dev->dev); | 262 | pci_disable_msi(dev->dev); |
379 | return -EIO; | 263 | return -EIO; |
380 | } | 264 | } |
@@ -400,9 +284,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
400 | 284 | ||
401 | for (i = 0; i < dev->entries_nr; i++) { | 285 | for (i = 0; i < dev->entries_nr; i++) { |
402 | r = request_threaded_irq(dev->host_msix_entries[i].vector, | 286 | r = request_threaded_irq(dev->host_msix_entries[i].vector, |
403 | kvm_assigned_dev_msix, | 287 | NULL, kvm_assigned_dev_thread, |
404 | kvm_assigned_dev_thread_msix, | 288 | 0, dev->irq_name, (void *)dev); |
405 | 0, dev->irq_name, dev); | ||
406 | if (r) | 289 | if (r) |
407 | goto err; | 290 | goto err; |
408 | } | 291 | } |
@@ -410,7 +293,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
410 | return 0; | 293 | return 0; |
411 | err: | 294 | err: |
412 | for (i -= 1; i >= 0; i--) | 295 | for (i -= 1; i >= 0; i--) |
413 | free_irq(dev->host_msix_entries[i].vector, dev); | 296 | free_irq(dev->host_msix_entries[i].vector, (void *)dev); |
414 | pci_disable_msix(dev->dev); | 297 | pci_disable_msix(dev->dev); |
415 | return r; | 298 | return r; |
416 | } | 299 | } |
@@ -433,6 +316,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
433 | { | 316 | { |
434 | dev->guest_irq = irq->guest_irq; | 317 | dev->guest_irq = irq->guest_irq; |
435 | dev->ack_notifier.gsi = -1; | 318 | dev->ack_notifier.gsi = -1; |
319 | dev->host_irq_disabled = false; | ||
436 | return 0; | 320 | return 0; |
437 | } | 321 | } |
438 | #endif | 322 | #endif |
@@ -444,6 +328,7 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
444 | { | 328 | { |
445 | dev->guest_irq = irq->guest_irq; | 329 | dev->guest_irq = irq->guest_irq; |
446 | dev->ack_notifier.gsi = -1; | 330 | dev->ack_notifier.gsi = -1; |
331 | dev->host_irq_disabled = false; | ||
447 | return 0; | 332 | return 0; |
448 | } | 333 | } |
449 | #endif | 334 | #endif |
@@ -477,7 +362,6 @@ static int assign_host_irq(struct kvm *kvm,
477 | default: | 362 | default: |
478 | r = -EINVAL; | 363 | r = -EINVAL; |
479 | } | 364 | } |
480 | dev->host_irq_disabled = false; | ||
481 | 365 | ||
482 | if (!r) | 366 | if (!r) |
483 | dev->irq_requested_type |= host_irq_type; | 367 | dev->irq_requested_type |= host_irq_type; |
@@ -522,8 +406,7 @@ static int assign_guest_irq(struct kvm *kvm,
522 | 406 | ||
523 | if (!r) { | 407 | if (!r) { |
524 | dev->irq_requested_type |= guest_irq_type; | 408 | dev->irq_requested_type |= guest_irq_type; |
525 | if (dev->ack_notifier.gsi != -1) | 409 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); |
526 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
527 | } else | 410 | } else |
528 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | 411 | kvm_free_irq_source_id(kvm, dev->irq_source_id); |
529 | 412 | ||
@@ -579,7 +462,6 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
579 | { | 462 | { |
580 | int r = -ENODEV; | 463 | int r = -ENODEV; |
581 | struct kvm_assigned_dev_kernel *match; | 464 | struct kvm_assigned_dev_kernel *match; |
582 | unsigned long irq_type; | ||
583 | 465 | ||
584 | mutex_lock(&kvm->lock); | 466 | mutex_lock(&kvm->lock); |
585 | 467 | ||
@@ -588,9 +470,7 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
588 | if (!match) | 470 | if (!match) |
589 | goto out; | 471 | goto out; |
590 | 472 | ||
591 | irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | | 473 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); |
592 | KVM_DEV_IRQ_GUEST_MASK); | ||
593 | r = kvm_deassign_irq(kvm, match, irq_type); | ||
594 | out: | 474 | out: |
595 | mutex_unlock(&kvm->lock); | 475 | mutex_unlock(&kvm->lock); |
596 | return r; | 476 | return r; |
@@ -662,6 +542,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
662 | int r = 0, idx; | 542 | int r = 0, idx; |
663 | struct kvm_assigned_dev_kernel *match; | 543 | struct kvm_assigned_dev_kernel *match; |
664 | struct pci_dev *dev; | 544 | struct pci_dev *dev; |
545 | u8 header_type; | ||
665 | 546 | ||
666 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) | 547 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) |
667 | return -EINVAL; | 548 | return -EINVAL; |
@@ -694,7 +575,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
694 | } | 575 | } |
695 | 576 | ||
696 | /* Don't allow bridges to be assigned */ | 577 | /* Don't allow bridges to be assigned */ |
697 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) { | 578 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); |
579 | if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) { | ||
698 | r = -EPERM; | 580 | r = -EPERM; |
699 | goto out_put; | 581 | goto out_put; |
700 | } | 582 | } |
@@ -721,10 +603,6 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
721 | if (!match->pci_saved_state) | 603 | if (!match->pci_saved_state) |
722 | printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", | 604 | printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", |
723 | __func__, dev_name(&dev->dev)); | 605 | __func__, dev_name(&dev->dev)); |
724 | |||
725 | if (!pci_intx_mask_supported(dev)) | ||
726 | assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; | ||
727 | |||
728 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | 606 | match->assigned_dev_id = assigned_dev->assigned_dev_id; |
729 | match->host_segnr = assigned_dev->segnr; | 607 | match->host_segnr = assigned_dev->segnr; |
730 | match->host_busnr = assigned_dev->busnr; | 608 | match->host_busnr = assigned_dev->busnr; |
@@ -732,7 +610,6 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
732 | match->flags = assigned_dev->flags; | 610 | match->flags = assigned_dev->flags; |
733 | match->dev = dev; | 611 | match->dev = dev; |
734 | spin_lock_init(&match->intx_lock); | 612 | spin_lock_init(&match->intx_lock); |
735 | spin_lock_init(&match->intx_mask_lock); | ||
736 | match->irq_source_id = -1; | 613 | match->irq_source_id = -1; |
737 | match->kvm = kvm; | 614 | match->kvm = kvm; |
738 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | 615 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; |
@@ -878,55 +755,6 @@ msix_entry_out:
878 | } | 755 | } |
879 | #endif | 756 | #endif |
880 | 757 | ||
881 | static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, | ||
882 | struct kvm_assigned_pci_dev *assigned_dev) | ||
883 | { | ||
884 | int r = 0; | ||
885 | struct kvm_assigned_dev_kernel *match; | ||
886 | |||
887 | mutex_lock(&kvm->lock); | ||
888 | |||
889 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
890 | assigned_dev->assigned_dev_id); | ||
891 | if (!match) { | ||
892 | r = -ENODEV; | ||
893 | goto out; | ||
894 | } | ||
895 | |||
896 | spin_lock(&match->intx_mask_lock); | ||
897 | |||
898 | match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; | ||
899 | match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; | ||
900 | |||
901 | if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
902 | if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { | ||
903 | kvm_set_irq(match->kvm, match->irq_source_id, | ||
904 | match->guest_irq, 0); | ||
905 | /* | ||
906 | * Masking at hardware-level is performed on demand, | ||
907 | * i.e. when an IRQ actually arrives at the host. | ||
908 | */ | ||
909 | } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | ||
910 | /* | ||
911 | * Unmask the IRQ line if required. Unmasking at | ||
912 | * device level will be performed by user space. | ||
913 | */ | ||
914 | spin_lock_irq(&match->intx_lock); | ||
915 | if (match->host_irq_disabled) { | ||
916 | enable_irq(match->host_irq); | ||
917 | match->host_irq_disabled = false; | ||
918 | } | ||
919 | spin_unlock_irq(&match->intx_lock); | ||
920 | } | ||
921 | } | ||
922 | |||
923 | spin_unlock(&match->intx_mask_lock); | ||
924 | |||
925 | out: | ||
926 | mutex_unlock(&kvm->lock); | ||
927 | return r; | ||
928 | } | ||
929 | |||
930 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | 758 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, |
931 | unsigned long arg) | 759 | unsigned long arg) |
932 | { | 760 | { |
@@ -1034,15 +862,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
1034 | break; | 862 | break; |
1035 | } | 863 | } |
1036 | #endif | 864 | #endif |
1037 | case KVM_ASSIGN_SET_INTX_MASK: { | ||
1038 | struct kvm_assigned_pci_dev assigned_dev; | ||
1039 | |||
1040 | r = -EFAULT; | ||
1041 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
1042 | goto out; | ||
1043 | r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); | ||
1044 | break; | ||
1045 | } | ||
1046 | default: | 865 | default: |
1047 | r = -ENOTTY; | 866 | r = -ENOTTY; |
1048 | break; | 867 | break; |
@@ -1050,3 +869,4 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
1050 | out: | 869 | out: |
1051 | return r; | 870 | return r; |
1052 | } | 871 | } |
872 | |||
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index ea475cd0351..74268b4c2ee 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -111,8 +111,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
111 | list_entry(vcpu->async_pf.done.next, | 111 | list_entry(vcpu->async_pf.done.next, |
112 | typeof(*work), link); | 112 | typeof(*work), link); |
113 | list_del(&work->link); | 113 | list_del(&work->link); |
114 | if (!is_error_page(work->page)) | 114 | if (work->page) |
115 | kvm_release_page_clean(work->page); | 115 | put_page(work->page); |
116 | kmem_cache_free(async_pf_cache, work); | 116 | kmem_cache_free(async_pf_cache, work); |
117 | } | 117 | } |
118 | spin_unlock(&vcpu->async_pf.lock); | 118 | spin_unlock(&vcpu->async_pf.lock); |
@@ -138,8 +138,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
138 | 138 | ||
139 | list_del(&work->queue); | 139 | list_del(&work->queue); |
140 | vcpu->async_pf.queued--; | 140 | vcpu->async_pf.queued--; |
141 | if (!is_error_page(work->page)) | 141 | if (work->page) |
142 | kvm_release_page_clean(work->page); | 142 | put_page(work->page); |
143 | kmem_cache_free(async_pf_cache, work); | 143 | kmem_cache_free(async_pf_cache, work); |
144 | } | 144 | } |
145 | } | 145 | } |
@@ -203,7 +203,8 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
203 | if (!work) | 203 | if (!work) |
204 | return -ENOMEM; | 204 | return -ENOMEM; |
205 | 205 | ||
206 | work->page = KVM_ERR_PTR_BAD_PAGE; | 206 | work->page = bad_page; |
207 | get_page(bad_page); | ||
207 | INIT_LIST_HEAD(&work->queue); /* for list_del to work */ | 208 | INIT_LIST_HEAD(&work->queue); /* for list_del to work */ |
208 | 209 | ||
209 | spin_lock(&vcpu->async_pf.lock); | 210 | spin_lock(&vcpu->async_pf.lock); |
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 88b2fe3ddf4..fc8487564d1 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -24,25 +24,10 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
24 | static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, | 24 | static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, |
25 | gpa_t addr, int len) | 25 | gpa_t addr, int len) |
26 | { | 26 | { |
27 | /* is it in a batchable area ? | 27 | struct kvm_coalesced_mmio_zone *zone; |
28 | * (addr,len) is fully included in | ||
29 | * (zone->addr, zone->size) | ||
30 | */ | ||
31 | if (len < 0) | ||
32 | return 0; | ||
33 | if (addr + len < addr) | ||
34 | return 0; | ||
35 | if (addr < dev->zone.addr) | ||
36 | return 0; | ||
37 | if (addr + len > dev->zone.addr + dev->zone.size) | ||
38 | return 0; | ||
39 | return 1; | ||
40 | } | ||
41 | |||
42 | static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) | ||
43 | { | ||
44 | struct kvm_coalesced_mmio_ring *ring; | 28 | struct kvm_coalesced_mmio_ring *ring; |
45 | unsigned avail; | 29 | unsigned avail; |
30 | int i; | ||
46 | 31 | ||
47 | /* Are we able to batch it ? */ | 32 | /* Are we able to batch it ? */ |
48 | 33 | ||
@@ -52,12 +37,25 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
52 | */ | 37 | */ |
53 | ring = dev->kvm->coalesced_mmio_ring; | 38 | ring = dev->kvm->coalesced_mmio_ring; |
54 | avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; | 39 | avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; |
55 | if (avail == 0) { | 40 | if (avail < KVM_MAX_VCPUS) { |
56 | /* full */ | 41 | /* full */ |
57 | return 0; | 42 | return 0; |
58 | } | 43 | } |
59 | 44 | ||
60 | return 1; | 45 | /* is it in a batchable area ? */ |
46 | |||
47 | for (i = 0; i < dev->nb_zones; i++) { | ||
48 | zone = &dev->zone[i]; | ||
49 | |||
50 | /* (addr,len) is fully included in | ||
51 | * (zone->addr, zone->size) | ||
52 | */ | ||
53 | |||
54 | if (zone->addr <= addr && | ||
55 | addr + len <= zone->addr + zone->size) | ||
56 | return 1; | ||
57 | } | ||
58 | return 0; | ||
61 | } | 59 | } |
62 | 60 | ||
63 | static int coalesced_mmio_write(struct kvm_io_device *this, | 61 | static int coalesced_mmio_write(struct kvm_io_device *this, |
@@ -65,16 +63,10 @@ static int coalesced_mmio_write(struct kvm_io_device *this,
65 | { | 63 | { |
66 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); | 64 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
67 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; | 65 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; |
68 | |||
69 | if (!coalesced_mmio_in_range(dev, addr, len)) | 66 | if (!coalesced_mmio_in_range(dev, addr, len)) |
70 | return -EOPNOTSUPP; | 67 | return -EOPNOTSUPP; |
71 | 68 | ||
72 | spin_lock(&dev->kvm->ring_lock); | 69 | spin_lock(&dev->lock); |
73 | |||
74 | if (!coalesced_mmio_has_room(dev)) { | ||
75 | spin_unlock(&dev->kvm->ring_lock); | ||
76 | return -EOPNOTSUPP; | ||
77 | } | ||
78 | 70 | ||
79 | /* copy data in first free entry of the ring */ | 71 | /* copy data in first free entry of the ring */ |
80 | 72 | ||
@@ -83,7 +75,7 @@ static int coalesced_mmio_write(struct kvm_io_device *this,
83 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); | 75 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); |
84 | smp_wmb(); | 76 | smp_wmb(); |
85 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; | 77 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; |
86 | spin_unlock(&dev->kvm->ring_lock); | 78 | spin_unlock(&dev->lock); |
87 | return 0; | 79 | return 0; |
88 | } | 80 | } |
89 | 81 | ||
@@ -91,8 +83,6 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
91 | { | 83 | { |
92 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); | 84 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
93 | 85 | ||
94 | list_del(&dev->list); | ||
95 | |||
96 | kfree(dev); | 86 | kfree(dev); |
97 | } | 87 | } |
98 | 88 | ||
@@ -103,6 +93,7 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
103 | 93 | ||
104 | int kvm_coalesced_mmio_init(struct kvm *kvm) | 94 | int kvm_coalesced_mmio_init(struct kvm *kvm) |
105 | { | 95 | { |
96 | struct kvm_coalesced_mmio_dev *dev; | ||
106 | struct page *page; | 97 | struct page *page; |
107 | int ret; | 98 | int ret; |
108 | 99 | ||
@@ -110,18 +101,31 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
110 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 101 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
111 | if (!page) | 102 | if (!page) |
112 | goto out_err; | 103 | goto out_err; |
113 | |||
114 | ret = 0; | ||
115 | kvm->coalesced_mmio_ring = page_address(page); | 104 | kvm->coalesced_mmio_ring = page_address(page); |
116 | 105 | ||
117 | /* | 106 | ret = -ENOMEM; |
118 | * We're using this spinlock to sync access to the coalesced ring. | 107 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); |
119 | * The list doesn't need it's own lock since device registration and | 108 | if (!dev) |
120 | * unregistration should only happen when kvm->slots_lock is held. | 109 | goto out_free_page; |
121 | */ | 110 | spin_lock_init(&dev->lock); |
122 | spin_lock_init(&kvm->ring_lock); | 111 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); |
123 | INIT_LIST_HEAD(&kvm->coalesced_zones); | 112 | dev->kvm = kvm; |
113 | kvm->coalesced_mmio_dev = dev; | ||
114 | |||
115 | mutex_lock(&kvm->slots_lock); | ||
116 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); | ||
117 | mutex_unlock(&kvm->slots_lock); | ||
118 | if (ret < 0) | ||
119 | goto out_free_dev; | ||
124 | 120 | ||
121 | return ret; | ||
122 | |||
123 | out_free_dev: | ||
124 | kvm->coalesced_mmio_dev = NULL; | ||
125 | kfree(dev); | ||
126 | out_free_page: | ||
127 | kvm->coalesced_mmio_ring = NULL; | ||
128 | __free_page(page); | ||
125 | out_err: | 129 | out_err: |
126 | return ret; | 130 | return ret; |
127 | } | 131 | } |
@@ -135,50 +139,51 @@ void kvm_coalesced_mmio_free(struct kvm *kvm)
135 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 139 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
136 | struct kvm_coalesced_mmio_zone *zone) | 140 | struct kvm_coalesced_mmio_zone *zone) |
137 | { | 141 | { |
138 | int ret; | 142 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; |
139 | struct kvm_coalesced_mmio_dev *dev; | ||
140 | |||
141 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | ||
142 | if (!dev) | ||
143 | return -ENOMEM; | ||
144 | 143 | ||
145 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); | 144 | if (dev == NULL) |
146 | dev->kvm = kvm; | 145 | return -ENXIO; |
147 | dev->zone = *zone; | ||
148 | 146 | ||
149 | mutex_lock(&kvm->slots_lock); | 147 | mutex_lock(&kvm->slots_lock); |
150 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, | 148 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { |
151 | zone->size, &dev->dev); | 149 | mutex_unlock(&kvm->slots_lock); |
152 | if (ret < 0) | 150 | return -ENOBUFS; |
153 | goto out_free_dev; | 151 | } |
154 | list_add_tail(&dev->list, &kvm->coalesced_zones); | ||
155 | mutex_unlock(&kvm->slots_lock); | ||
156 | 152 | ||
157 | return ret; | 153 | dev->zone[dev->nb_zones] = *zone; |
154 | dev->nb_zones++; | ||
158 | 155 | ||
159 | out_free_dev: | ||
160 | mutex_unlock(&kvm->slots_lock); | 156 | mutex_unlock(&kvm->slots_lock); |
161 | |||
162 | kfree(dev); | ||
163 | |||
164 | if (dev == NULL) | ||
165 | return -ENXIO; | ||
166 | |||
167 | return 0; | 157 | return 0; |
168 | } | 158 | } |
169 | 159 | ||
170 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | 160 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, |
171 | struct kvm_coalesced_mmio_zone *zone) | 161 | struct kvm_coalesced_mmio_zone *zone) |
172 | { | 162 | { |
173 | struct kvm_coalesced_mmio_dev *dev, *tmp; | 163 | int i; |
164 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | ||
165 | struct kvm_coalesced_mmio_zone *z; | ||
166 | |||
167 | if (dev == NULL) | ||
168 | return -ENXIO; | ||
174 | 169 | ||
175 | mutex_lock(&kvm->slots_lock); | 170 | mutex_lock(&kvm->slots_lock); |
176 | 171 | ||
177 | list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) | 172 | i = dev->nb_zones; |
178 | if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) { | 173 | while (i) { |
179 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev); | 174 | z = &dev->zone[i - 1]; |
180 | kvm_iodevice_destructor(&dev->dev); | 175 | |
176 | /* unregister all zones | ||
177 | * included in (zone->addr, zone->size) | ||
178 | */ | ||
179 | |||
180 | if (zone->addr <= z->addr && | ||
181 | z->addr + z->size <= zone->addr + zone->size) { | ||
182 | dev->nb_zones--; | ||
183 | *z = dev->zone[dev->nb_zones]; | ||
181 | } | 184 | } |
185 | i--; | ||
186 | } | ||
182 | 187 | ||
183 | mutex_unlock(&kvm->slots_lock); | 188 | mutex_unlock(&kvm->slots_lock); |
184 | 189 | ||
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index b280c20444d..8a5959e3535 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -12,13 +12,14 @@
12 | 12 | ||
13 | #ifdef CONFIG_KVM_MMIO | 13 | #ifdef CONFIG_KVM_MMIO |
14 | 14 | ||
15 | #include <linux/list.h> | 15 | #define KVM_COALESCED_MMIO_ZONE_MAX 100 |
16 | 16 | ||
17 | struct kvm_coalesced_mmio_dev { | 17 | struct kvm_coalesced_mmio_dev { |
18 | struct list_head list; | ||
19 | struct kvm_io_device dev; | 18 | struct kvm_io_device dev; |
20 | struct kvm *kvm; | 19 | struct kvm *kvm; |
21 | struct kvm_coalesced_mmio_zone zone; | 20 | spinlock_t lock; |
21 | int nb_zones; | ||
22 | struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; | ||
22 | }; | 23 | }; |
23 | 24 | ||
24 | int kvm_coalesced_mmio_init(struct kvm *kvm); | 25 | int kvm_coalesced_mmio_init(struct kvm *kvm); |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b6eea5cc7b3..73358d256fa 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,7 +35,6 @@
35 | 35 | ||
36 | #include "iodev.h" | 36 | #include "iodev.h" |
37 | 37 | ||
38 | #ifdef __KVM_HAVE_IOAPIC | ||
39 | /* | 38 | /* |
40 | * -------------------------------------------------------------------- | 39 | * -------------------------------------------------------------------- |
41 | * irqfd: Allows an fd to be used to inject an interrupt to the guest | 40 | * irqfd: Allows an fd to be used to inject an interrupt to the guest |
@@ -44,31 +43,6 @@
44 | * -------------------------------------------------------------------- | 43 | * -------------------------------------------------------------------- |
45 | */ | 44 | */ |
46 | 45 | ||
47 | /* | ||
48 | * Resampling irqfds are a special variety of irqfds used to emulate | ||
49 | * level triggered interrupts. The interrupt is asserted on eventfd | ||
50 | * trigger. On acknowledgement through the irq ack notifier, the | ||
51 | * interrupt is de-asserted and userspace is notified through the | ||
52 | * resamplefd. All resamplers on the same gsi are de-asserted | ||
53 | * together, so we don't need to track the state of each individual | ||
54 | * user. We can also therefore share the same irq source ID. | ||
55 | */ | ||
56 | struct _irqfd_resampler { | ||
57 | struct kvm *kvm; | ||
58 | /* | ||
59 | * List of resampling struct _irqfd objects sharing this gsi. | ||
60 | * RCU list modified under kvm->irqfds.resampler_lock | ||
61 | */ | ||
62 | struct list_head list; | ||
63 | struct kvm_irq_ack_notifier notifier; | ||
64 | /* | ||
65 | * Entry in list of kvm->irqfd.resampler_list. Use for sharing | ||
66 | * resamplers among irqfds on the same gsi. | ||
67 | * Accessed and modified under kvm->irqfds.resampler_lock | ||
68 | */ | ||
69 | struct list_head link; | ||
70 | }; | ||
71 | |||
72 | struct _irqfd { | 46 | struct _irqfd { |
73 | /* Used for MSI fast-path */ | 47 | /* Used for MSI fast-path */ |
74 | struct kvm *kvm; | 48 | struct kvm *kvm; |
@@ -78,12 +52,6 @@ struct _irqfd {
78 | /* Used for level IRQ fast-path */ | 52 | /* Used for level IRQ fast-path */ |
79 | int gsi; | 53 | int gsi; |
80 | struct work_struct inject; | 54 | struct work_struct inject; |
81 | /* The resampler used by this irqfd (resampler-only) */ | ||
82 | struct _irqfd_resampler *resampler; | ||
83 | /* Eventfd notified on resample (resampler-only) */ | ||
84 | struct eventfd_ctx *resamplefd; | ||
85 | /* Entry in list of irqfds for a resampler (resampler-only) */ | ||
86 | struct list_head resampler_link; | ||
87 | /* Used for setup/shutdown */ | 55 | /* Used for setup/shutdown */ |
88 | struct eventfd_ctx *eventfd; | 56 | struct eventfd_ctx *eventfd; |
89 | struct list_head list; | 57 | struct list_head list; |
@@ -99,58 +67,8 @@ irqfd_inject(struct work_struct *work)
99 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 67 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); |
100 | struct kvm *kvm = irqfd->kvm; | 68 | struct kvm *kvm = irqfd->kvm; |
101 | 69 | ||
102 | if (!irqfd->resampler) { | 70 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); |
103 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 71 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); |
104 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | ||
105 | } else | ||
106 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
107 | irqfd->gsi, 1); | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * Since resampler irqfds share an IRQ source ID, we de-assert once | ||
112 | * then notify all of the resampler irqfds using this GSI. We can't | ||
113 | * do multiple de-asserts or we risk racing with incoming re-asserts. | ||
114 | */ | ||
115 | static void | ||
116 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | ||
117 | { | ||
118 | struct _irqfd_resampler *resampler; | ||
119 | struct _irqfd *irqfd; | ||
120 | |||
121 | resampler = container_of(kian, struct _irqfd_resampler, notifier); | ||
122 | |||
123 | kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
124 | resampler->notifier.gsi, 0); | ||
125 | |||
126 | rcu_read_lock(); | ||
127 | |||
128 | list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) | ||
129 | eventfd_signal(irqfd->resamplefd, 1); | ||
130 | |||
131 | rcu_read_unlock(); | ||
132 | } | ||
133 | |||
134 | static void | ||
135 | irqfd_resampler_shutdown(struct _irqfd *irqfd) | ||
136 | { | ||
137 | struct _irqfd_resampler *resampler = irqfd->resampler; | ||
138 | struct kvm *kvm = resampler->kvm; | ||
139 | |||
140 | mutex_lock(&kvm->irqfds.resampler_lock); | ||
141 | |||
142 | list_del_rcu(&irqfd->resampler_link); | ||
143 | synchronize_rcu(); | ||
144 | |||
145 | if (list_empty(&resampler->list)) { | ||
146 | list_del(&resampler->link); | ||
147 | kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); | ||
148 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
149 | resampler->notifier.gsi, 0); | ||
150 | kfree(resampler); | ||
151 | } | ||
152 | |||
153 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
154 | } | 72 | } |
155 | 73 | ||
156 | /* | 74 | /* |
@@ -172,12 +90,7 @@ irqfd_shutdown(struct work_struct *work)
172 | * We know no new events will be scheduled at this point, so block | 90 | * We know no new events will be scheduled at this point, so block |
173 | * until all previously outstanding events have completed | 91 | * until all previously outstanding events have completed |
174 | */ | 92 | */ |
175 | flush_work(&irqfd->inject); | 93 | flush_work_sync(&irqfd->inject); |
176 | |||
177 | if (irqfd->resampler) { | ||
178 | irqfd_resampler_shutdown(irqfd); | ||
179 | eventfd_ctx_put(irqfd->resamplefd); | ||
180 | } | ||
181 | 94 | ||
182 | /* | 95 | /* |
183 | * It is now safe to release the object's resources | 96 | * It is now safe to release the object's resources |
@@ -285,12 +198,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
285 | } | 198 | } |
286 | 199 | ||
287 | static int | 200 | static int |
288 | kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | 201 | kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) |
289 | { | 202 | { |
290 | struct kvm_irq_routing_table *irq_rt; | 203 | struct kvm_irq_routing_table *irq_rt; |
291 | struct _irqfd *irqfd, *tmp; | 204 | struct _irqfd *irqfd, *tmp; |
292 | struct file *file = NULL; | 205 | struct file *file = NULL; |
293 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; | 206 | struct eventfd_ctx *eventfd = NULL; |
294 | int ret; | 207 | int ret; |
295 | unsigned int events; | 208 | unsigned int events; |
296 | 209 | ||
@@ -299,12 +212,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
299 | return -ENOMEM; | 212 | return -ENOMEM; |
300 | 213 | ||
301 | irqfd->kvm = kvm; | 214 | irqfd->kvm = kvm; |
302 | irqfd->gsi = args->gsi; | 215 | irqfd->gsi = gsi; |
303 | INIT_LIST_HEAD(&irqfd->list); | 216 | INIT_LIST_HEAD(&irqfd->list); |
304 | INIT_WORK(&irqfd->inject, irqfd_inject); | 217 | INIT_WORK(&irqfd->inject, irqfd_inject); |
305 | INIT_WORK(&irqfd->shutdown, irqfd_shutdown); | 218 | INIT_WORK(&irqfd->shutdown, irqfd_shutdown); |
306 | 219 | ||
307 | file = eventfd_fget(args->fd); | 220 | file = eventfd_fget(fd); |
308 | if (IS_ERR(file)) { | 221 | if (IS_ERR(file)) { |
309 | ret = PTR_ERR(file); | 222 | ret = PTR_ERR(file); |
310 | goto fail; | 223 | goto fail; |
@@ -318,54 +231,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
318 | 231 | ||
319 | irqfd->eventfd = eventfd; | 232 | irqfd->eventfd = eventfd; |
320 | 233 | ||
321 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { | ||
322 | struct _irqfd_resampler *resampler; | ||
323 | |||
324 | resamplefd = eventfd_ctx_fdget(args->resamplefd); | ||
325 | if (IS_ERR(resamplefd)) { | ||
326 | ret = PTR_ERR(resamplefd); | ||
327 | goto fail; | ||
328 | } | ||
329 | |||
330 | irqfd->resamplefd = resamplefd; | ||
331 | INIT_LIST_HEAD(&irqfd->resampler_link); | ||
332 | |||
333 | mutex_lock(&kvm->irqfds.resampler_lock); | ||
334 | |||
335 | list_for_each_entry(resampler, | ||
336 | &kvm->irqfds.resampler_list, link) { | ||
337 | if (resampler->notifier.gsi == irqfd->gsi) { | ||
338 | irqfd->resampler = resampler; | ||
339 | break; | ||
340 | } | ||
341 | } | ||
342 | |||
343 | if (!irqfd->resampler) { | ||
344 | resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); | ||
345 | if (!resampler) { | ||
346 | ret = -ENOMEM; | ||
347 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
348 | goto fail; | ||
349 | } | ||
350 | |||
351 | resampler->kvm = kvm; | ||
352 | INIT_LIST_HEAD(&resampler->list); | ||
353 | resampler->notifier.gsi = irqfd->gsi; | ||
354 | resampler->notifier.irq_acked = irqfd_resampler_ack; | ||
355 | INIT_LIST_HEAD(&resampler->link); | ||
356 | |||
357 | list_add(&resampler->link, &kvm->irqfds.resampler_list); | ||
358 | kvm_register_irq_ack_notifier(kvm, | ||
359 | &resampler->notifier); | ||
360 | irqfd->resampler = resampler; | ||
361 | } | ||
362 | |||
363 | list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); | ||
364 | synchronize_rcu(); | ||
365 | |||
366 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
367 | } | ||
368 | |||
369 | /* | 234 | /* |
370 | * Install our own custom wake-up handling so we are notified via | 235 | * Install our own custom wake-up handling so we are notified via |
371 | * a callback whenever someone signals the underlying eventfd | 236 | * a callback whenever someone signals the underlying eventfd |
@@ -411,12 +276,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
411 | return 0; | 276 | return 0; |
412 | 277 | ||
413 | fail: | 278 | fail: |
414 | if (irqfd->resampler) | ||
415 | irqfd_resampler_shutdown(irqfd); | ||
416 | |||
417 | if (resamplefd && !IS_ERR(resamplefd)) | ||
418 | eventfd_ctx_put(resamplefd); | ||
419 | |||
420 | if (eventfd && !IS_ERR(eventfd)) | 279 | if (eventfd && !IS_ERR(eventfd)) |
421 | eventfd_ctx_put(eventfd); | 280 | eventfd_ctx_put(eventfd); |
422 | 281 | ||
@@ -426,38 +285,32 @@ fail:
426 | kfree(irqfd); | 285 | kfree(irqfd); |
427 | return ret; | 286 | return ret; |
428 | } | 287 | } |
429 | #endif | ||
430 | 288 | ||
431 | void | 289 | void |
432 | kvm_eventfd_init(struct kvm *kvm) | 290 | kvm_eventfd_init(struct kvm *kvm) |
433 | { | 291 | { |
434 | #ifdef __KVM_HAVE_IOAPIC | ||
435 | spin_lock_init(&kvm->irqfds.lock); | 292 | spin_lock_init(&kvm->irqfds.lock); |
436 | INIT_LIST_HEAD(&kvm->irqfds.items); | 293 | INIT_LIST_HEAD(&kvm->irqfds.items); |
437 | INIT_LIST_HEAD(&kvm->irqfds.resampler_list); | ||
438 | mutex_init(&kvm->irqfds.resampler_lock); | ||
439 | #endif | ||
440 | INIT_LIST_HEAD(&kvm->ioeventfds); | 294 | INIT_LIST_HEAD(&kvm->ioeventfds); |
441 | } | 295 | } |
442 | 296 | ||
443 | #ifdef __KVM_HAVE_IOAPIC | ||
444 | /* | 297 | /* |
445 | * shutdown any irqfd's that match fd+gsi | 298 | * shutdown any irqfd's that match fd+gsi |
446 | */ | 299 | */ |
447 | static int | 300 | static int |
448 | kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) | 301 | kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) |
449 | { | 302 | { |
450 | struct _irqfd *irqfd, *tmp; | 303 | struct _irqfd *irqfd, *tmp; |
451 | struct eventfd_ctx *eventfd; | 304 | struct eventfd_ctx *eventfd; |
452 | 305 | ||
453 | eventfd = eventfd_ctx_fdget(args->fd); | 306 | eventfd = eventfd_ctx_fdget(fd); |
454 | if (IS_ERR(eventfd)) | 307 | if (IS_ERR(eventfd)) |
455 | return PTR_ERR(eventfd); | 308 | return PTR_ERR(eventfd); |
456 | 309 | ||
457 | spin_lock_irq(&kvm->irqfds.lock); | 310 | spin_lock_irq(&kvm->irqfds.lock); |
458 | 311 | ||
459 | list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { | 312 | list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { |
460 | if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { | 313 | if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) { |
461 | /* | 314 | /* |
462 | * This rcu_assign_pointer is needed for when | 315 | * This rcu_assign_pointer is needed for when |
463 | * another thread calls kvm_irq_routing_update before | 316 | * another thread calls kvm_irq_routing_update before |
@@ -485,15 +338,12 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
485 | } | 338 | } |
486 | 339 | ||
487 | int | 340 | int |
488 | kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) | 341 | kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) |
489 | { | 342 | { |
490 | if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE)) | 343 | if (flags & KVM_IRQFD_FLAG_DEASSIGN) |
491 | return -EINVAL; | 344 | return kvm_irqfd_deassign(kvm, fd, gsi); |
492 | |||
493 | if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) | ||
494 | return kvm_irqfd_deassign(kvm, args); | ||
495 | 345 | ||
496 | return kvm_irqfd_assign(kvm, args); | 346 | return kvm_irqfd_assign(kvm, fd, gsi); |
497 | } | 347 | } |
498 | 348 | ||
499 | /* | 349 | /* |
@@ -560,7 +410,6 @@ static void __exit irqfd_module_exit(void)
560 | 410 | ||
561 | module_init(irqfd_module_init); | 411 | module_init(irqfd_module_init); |
562 | module_exit(irqfd_module_exit); | 412 | module_exit(irqfd_module_exit); |
563 | #endif | ||
564 | 413 | ||
565 | /* | 414 | /* |
566 | * -------------------------------------------------------------------- | 415 | * -------------------------------------------------------------------- |
@@ -737,8 +586,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
737 | 586 | ||
738 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); | 587 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); |
739 | 588 | ||
740 | ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, | 589 | ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); |
741 | &p->dev); | ||
742 | if (ret < 0) | 590 | if (ret < 0) |
743 | goto unlock_fail; | 591 | goto unlock_fail; |
744 | 592 | ||
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index cfb7e4d52dc..8df1ca104a7 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -185,56 +185,42 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
185 | irqe.dest_mode = 0; /* Physical mode. */ | 185 | irqe.dest_mode = 0; /* Physical mode. */ |
186 | /* need to read apic_id from apic regiest since | 186 | /* need to read apic_id from apic regiest since |
187 | * it can be rewritten */ | 187 | * it can be rewritten */ |
188 | irqe.dest_id = ioapic->kvm->bsp_vcpu_id; | 188 | irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id; |
189 | } | 189 | } |
190 | #endif | 190 | #endif |
191 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); | 191 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); |
192 | } | 192 | } |
193 | 193 | ||
194 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | 194 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) |
195 | int level) | ||
196 | { | 195 | { |
197 | u32 old_irr; | 196 | u32 old_irr; |
198 | u32 mask = 1 << irq; | 197 | u32 mask = 1 << irq; |
199 | union kvm_ioapic_redirect_entry entry; | 198 | union kvm_ioapic_redirect_entry entry; |
200 | int ret, irq_level; | 199 | int ret = 1; |
201 | |||
202 | BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); | ||
203 | 200 | ||
204 | spin_lock(&ioapic->lock); | 201 | spin_lock(&ioapic->lock); |
205 | old_irr = ioapic->irr; | 202 | old_irr = ioapic->irr; |
206 | irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], | 203 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { |
207 | irq_source_id, level); | 204 | entry = ioapic->redirtbl[irq]; |
208 | entry = ioapic->redirtbl[irq]; | 205 | level ^= entry.fields.polarity; |
209 | irq_level ^= entry.fields.polarity; | 206 | if (!level) |
210 | if (!irq_level) { | 207 | ioapic->irr &= ~mask; |
211 | ioapic->irr &= ~mask; | 208 | else { |
212 | ret = 1; | 209 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); |
213 | } else { | 210 | ioapic->irr |= mask; |
214 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); | 211 | if ((edge && old_irr != ioapic->irr) || |
215 | ioapic->irr |= mask; | 212 | (!edge && !entry.fields.remote_irr)) |
216 | if ((edge && old_irr != ioapic->irr) || | 213 | ret = ioapic_service(ioapic, irq); |
217 | (!edge && !entry.fields.remote_irr)) | 214 | else |
218 | ret = ioapic_service(ioapic, irq); | 215 | ret = 0; /* report coalesced interrupt */ |
219 | else | 216 | } |
220 | ret = 0; /* report coalesced interrupt */ | 217 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); |
221 | } | 218 | } |
222 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | ||
223 | spin_unlock(&ioapic->lock); | 219 | spin_unlock(&ioapic->lock); |
224 | 220 | ||
225 | return ret; | 221 | return ret; |
226 | } | 222 | } |
227 | 223 | ||
228 | void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) | ||
229 | { | ||
230 | int i; | ||
231 | |||
232 | spin_lock(&ioapic->lock); | ||
233 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) | ||
234 | __clear_bit(irq_source_id, &ioapic->irq_states[i]); | ||
235 | spin_unlock(&ioapic->lock); | ||
236 | } | ||
237 | |||
238 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, | 224 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, |
239 | int trigger_mode) | 225 | int trigger_mode) |
240 | { | 226 | { |
@@ -268,17 +254,13 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
268 | } | 254 | } |
269 | } | 255 | } |
270 | 256 | ||
271 | bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) | ||
272 | { | ||
273 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | ||
274 | smp_rmb(); | ||
275 | return test_bit(vector, ioapic->handled_vectors); | ||
276 | } | ||
277 | |||
278 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) | 257 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) |
279 | { | 258 | { |
280 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 259 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
281 | 260 | ||
261 | smp_rmb(); | ||
262 | if (!test_bit(vector, ioapic->handled_vectors)) | ||
263 | return; | ||
282 | spin_lock(&ioapic->lock); | 264 | spin_lock(&ioapic->lock); |
283 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); | 265 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); |
284 | spin_unlock(&ioapic->lock); | 266 | spin_unlock(&ioapic->lock); |
@@ -350,18 +332,9 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
350 | (void*)addr, len, val); | 332 | (void*)addr, len, val); |
351 | ASSERT(!(addr & 0xf)); /* check alignment */ | 333 | ASSERT(!(addr & 0xf)); /* check alignment */ |
352 | 334 | ||
353 | switch (len) { | 335 | if (len == 4 || len == 8) |
354 | case 8: | ||
355 | case 4: | ||
356 | data = *(u32 *) val; | 336 | data = *(u32 *) val; |
357 | break; | 337 | else { |
358 | case 2: | ||
359 | data = *(u16 *) val; | ||
360 | break; | ||
361 | case 1: | ||
362 | data = *(u8 *) val; | ||
363 | break; | ||
364 | default: | ||
365 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); | 338 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); |
366 | return 0; | 339 | return 0; |
367 | } | 340 | } |
@@ -370,7 +343,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
370 | spin_lock(&ioapic->lock); | 343 | spin_lock(&ioapic->lock); |
371 | switch (addr) { | 344 | switch (addr) { |
372 | case IOAPIC_REG_SELECT: | 345 | case IOAPIC_REG_SELECT: |
373 | ioapic->ioregsel = data & 0xFF; /* 8-bit register */ | 346 | ioapic->ioregsel = data; |
374 | break; | 347 | break; |
375 | 348 | ||
376 | case IOAPIC_REG_WINDOW: | 349 | case IOAPIC_REG_WINDOW: |
@@ -421,8 +394,7 @@ int kvm_ioapic_init(struct kvm *kvm)
421 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 394 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
422 | ioapic->kvm = kvm; | 395 | ioapic->kvm = kvm; |
423 | mutex_lock(&kvm->slots_lock); | 396 | mutex_lock(&kvm->slots_lock); |
424 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, | 397 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); |
425 | IOAPIC_MEM_LENGTH, &ioapic->dev); | ||
426 | mutex_unlock(&kvm->slots_lock); | 398 | mutex_unlock(&kvm->slots_lock); |
427 | if (ret < 0) { | 399 | if (ret < 0) { |
428 | kvm->arch.vioapic = NULL; | 400 | kvm->arch.vioapic = NULL; |
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index a30abfe6ed1..0b190c34ccc 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -71,12 +71,9 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
71 | int short_hand, int dest, int dest_mode); | 71 | int short_hand, int dest, int dest_mode); |
72 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); | 72 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); |
73 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); | 73 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); |
74 | bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); | ||
75 | int kvm_ioapic_init(struct kvm *kvm); | 74 | int kvm_ioapic_init(struct kvm *kvm); |
76 | void kvm_ioapic_destroy(struct kvm *kvm); | 75 | void kvm_ioapic_destroy(struct kvm *kvm); |
77 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | 76 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); |
78 | int level); | ||
79 | void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); | ||
80 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 77 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
81 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 78 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
82 | struct kvm_lapic_irq *irq); | 79 | struct kvm_lapic_irq *irq); |
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4a340cb2301..511e160f706 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -25,14 +25,12 @@ | |||
25 | 25 | ||
26 | #include <linux/list.h> | 26 | #include <linux/list.h> |
27 | #include <linux/kvm_host.h> | 27 | #include <linux/kvm_host.h> |
28 | #include <linux/module.h> | ||
29 | #include <linux/pci.h> | 28 | #include <linux/pci.h> |
30 | #include <linux/stat.h> | ||
31 | #include <linux/dmar.h> | 29 | #include <linux/dmar.h> |
32 | #include <linux/iommu.h> | 30 | #include <linux/iommu.h> |
33 | #include <linux/intel-iommu.h> | 31 | #include <linux/intel-iommu.h> |
34 | 32 | ||
35 | static bool allow_unsafe_assigned_interrupts; | 33 | static int allow_unsafe_assigned_interrupts; |
36 | module_param_named(allow_unsafe_assigned_interrupts, | 34 | module_param_named(allow_unsafe_assigned_interrupts, |
37 | allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); | 35 | allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); |
38 | MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, | 36 | MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, |
@@ -42,21 +40,21 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); | |||
42 | static void kvm_iommu_put_pages(struct kvm *kvm, | 40 | static void kvm_iommu_put_pages(struct kvm *kvm, |
43 | gfn_t base_gfn, unsigned long npages); | 41 | gfn_t base_gfn, unsigned long npages); |
44 | 42 | ||
45 | static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, | 43 | static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, |
46 | unsigned long size) | 44 | gfn_t gfn, unsigned long size) |
47 | { | 45 | { |
48 | gfn_t end_gfn; | 46 | gfn_t end_gfn; |
49 | pfn_t pfn; | 47 | pfn_t pfn; |
50 | 48 | ||
51 | pfn = gfn_to_pfn_memslot(slot, gfn); | 49 | pfn = gfn_to_pfn_memslot(kvm, slot, gfn); |
52 | end_gfn = gfn + (size >> PAGE_SHIFT); | 50 | end_gfn = gfn + (size >> PAGE_SHIFT); |
53 | gfn += 1; | 51 | gfn += 1; |
54 | 52 | ||
55 | if (is_error_noslot_pfn(pfn)) | 53 | if (is_error_pfn(pfn)) |
56 | return pfn; | 54 | return pfn; |
57 | 55 | ||
58 | while (gfn < end_gfn) | 56 | while (gfn < end_gfn) |
59 | gfn_to_pfn_memslot(slot, gfn++); | 57 | gfn_to_pfn_memslot(kvm, slot, gfn++); |
60 | 58 | ||
61 | return pfn; | 59 | return pfn; |
62 | } | 60 | } |
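kvm_pin_pages() above pins the first pfn of a mapping and then walks the remaining gfns so the whole range stays referenced; the end of the range is simply gfn + (size >> PAGE_SHIFT). A small standalone sketch of that arithmetic follows, assuming 4 KiB pages and with a counter standing in for the gfn_to_pfn_memslot() calls.

/* Sketch of the range arithmetic in kvm_pin_pages() above: a mapping of
 * `size` bytes starting at `gfn` covers gfns [gfn, gfn + (size >> PAGE_SHIFT)).
 * Illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumes 4 KiB pages */

typedef uint64_t gfn_t;

int main(void)
{
	gfn_t gfn = 0x100;
	unsigned long size = 2ul << 20;	/* a 2 MiB large-page mapping */
	gfn_t end_gfn = gfn + (size >> PAGE_SHIFT);
	unsigned long pinned = 0;

	/* the real code pins the first pfn, then touches the rest */
	for (gfn_t g = gfn; g < end_gfn; g++)
		pinned++;		/* stand-in for gfn_to_pfn_memslot() */

	printf("pinned %lu pages (gfn 0x%llx..0x%llx)\n",
	       pinned, (unsigned long long)gfn,
	       (unsigned long long)(end_gfn - 1));
	return 0;
}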
@@ -105,8 +103,8 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | |||
105 | * Pin all pages we are about to map in memory. This is | 103 | * Pin all pages we are about to map in memory. This is |
106 | * important because we unmap and unpin in 4kb steps later. | 104 | * important because we unmap and unpin in 4kb steps later. |
107 | */ | 105 | */ |
108 | pfn = kvm_pin_pages(slot, gfn, page_size); | 106 | pfn = kvm_pin_pages(kvm, slot, gfn, page_size); |
109 | if (is_error_noslot_pfn(pfn)) { | 107 | if (is_error_pfn(pfn)) { |
110 | gfn += 1; | 108 | gfn += 1; |
111 | continue; | 109 | continue; |
112 | } | 110 | } |
@@ -134,15 +132,14 @@ unmap_pages: | |||
134 | 132 | ||
135 | static int kvm_iommu_map_memslots(struct kvm *kvm) | 133 | static int kvm_iommu_map_memslots(struct kvm *kvm) |
136 | { | 134 | { |
137 | int idx, r = 0; | 135 | int i, idx, r = 0; |
138 | struct kvm_memslots *slots; | 136 | struct kvm_memslots *slots; |
139 | struct kvm_memory_slot *memslot; | ||
140 | 137 | ||
141 | idx = srcu_read_lock(&kvm->srcu); | 138 | idx = srcu_read_lock(&kvm->srcu); |
142 | slots = kvm_memslots(kvm); | 139 | slots = kvm_memslots(kvm); |
143 | 140 | ||
144 | kvm_for_each_memslot(memslot, slots) { | 141 | for (i = 0; i < slots->nmemslots; i++) { |
145 | r = kvm_iommu_map_pages(kvm, memslot); | 142 | r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); |
146 | if (r) | 143 | if (r) |
147 | break; | 144 | break; |
148 | } | 145 | } |
@@ -168,7 +165,11 @@ int kvm_assign_device(struct kvm *kvm, | |||
168 | 165 | ||
169 | r = iommu_attach_device(domain, &pdev->dev); | 166 | r = iommu_attach_device(domain, &pdev->dev); |
170 | if (r) { | 167 | if (r) { |
171 | dev_err(&pdev->dev, "kvm assign device failed ret %d", r); | 168 | printk(KERN_ERR "assign device %x:%x:%x.%x failed", |
169 | pci_domain_nr(pdev->bus), | ||
170 | pdev->bus->number, | ||
171 | PCI_SLOT(pdev->devfn), | ||
172 | PCI_FUNC(pdev->devfn)); | ||
172 | return r; | 173 | return r; |
173 | } | 174 | } |
174 | 175 | ||
@@ -186,8 +187,6 @@ int kvm_assign_device(struct kvm *kvm, | |||
186 | goto out_unmap; | 187 | goto out_unmap; |
187 | } | 188 | } |
188 | 189 | ||
189 | pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; | ||
190 | |||
191 | printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", | 190 | printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", |
192 | assigned_dev->host_segnr, | 191 | assigned_dev->host_segnr, |
193 | assigned_dev->host_busnr, | 192 | assigned_dev->host_busnr, |
@@ -216,8 +215,6 @@ int kvm_deassign_device(struct kvm *kvm, | |||
216 | 215 | ||
217 | iommu_detach_device(domain, &pdev->dev); | 216 | iommu_detach_device(domain, &pdev->dev); |
218 | 217 | ||
219 | pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; | ||
220 | |||
221 | printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", | 218 | printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", |
222 | assigned_dev->host_segnr, | 219 | assigned_dev->host_segnr, |
223 | assigned_dev->host_busnr, | 220 | assigned_dev->host_busnr, |
@@ -236,13 +233,9 @@ int kvm_iommu_map_guest(struct kvm *kvm) | |||
236 | return -ENODEV; | 233 | return -ENODEV; |
237 | } | 234 | } |
238 | 235 | ||
239 | mutex_lock(&kvm->slots_lock); | ||
240 | |||
241 | kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); | 236 | kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); |
242 | if (!kvm->arch.iommu_domain) { | 237 | if (!kvm->arch.iommu_domain) |
243 | r = -ENOMEM; | 238 | return -ENOMEM; |
244 | goto out_unlock; | ||
245 | } | ||
246 | 239 | ||
247 | if (!allow_unsafe_assigned_interrupts && | 240 | if (!allow_unsafe_assigned_interrupts && |
248 | !iommu_domain_has_cap(kvm->arch.iommu_domain, | 241 | !iommu_domain_has_cap(kvm->arch.iommu_domain, |
@@ -253,16 +246,17 @@ int kvm_iommu_map_guest(struct kvm *kvm) | |||
253 | " module option.\n", __func__); | 246 | " module option.\n", __func__); |
254 | iommu_domain_free(kvm->arch.iommu_domain); | 247 | iommu_domain_free(kvm->arch.iommu_domain); |
255 | kvm->arch.iommu_domain = NULL; | 248 | kvm->arch.iommu_domain = NULL; |
256 | r = -EPERM; | 249 | return -EPERM; |
257 | goto out_unlock; | ||
258 | } | 250 | } |
259 | 251 | ||
260 | r = kvm_iommu_map_memslots(kvm); | 252 | r = kvm_iommu_map_memslots(kvm); |
261 | if (r) | 253 | if (r) |
262 | kvm_iommu_unmap_memslots(kvm); | 254 | goto out_unmap; |
263 | 255 | ||
264 | out_unlock: | 256 | return 0; |
265 | mutex_unlock(&kvm->slots_lock); | 257 | |
258 | out_unmap: | ||
259 | kvm_iommu_unmap_memslots(kvm); | ||
266 | return r; | 260 | return r; |
267 | } | 261 | } |
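Both versions of kvm_iommu_map_guest() above unwind partial setup through goto labels (out_unlock in the code being removed, out_unmap in the restored code). A generic standalone sketch of that cleanup idiom follows; malloc() stands in for the domain allocation and the memslot mapping, and the function names are illustrative.

/* Sketch of the goto-based unwinding used in both versions of
 * kvm_iommu_map_guest() above: acquire resources in order and, on failure,
 * release the ones already acquired through labels in reverse order. */
#include <stdio.h>
#include <stdlib.h>

static int setup(char **out)
{
	char *domain, *mapping;

	domain = malloc(64);		/* stand-in for iommu_domain_alloc() */
	if (!domain)
		return -1;

	mapping = malloc(64);		/* stand-in for kvm_iommu_map_memslots() */
	if (!mapping)
		goto out_free_domain;

	out[0] = domain;		/* success: hand both back to the caller */
	out[1] = mapping;
	return 0;

out_free_domain:
	free(domain);
	return -1;
}

int main(void)
{
	char *res[2];

	if (setup(res))
		return 1;
	printf("setup complete\n");
	free(res[1]);
	free(res[0]);
	return 0;
}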
268 | 262 | ||
@@ -296,12 +290,6 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
296 | 290 | ||
297 | /* Get physical address */ | 291 | /* Get physical address */ |
298 | phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); | 292 | phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); |
299 | |||
300 | if (!phys) { | ||
301 | gfn++; | ||
302 | continue; | ||
303 | } | ||
304 | |||
305 | pfn = phys >> PAGE_SHIFT; | 293 | pfn = phys >> PAGE_SHIFT; |
306 | 294 | ||
307 | /* Unmap address from IO address space */ | 295 | /* Unmap address from IO address space */ |
@@ -315,23 +303,18 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
315 | } | 303 | } |
316 | } | 304 | } |
317 | 305 | ||
318 | void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | ||
319 | { | ||
320 | kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages); | ||
321 | } | ||
322 | |||
323 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) | 306 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) |
324 | { | 307 | { |
325 | int idx; | 308 | int i, idx; |
326 | struct kvm_memslots *slots; | 309 | struct kvm_memslots *slots; |
327 | struct kvm_memory_slot *memslot; | ||
328 | 310 | ||
329 | idx = srcu_read_lock(&kvm->srcu); | 311 | idx = srcu_read_lock(&kvm->srcu); |
330 | slots = kvm_memslots(kvm); | 312 | slots = kvm_memslots(kvm); |
331 | 313 | ||
332 | kvm_for_each_memslot(memslot, slots) | 314 | for (i = 0; i < slots->nmemslots; i++) { |
333 | kvm_iommu_unmap_pages(kvm, memslot); | 315 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, |
334 | 316 | slots->memslots[i].npages); | |
317 | } | ||
335 | srcu_read_unlock(&kvm->srcu, idx); | 318 | srcu_read_unlock(&kvm->srcu, idx); |
336 | 319 | ||
337 | return 0; | 320 | return 0; |
@@ -345,11 +328,7 @@ int kvm_iommu_unmap_guest(struct kvm *kvm) | |||
345 | if (!domain) | 328 | if (!domain) |
346 | return 0; | 329 | return 0; |
347 | 330 | ||
348 | mutex_lock(&kvm->slots_lock); | ||
349 | kvm_iommu_unmap_memslots(kvm); | 331 | kvm_iommu_unmap_memslots(kvm); |
350 | kvm->arch.iommu_domain = NULL; | ||
351 | mutex_unlock(&kvm->slots_lock); | ||
352 | |||
353 | iommu_domain_free(domain); | 332 | iommu_domain_free(domain); |
354 | return 0; | 333 | return 0; |
355 | } | 334 | } |
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 656fa455e15..9f614b4e365 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
@@ -33,12 +33,26 @@ | |||
33 | 33 | ||
34 | #include "ioapic.h" | 34 | #include "ioapic.h" |
35 | 35 | ||
36 | static inline int kvm_irq_line_state(unsigned long *irq_state, | ||
37 | int irq_source_id, int level) | ||
38 | { | ||
39 | /* Logical OR for level trig interrupt */ | ||
40 | if (level) | ||
41 | set_bit(irq_source_id, irq_state); | ||
42 | else | ||
43 | clear_bit(irq_source_id, irq_state); | ||
44 | |||
45 | return !!(*irq_state); | ||
46 | } | ||
47 | |||
36 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | 48 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
37 | struct kvm *kvm, int irq_source_id, int level) | 49 | struct kvm *kvm, int irq_source_id, int level) |
38 | { | 50 | { |
39 | #ifdef CONFIG_X86 | 51 | #ifdef CONFIG_X86 |
40 | struct kvm_pic *pic = pic_irqchip(kvm); | 52 | struct kvm_pic *pic = pic_irqchip(kvm); |
41 | return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); | 53 | level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], |
54 | irq_source_id, level); | ||
55 | return kvm_pic_set_irq(pic, e->irqchip.pin, level); | ||
42 | #else | 56 | #else |
43 | return -1; | 57 | return -1; |
44 | #endif | 58 | #endif |
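The restored kvm_irq_line_state() helper above implements level-triggered line sharing: each interrupt source owns one bit of the line state, and the line reads as asserted while any source still drives it. A standalone sketch of the same logic, using plain bit operations in place of set_bit() and clear_bit():

/* Sketch of the level-trigger OR in kvm_irq_line_state() above. */
#include <stdio.h>

static int irq_line_state(unsigned long *irq_state, int irq_source_id,
			  int level)
{
	if (level)
		*irq_state |= 1ul << irq_source_id;
	else
		*irq_state &= ~(1ul << irq_source_id);

	return !!(*irq_state);
}

int main(void)
{
	unsigned long line = 0;

	printf("%d\n", irq_line_state(&line, 0, 1));	/* 1: source 0 raises */
	printf("%d\n", irq_line_state(&line, 1, 1));	/* 1: source 1 raises */
	printf("%d\n", irq_line_state(&line, 0, 0));	/* 1: source 1 still high */
	printf("%d\n", irq_line_state(&line, 1, 0));	/* 0: line finally drops */
	return 0;
}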
@@ -48,7 +62,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | |||
48 | struct kvm *kvm, int irq_source_id, int level) | 62 | struct kvm *kvm, int irq_source_id, int level) |
49 | { | 63 | { |
50 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 64 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
51 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level); | 65 | level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], |
66 | irq_source_id, level); | ||
67 | |||
68 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); | ||
52 | } | 69 | } |
53 | 70 | ||
54 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) | 71 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) |
@@ -68,13 +85,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
68 | struct kvm_vcpu *vcpu, *lowest = NULL; | 85 | struct kvm_vcpu *vcpu, *lowest = NULL; |
69 | 86 | ||
70 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 87 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
71 | kvm_is_dm_lowest_prio(irq)) { | 88 | kvm_is_dm_lowest_prio(irq)) |
72 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 89 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
73 | irq->delivery_mode = APIC_DM_FIXED; | ||
74 | } | ||
75 | |||
76 | if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r)) | ||
77 | return r; | ||
78 | 90 | ||
79 | kvm_for_each_vcpu(i, vcpu, kvm) { | 91 | kvm_for_each_vcpu(i, vcpu, kvm) { |
80 | if (!kvm_apic_present(vcpu)) | 92 | if (!kvm_apic_present(vcpu)) |
@@ -102,23 +114,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
102 | return r; | 114 | return r; |
103 | } | 115 | } |
104 | 116 | ||
105 | static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | ||
106 | struct kvm_lapic_irq *irq) | ||
107 | { | ||
108 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); | ||
109 | |||
110 | irq->dest_id = (e->msi.address_lo & | ||
111 | MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; | ||
112 | irq->vector = (e->msi.data & | ||
113 | MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; | ||
114 | irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; | ||
115 | irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; | ||
116 | irq->delivery_mode = e->msi.data & 0x700; | ||
117 | irq->level = 1; | ||
118 | irq->shorthand = 0; | ||
119 | /* TODO Deal with RH bit of MSI message address */ | ||
120 | } | ||
121 | |||
122 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 117 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
123 | struct kvm *kvm, int irq_source_id, int level) | 118 | struct kvm *kvm, int irq_source_id, int level) |
124 | { | 119 | { |
@@ -127,38 +122,20 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
127 | if (!level) | 122 | if (!level) |
128 | return -1; | 123 | return -1; |
129 | 124 | ||
130 | kvm_set_msi_irq(e, &irq); | 125 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); |
131 | |||
132 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | ||
133 | } | ||
134 | |||
135 | |||
136 | static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, | ||
137 | struct kvm *kvm) | ||
138 | { | ||
139 | struct kvm_lapic_irq irq; | ||
140 | int r; | ||
141 | |||
142 | kvm_set_msi_irq(e, &irq); | ||
143 | |||
144 | if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r)) | ||
145 | return r; | ||
146 | else | ||
147 | return -EWOULDBLOCK; | ||
148 | } | ||
149 | |||
150 | int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) | ||
151 | { | ||
152 | struct kvm_kernel_irq_routing_entry route; | ||
153 | |||
154 | if (!irqchip_in_kernel(kvm) || msi->flags != 0) | ||
155 | return -EINVAL; | ||
156 | 126 | ||
157 | route.msi.address_lo = msi->address_lo; | 127 | irq.dest_id = (e->msi.address_lo & |
158 | route.msi.address_hi = msi->address_hi; | 128 | MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; |
159 | route.msi.data = msi->data; | 129 | irq.vector = (e->msi.data & |
130 | MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; | ||
131 | irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; | ||
132 | irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; | ||
133 | irq.delivery_mode = e->msi.data & 0x700; | ||
134 | irq.level = 1; | ||
135 | irq.shorthand = 0; | ||
160 | 136 | ||
161 | return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); | 137 | /* TODO Deal with RH bit of MSI message address */ |
138 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | ||
162 | } | 139 | } |
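kvm_set_msi() above decodes the routing entry inline: the destination APIC ID comes from the MSI address, the vector and delivery mode from the MSI data word, with level forced to 1. The standalone sketch below assumes the x86 MSI layout (destination ID in address bits 19:12, vector in data bits 7:0, delivery mode in data bits 10:8); the mask and shift constants are written out here as assumptions rather than taken from kernel headers.

/* Sketch of the MSI field extraction in kvm_set_msi() above, with the
 * assumed x86 layout encoded in local constants. Illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define MSI_ADDR_DEST_ID_MASK	0x000ff000u
#define MSI_ADDR_DEST_ID_SHIFT	12
#define MSI_DATA_VECTOR_MASK	0x000000ffu
#define MSI_DATA_VECTOR_SHIFT	0
#define MSI_DATA_DELIVERY_MASK	0x00000700u

int main(void)
{
	uint32_t address_lo = 0xfee01000u;	/* destination id 0x01 */
	uint32_t data = 0x00000031u;		/* fixed delivery, vector 0x31 */

	uint32_t dest_id = (address_lo & MSI_ADDR_DEST_ID_MASK)
					>> MSI_ADDR_DEST_ID_SHIFT;
	uint32_t vector = (data & MSI_DATA_VECTOR_MASK)
					>> MSI_DATA_VECTOR_SHIFT;
	uint32_t delivery = data & MSI_DATA_DELIVERY_MASK;

	printf("dest_id=0x%x vector=0x%x delivery=0x%x\n",
	       dest_id, vector, delivery);
	return 0;
}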
163 | 140 | ||
164 | /* | 141 | /* |
@@ -199,44 +176,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) | |||
199 | return ret; | 176 | return ret; |
200 | } | 177 | } |
201 | 178 | ||
202 | /* | ||
203 | * Deliver an IRQ in an atomic context if we can, or return a failure, | ||
204 | * user can retry in a process context. | ||
205 | * Return value: | ||
206 | * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. | ||
207 | * Other values - No need to retry. | ||
208 | */ | ||
209 | int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) | ||
210 | { | ||
211 | struct kvm_kernel_irq_routing_entry *e; | ||
212 | int ret = -EINVAL; | ||
213 | struct kvm_irq_routing_table *irq_rt; | ||
214 | struct hlist_node *n; | ||
215 | |||
216 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
217 | |||
218 | /* | ||
219 | * Injection into either PIC or IOAPIC might need to scan all CPUs, | ||
220 | * which would need to be retried from thread context; when same GSI | ||
221 | * is connected to both PIC and IOAPIC, we'd have to report a | ||
222 | * partial failure here. | ||
223 | * Since there's no easy way to do this, we only support injecting MSI | ||
224 | * which is limited to 1:1 GSI mapping. | ||
225 | */ | ||
226 | rcu_read_lock(); | ||
227 | irq_rt = rcu_dereference(kvm->irq_routing); | ||
228 | if (irq < irq_rt->nr_rt_entries) | ||
229 | hlist_for_each_entry(e, n, &irq_rt->map[irq], link) { | ||
230 | if (likely(e->type == KVM_IRQ_ROUTING_MSI)) | ||
231 | ret = kvm_set_msi_inatomic(e, kvm); | ||
232 | else | ||
233 | ret = -EWOULDBLOCK; | ||
234 | break; | ||
235 | } | ||
236 | rcu_read_unlock(); | ||
237 | return ret; | ||
238 | } | ||
239 | |||
240 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 179 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) |
241 | { | 180 | { |
242 | struct kvm_irq_ack_notifier *kian; | 181 | struct kvm_irq_ack_notifier *kian; |
@@ -287,9 +226,6 @@ int kvm_request_irq_source_id(struct kvm *kvm) | |||
287 | } | 226 | } |
288 | 227 | ||
289 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 228 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
290 | #ifdef CONFIG_X86 | ||
291 | ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); | ||
292 | #endif | ||
293 | set_bit(irq_source_id, bitmap); | 229 | set_bit(irq_source_id, bitmap); |
294 | unlock: | 230 | unlock: |
295 | mutex_unlock(&kvm->irq_lock); | 231 | mutex_unlock(&kvm->irq_lock); |
@@ -299,10 +235,9 @@ unlock: | |||
299 | 235 | ||
300 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | 236 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) |
301 | { | 237 | { |
238 | int i; | ||
239 | |||
302 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 240 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
303 | #ifdef CONFIG_X86 | ||
304 | ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); | ||
305 | #endif | ||
306 | 241 | ||
307 | mutex_lock(&kvm->irq_lock); | 242 | mutex_lock(&kvm->irq_lock); |
308 | if (irq_source_id < 0 || | 243 | if (irq_source_id < 0 || |
@@ -314,10 +249,14 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
314 | if (!irqchip_in_kernel(kvm)) | 249 | if (!irqchip_in_kernel(kvm)) |
315 | goto unlock; | 250 | goto unlock; |
316 | 251 | ||
317 | kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); | 252 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { |
253 | clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); | ||
254 | if (i >= 16) | ||
255 | continue; | ||
318 | #ifdef CONFIG_X86 | 256 | #ifdef CONFIG_X86 |
319 | kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); | 257 | clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); |
320 | #endif | 258 | #endif |
259 | } | ||
321 | unlock: | 260 | unlock: |
322 | mutex_unlock(&kvm->irq_lock); | 261 | mutex_unlock(&kvm->irq_lock); |
323 | } | 262 | } |
@@ -379,7 +318,6 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
379 | */ | 318 | */ |
380 | hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) | 319 | hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) |
381 | if (ei->type == KVM_IRQ_ROUTING_MSI || | 320 | if (ei->type == KVM_IRQ_ROUTING_MSI || |
382 | ue->type == KVM_IRQ_ROUTING_MSI || | ||
383 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | 321 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) |
384 | return r; | 322 | return r; |
385 | 323 | ||
@@ -391,11 +329,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
391 | switch (ue->u.irqchip.irqchip) { | 329 | switch (ue->u.irqchip.irqchip) { |
392 | case KVM_IRQCHIP_PIC_MASTER: | 330 | case KVM_IRQCHIP_PIC_MASTER: |
393 | e->set = kvm_set_pic_irq; | 331 | e->set = kvm_set_pic_irq; |
394 | max_pin = PIC_NUM_PINS; | 332 | max_pin = 16; |
395 | break; | 333 | break; |
396 | case KVM_IRQCHIP_PIC_SLAVE: | 334 | case KVM_IRQCHIP_PIC_SLAVE: |
397 | e->set = kvm_set_pic_irq; | 335 | e->set = kvm_set_pic_irq; |
398 | max_pin = PIC_NUM_PINS; | 336 | max_pin = 16; |
399 | delta = 8; | 337 | delta = 8; |
400 | break; | 338 | break; |
401 | case KVM_IRQCHIP_IOAPIC: | 339 | case KVM_IRQCHIP_IOAPIC: |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1cd693a76a5..aefdda390f5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -47,8 +47,6 @@ | |||
47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
48 | #include <linux/hugetlb.h> | 48 | #include <linux/hugetlb.h> |
49 | #include <linux/slab.h> | 49 | #include <linux/slab.h> |
50 | #include <linux/sort.h> | ||
51 | #include <linux/bsearch.h> | ||
52 | 50 | ||
53 | #include <asm/processor.h> | 51 | #include <asm/processor.h> |
54 | #include <asm/io.h> | 52 | #include <asm/io.h> |
@@ -100,7 +98,13 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); | |||
100 | 98 | ||
101 | static bool largepages_enabled = true; | 99 | static bool largepages_enabled = true; |
102 | 100 | ||
103 | bool kvm_is_mmio_pfn(pfn_t pfn) | 101 | static struct page *hwpoison_page; |
102 | static pfn_t hwpoison_pfn; | ||
103 | |||
104 | struct page *fault_page; | ||
105 | pfn_t fault_pfn; | ||
106 | |||
107 | inline int kvm_is_mmio_pfn(pfn_t pfn) | ||
104 | { | 108 | { |
105 | if (pfn_valid(pfn)) { | 109 | if (pfn_valid(pfn)) { |
106 | int reserved; | 110 | int reserved; |
@@ -131,12 +135,11 @@ bool kvm_is_mmio_pfn(pfn_t pfn) | |||
131 | /* | 135 | /* |
132 | * Switches to specified vcpu, until a matching vcpu_put() | 136 | * Switches to specified vcpu, until a matching vcpu_put() |
133 | */ | 137 | */ |
134 | int vcpu_load(struct kvm_vcpu *vcpu) | 138 | void vcpu_load(struct kvm_vcpu *vcpu) |
135 | { | 139 | { |
136 | int cpu; | 140 | int cpu; |
137 | 141 | ||
138 | if (mutex_lock_killable(&vcpu->mutex)) | 142 | mutex_lock(&vcpu->mutex); |
139 | return -EINTR; | ||
140 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { | 143 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { |
141 | /* The thread running this VCPU changed. */ | 144 | /* The thread running this VCPU changed. */ |
142 | struct pid *oldpid = vcpu->pid; | 145 | struct pid *oldpid = vcpu->pid; |
@@ -149,7 +152,6 @@ int vcpu_load(struct kvm_vcpu *vcpu) | |||
149 | preempt_notifier_register(&vcpu->preempt_notifier); | 152 | preempt_notifier_register(&vcpu->preempt_notifier); |
150 | kvm_arch_vcpu_load(vcpu, cpu); | 153 | kvm_arch_vcpu_load(vcpu, cpu); |
151 | put_cpu(); | 154 | put_cpu(); |
152 | return 0; | ||
153 | } | 155 | } |
154 | 156 | ||
155 | void vcpu_put(struct kvm_vcpu *vcpu) | 157 | void vcpu_put(struct kvm_vcpu *vcpu) |
@@ -199,7 +201,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
199 | 201 | ||
200 | void kvm_flush_remote_tlbs(struct kvm *kvm) | 202 | void kvm_flush_remote_tlbs(struct kvm *kvm) |
201 | { | 203 | { |
202 | long dirty_count = kvm->tlbs_dirty; | 204 | int dirty_count = kvm->tlbs_dirty; |
203 | 205 | ||
204 | smp_mb(); | 206 | smp_mb(); |
205 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) | 207 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) |
@@ -212,11 +214,6 @@ void kvm_reload_remote_mmus(struct kvm *kvm) | |||
212 | make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); | 214 | make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); |
213 | } | 215 | } |
214 | 216 | ||
215 | void kvm_make_mclock_inprogress_request(struct kvm *kvm) | ||
216 | { | ||
217 | make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); | ||
218 | } | ||
219 | |||
220 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 217 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
221 | { | 218 | { |
222 | struct page *page; | 219 | struct page *page; |
@@ -237,9 +234,6 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
237 | } | 234 | } |
238 | vcpu->run = page_address(page); | 235 | vcpu->run = page_address(page); |
239 | 236 | ||
240 | kvm_vcpu_set_in_spin_loop(vcpu, false); | ||
241 | kvm_vcpu_set_dy_eligible(vcpu, false); | ||
242 | |||
243 | r = kvm_arch_vcpu_init(vcpu); | 237 | r = kvm_arch_vcpu_init(vcpu); |
244 | if (r < 0) | 238 | if (r < 0) |
245 | goto fail_free_run; | 239 | goto fail_free_run; |
@@ -293,15 +287,15 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
293 | */ | 287 | */ |
294 | idx = srcu_read_lock(&kvm->srcu); | 288 | idx = srcu_read_lock(&kvm->srcu); |
295 | spin_lock(&kvm->mmu_lock); | 289 | spin_lock(&kvm->mmu_lock); |
296 | |||
297 | kvm->mmu_notifier_seq++; | 290 | kvm->mmu_notifier_seq++; |
298 | need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; | 291 | need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; |
292 | spin_unlock(&kvm->mmu_lock); | ||
293 | srcu_read_unlock(&kvm->srcu, idx); | ||
294 | |||
299 | /* we've to flush the tlb before the pages can be freed */ | 295 | /* we've to flush the tlb before the pages can be freed */ |
300 | if (need_tlb_flush) | 296 | if (need_tlb_flush) |
301 | kvm_flush_remote_tlbs(kvm); | 297 | kvm_flush_remote_tlbs(kvm); |
302 | 298 | ||
303 | spin_unlock(&kvm->mmu_lock); | ||
304 | srcu_read_unlock(&kvm->srcu, idx); | ||
305 | } | 299 | } |
306 | 300 | ||
307 | static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, | 301 | static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, |
@@ -336,14 +330,15 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
336 | * count is also read inside the mmu_lock critical section. | 330 | * count is also read inside the mmu_lock critical section. |
337 | */ | 331 | */ |
338 | kvm->mmu_notifier_count++; | 332 | kvm->mmu_notifier_count++; |
339 | need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); | 333 | for (; start < end; start += PAGE_SIZE) |
334 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | ||
340 | need_tlb_flush |= kvm->tlbs_dirty; | 335 | need_tlb_flush |= kvm->tlbs_dirty; |
336 | spin_unlock(&kvm->mmu_lock); | ||
337 | srcu_read_unlock(&kvm->srcu, idx); | ||
338 | |||
341 | /* we've to flush the tlb before the pages can be freed */ | 339 | /* we've to flush the tlb before the pages can be freed */ |
342 | if (need_tlb_flush) | 340 | if (need_tlb_flush) |
343 | kvm_flush_remote_tlbs(kvm); | 341 | kvm_flush_remote_tlbs(kvm); |
344 | |||
345 | spin_unlock(&kvm->mmu_lock); | ||
346 | srcu_read_unlock(&kvm->srcu, idx); | ||
347 | } | 342 | } |
348 | 343 | ||
349 | static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | 344 | static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, |
@@ -360,11 +355,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | |||
360 | * been freed. | 355 | * been freed. |
361 | */ | 356 | */ |
362 | kvm->mmu_notifier_seq++; | 357 | kvm->mmu_notifier_seq++; |
363 | smp_wmb(); | ||
364 | /* | 358 | /* |
365 | * The above sequence increase must be visible before the | 359 | * The above sequence increase must be visible before the |
366 | * below count decrease, which is ensured by the smp_wmb above | 360 | * below count decrease but both values are read by the kvm |
367 | * in conjunction with the smp_rmb in mmu_notifier_retry(). | 361 | * page fault under mmu_lock spinlock so we don't need to add |
362 | * a smb_wmb() here in between the two. | ||
368 | */ | 363 | */ |
369 | kvm->mmu_notifier_count--; | 364 | kvm->mmu_notifier_count--; |
370 | spin_unlock(&kvm->mmu_lock); | 365 | spin_unlock(&kvm->mmu_lock); |
@@ -381,14 +376,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | |||
381 | 376 | ||
382 | idx = srcu_read_lock(&kvm->srcu); | 377 | idx = srcu_read_lock(&kvm->srcu); |
383 | spin_lock(&kvm->mmu_lock); | 378 | spin_lock(&kvm->mmu_lock); |
384 | |||
385 | young = kvm_age_hva(kvm, address); | 379 | young = kvm_age_hva(kvm, address); |
386 | if (young) | ||
387 | kvm_flush_remote_tlbs(kvm); | ||
388 | |||
389 | spin_unlock(&kvm->mmu_lock); | 380 | spin_unlock(&kvm->mmu_lock); |
390 | srcu_read_unlock(&kvm->srcu, idx); | 381 | srcu_read_unlock(&kvm->srcu, idx); |
391 | 382 | ||
383 | if (young) | ||
384 | kvm_flush_remote_tlbs(kvm); | ||
385 | |||
392 | return young; | 386 | return young; |
393 | } | 387 | } |
394 | 388 | ||
@@ -415,7 +409,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, | |||
415 | int idx; | 409 | int idx; |
416 | 410 | ||
417 | idx = srcu_read_lock(&kvm->srcu); | 411 | idx = srcu_read_lock(&kvm->srcu); |
418 | kvm_arch_flush_shadow_all(kvm); | 412 | kvm_arch_flush_shadow(kvm); |
419 | srcu_read_unlock(&kvm->srcu, idx); | 413 | srcu_read_unlock(&kvm->srcu, idx); |
420 | } | 414 | } |
421 | 415 | ||
@@ -444,16 +438,7 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) | |||
444 | 438 | ||
445 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | 439 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ |
446 | 440 | ||
447 | static void kvm_init_memslots_id(struct kvm *kvm) | 441 | static struct kvm *kvm_create_vm(void) |
448 | { | ||
449 | int i; | ||
450 | struct kvm_memslots *slots = kvm->memslots; | ||
451 | |||
452 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) | ||
453 | slots->id_to_index[i] = slots->memslots[i].id = i; | ||
454 | } | ||
455 | |||
456 | static struct kvm *kvm_create_vm(unsigned long type) | ||
457 | { | 442 | { |
458 | int r, i; | 443 | int r, i; |
459 | struct kvm *kvm = kvm_arch_alloc_vm(); | 444 | struct kvm *kvm = kvm_arch_alloc_vm(); |
@@ -461,7 +446,7 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
461 | if (!kvm) | 446 | if (!kvm) |
462 | return ERR_PTR(-ENOMEM); | 447 | return ERR_PTR(-ENOMEM); |
463 | 448 | ||
464 | r = kvm_arch_init_vm(kvm, type); | 449 | r = kvm_arch_init_vm(kvm); |
465 | if (r) | 450 | if (r) |
466 | goto out_err_nodisable; | 451 | goto out_err_nodisable; |
467 | 452 | ||
@@ -478,7 +463,6 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
478 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 463 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
479 | if (!kvm->memslots) | 464 | if (!kvm->memslots) |
480 | goto out_err_nosrcu; | 465 | goto out_err_nosrcu; |
481 | kvm_init_memslots_id(kvm); | ||
482 | if (init_srcu_struct(&kvm->srcu)) | 466 | if (init_srcu_struct(&kvm->srcu)) |
483 | goto out_err_nosrcu; | 467 | goto out_err_nosrcu; |
484 | for (i = 0; i < KVM_NR_BUSES; i++) { | 468 | for (i = 0; i < KVM_NR_BUSES; i++) { |
@@ -519,33 +503,18 @@ out_err_nodisable: | |||
519 | return ERR_PTR(r); | 503 | return ERR_PTR(r); |
520 | } | 504 | } |
521 | 505 | ||
522 | /* | ||
523 | * Avoid using vmalloc for a small buffer. | ||
524 | * Should not be used when the size is statically known. | ||
525 | */ | ||
526 | void *kvm_kvzalloc(unsigned long size) | ||
527 | { | ||
528 | if (size > PAGE_SIZE) | ||
529 | return vzalloc(size); | ||
530 | else | ||
531 | return kzalloc(size, GFP_KERNEL); | ||
532 | } | ||
533 | |||
534 | void kvm_kvfree(const void *addr) | ||
535 | { | ||
536 | if (is_vmalloc_addr(addr)) | ||
537 | vfree(addr); | ||
538 | else | ||
539 | kfree(addr); | ||
540 | } | ||
541 | |||
542 | static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) | 506 | static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) |
543 | { | 507 | { |
544 | if (!memslot->dirty_bitmap) | 508 | if (!memslot->dirty_bitmap) |
545 | return; | 509 | return; |
546 | 510 | ||
547 | kvm_kvfree(memslot->dirty_bitmap); | 511 | if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE) |
512 | vfree(memslot->dirty_bitmap_head); | ||
513 | else | ||
514 | kfree(memslot->dirty_bitmap_head); | ||
515 | |||
548 | memslot->dirty_bitmap = NULL; | 516 | memslot->dirty_bitmap = NULL; |
517 | memslot->dirty_bitmap_head = NULL; | ||
549 | } | 518 | } |
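The removed kvm_kvzalloc()/kvm_kvfree() pair above picks kzalloc() for buffers up to a page and vzalloc() beyond that, and the restored dirty-bitmap teardown applies the same page-size threshold explicitly. A userspace analogue of that policy follows; calloc() and mmap() are stand-ins for the kernel allocators, and unlike the kernel free path (which keys off is_vmalloc_addr()) the sketch is simply told the size.

/* Userspace analogue of the small-vs-large allocation policy above. */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static void *kvzalloc_like(size_t size)
{
	if (size > (size_t)sysconf(_SC_PAGESIZE)) {
		void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		return p == MAP_FAILED ? NULL : p;
	}
	return calloc(1, size);
}

static void kvfree_like(void *addr, size_t size)
{
	if (!addr)
		return;
	if (size > (size_t)sysconf(_SC_PAGESIZE))
		munmap(addr, size);
	else
		free(addr);
}

int main(void)
{
	void *small = kvzalloc_like(128);
	void *large = kvzalloc_like(1 << 20);

	printf("small=%p large=%p\n", small, large);
	kvfree_like(small, 128);
	kvfree_like(large, 1 << 20);
	return 0;
}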
550 | 519 | ||
551 | /* | 520 | /* |
@@ -554,21 +523,33 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
554 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | 523 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, |
555 | struct kvm_memory_slot *dont) | 524 | struct kvm_memory_slot *dont) |
556 | { | 525 | { |
526 | int i; | ||
527 | |||
528 | if (!dont || free->rmap != dont->rmap) | ||
529 | vfree(free->rmap); | ||
530 | |||
557 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 531 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
558 | kvm_destroy_dirty_bitmap(free); | 532 | kvm_destroy_dirty_bitmap(free); |
559 | 533 | ||
560 | kvm_arch_free_memslot(free, dont); | 534 | |
535 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
536 | if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { | ||
537 | vfree(free->lpage_info[i]); | ||
538 | free->lpage_info[i] = NULL; | ||
539 | } | ||
540 | } | ||
561 | 541 | ||
562 | free->npages = 0; | 542 | free->npages = 0; |
543 | free->rmap = NULL; | ||
563 | } | 544 | } |
564 | 545 | ||
565 | void kvm_free_physmem(struct kvm *kvm) | 546 | void kvm_free_physmem(struct kvm *kvm) |
566 | { | 547 | { |
548 | int i; | ||
567 | struct kvm_memslots *slots = kvm->memslots; | 549 | struct kvm_memslots *slots = kvm->memslots; |
568 | struct kvm_memory_slot *memslot; | ||
569 | 550 | ||
570 | kvm_for_each_memslot(memslot, slots) | 551 | for (i = 0; i < slots->nmemslots; ++i) |
571 | kvm_free_physmem_slot(memslot, NULL); | 552 | kvm_free_physmem_slot(&slots->memslots[i], NULL); |
572 | 553 | ||
573 | kfree(kvm->memslots); | 554 | kfree(kvm->memslots); |
574 | } | 555 | } |
@@ -589,7 +570,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
589 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 570 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
590 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | 571 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
591 | #else | 572 | #else |
592 | kvm_arch_flush_shadow_all(kvm); | 573 | kvm_arch_flush_shadow(kvm); |
593 | #endif | 574 | #endif |
594 | kvm_arch_destroy_vm(kvm); | 575 | kvm_arch_destroy_vm(kvm); |
595 | kvm_free_physmem(kvm); | 576 | kvm_free_physmem(kvm); |
@@ -623,81 +604,28 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
623 | return 0; | 604 | return 0; |
624 | } | 605 | } |
625 | 606 | ||
607 | #ifndef CONFIG_S390 | ||
626 | /* | 608 | /* |
627 | * Allocation size is twice as large as the actual dirty bitmap size. | 609 | * Allocation size is twice as large as the actual dirty bitmap size. |
628 | * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed. | 610 | * This makes it possible to do double buffering: see x86's |
611 | * kvm_vm_ioctl_get_dirty_log(). | ||
629 | */ | 612 | */ |
630 | static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) | 613 | static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) |
631 | { | 614 | { |
632 | #ifndef CONFIG_S390 | ||
633 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); | 615 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); |
634 | 616 | ||
635 | memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes); | 617 | if (dirty_bytes > PAGE_SIZE) |
618 | memslot->dirty_bitmap = vzalloc(dirty_bytes); | ||
619 | else | ||
620 | memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL); | ||
621 | |||
636 | if (!memslot->dirty_bitmap) | 622 | if (!memslot->dirty_bitmap) |
637 | return -ENOMEM; | 623 | return -ENOMEM; |
638 | 624 | ||
639 | #endif /* !CONFIG_S390 */ | 625 | memslot->dirty_bitmap_head = memslot->dirty_bitmap; |
640 | return 0; | ||
641 | } | ||
642 | |||
643 | static int cmp_memslot(const void *slot1, const void *slot2) | ||
644 | { | ||
645 | struct kvm_memory_slot *s1, *s2; | ||
646 | |||
647 | s1 = (struct kvm_memory_slot *)slot1; | ||
648 | s2 = (struct kvm_memory_slot *)slot2; | ||
649 | |||
650 | if (s1->npages < s2->npages) | ||
651 | return 1; | ||
652 | if (s1->npages > s2->npages) | ||
653 | return -1; | ||
654 | |||
655 | return 0; | ||
656 | } | ||
657 | |||
658 | /* | ||
659 | * Sort the memslots base on its size, so the larger slots | ||
660 | * will get better fit. | ||
661 | */ | ||
662 | static void sort_memslots(struct kvm_memslots *slots) | ||
663 | { | ||
664 | int i; | ||
665 | |||
666 | sort(slots->memslots, KVM_MEM_SLOTS_NUM, | ||
667 | sizeof(struct kvm_memory_slot), cmp_memslot, NULL); | ||
668 | |||
669 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) | ||
670 | slots->id_to_index[slots->memslots[i].id] = i; | ||
671 | } | ||
672 | |||
673 | void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) | ||
674 | { | ||
675 | if (new) { | ||
676 | int id = new->id; | ||
677 | struct kvm_memory_slot *old = id_to_memslot(slots, id); | ||
678 | unsigned long npages = old->npages; | ||
679 | |||
680 | *old = *new; | ||
681 | if (new->npages != npages) | ||
682 | sort_memslots(slots); | ||
683 | } | ||
684 | |||
685 | slots->generation++; | ||
686 | } | ||
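The removed cmp_memslot()/sort_memslots() helpers above keep the memslot array ordered by descending npages so lookups hit the big slots first, and update_memslots() re-sorts only when a slot's size actually changed. A standalone sketch of that ordering with qsort() on a simplified slot structure:

/* Sketch of the descending-size memslot sort removed above. */
#include <stdio.h>
#include <stdlib.h>

struct slot {
	int id;
	unsigned long npages;
};

static int cmp_slot(const void *a, const void *b)
{
	const struct slot *s1 = a, *s2 = b;

	if (s1->npages < s2->npages)
		return 1;	/* larger slots sort first */
	if (s1->npages > s2->npages)
		return -1;
	return 0;
}

int main(void)
{
	struct slot slots[] = {
		{ 0, 256 }, { 1, 1024 }, { 2, 16 },
	};
	int n = sizeof(slots) / sizeof(slots[0]);

	qsort(slots, n, sizeof(slots[0]), cmp_slot);

	for (int i = 0; i < n; i++)
		printf("slot %d: %lu pages\n", slots[i].id, slots[i].npages);
	return 0;
}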
687 | |||
688 | static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) | ||
689 | { | ||
690 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; | ||
691 | |||
692 | #ifdef KVM_CAP_READONLY_MEM | ||
693 | valid_flags |= KVM_MEM_READONLY; | ||
694 | #endif | ||
695 | |||
696 | if (mem->flags & ~valid_flags) | ||
697 | return -EINVAL; | ||
698 | |||
699 | return 0; | 626 | return 0; |
700 | } | 627 | } |
628 | #endif /* !CONFIG_S390 */ | ||
701 | 629 | ||
702 | /* | 630 | /* |
703 | * Allocate some memory and give it an address in the guest physical address | 631 | * Allocate some memory and give it an address in the guest physical address |
@@ -714,14 +642,11 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
714 | int r; | 642 | int r; |
715 | gfn_t base_gfn; | 643 | gfn_t base_gfn; |
716 | unsigned long npages; | 644 | unsigned long npages; |
717 | struct kvm_memory_slot *memslot, *slot; | 645 | unsigned long i; |
646 | struct kvm_memory_slot *memslot; | ||
718 | struct kvm_memory_slot old, new; | 647 | struct kvm_memory_slot old, new; |
719 | struct kvm_memslots *slots, *old_memslots; | 648 | struct kvm_memslots *slots, *old_memslots; |
720 | 649 | ||
721 | r = check_memory_region_flags(mem); | ||
722 | if (r) | ||
723 | goto out; | ||
724 | |||
725 | r = -EINVAL; | 650 | r = -EINVAL; |
726 | /* General sanity checks */ | 651 | /* General sanity checks */ |
727 | if (mem->memory_size & (PAGE_SIZE - 1)) | 652 | if (mem->memory_size & (PAGE_SIZE - 1)) |
@@ -735,12 +660,12 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
735 | (void __user *)(unsigned long)mem->userspace_addr, | 660 | (void __user *)(unsigned long)mem->userspace_addr, |
736 | mem->memory_size))) | 661 | mem->memory_size))) |
737 | goto out; | 662 | goto out; |
738 | if (mem->slot >= KVM_MEM_SLOTS_NUM) | 663 | if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) |
739 | goto out; | 664 | goto out; |
740 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) | 665 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) |
741 | goto out; | 666 | goto out; |
742 | 667 | ||
743 | memslot = id_to_memslot(kvm->memslots, mem->slot); | 668 | memslot = &kvm->memslots->memslots[mem->slot]; |
744 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 669 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
745 | npages = mem->memory_size >> PAGE_SHIFT; | 670 | npages = mem->memory_size >> PAGE_SHIFT; |
746 | 671 | ||
@@ -765,11 +690,13 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
765 | 690 | ||
766 | /* Check for overlaps */ | 691 | /* Check for overlaps */ |
767 | r = -EEXIST; | 692 | r = -EEXIST; |
768 | kvm_for_each_memslot(slot, kvm->memslots) { | 693 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
769 | if (slot->id >= KVM_MEMORY_SLOTS || slot == memslot) | 694 | struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; |
695 | |||
696 | if (s == memslot || !s->npages) | ||
770 | continue; | 697 | continue; |
771 | if (!((base_gfn + npages <= slot->base_gfn) || | 698 | if (!((base_gfn + npages <= s->base_gfn) || |
772 | (base_gfn >= slot->base_gfn + slot->npages))) | 699 | (base_gfn >= s->base_gfn + s->npages))) |
773 | goto out_free; | 700 | goto out_free; |
774 | } | 701 | } |
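The overlap loop above rejects a new slot whose gfn range intersects any existing slot: two ranges are disjoint only if one ends at or before the other begins. A standalone sketch of that interval test:

/* Sketch of the memslot overlap check above: the new slot
 * [base_gfn, base_gfn + npages) conflicts with an existing slot unless it
 * ends at or before the slot starts, or begins at or after the slot ends. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long gfn_t;

static bool ranges_overlap(gfn_t base_gfn, gfn_t npages,
			   gfn_t s_base, gfn_t s_npages)
{
	return !((base_gfn + npages <= s_base) ||
		 (base_gfn >= s_base + s_npages));
}

int main(void)
{
	/* existing slot: gfns [0x100, 0x200) */
	printf("%d\n", ranges_overlap(0x200, 0x100, 0x100, 0x100)); /* 0 */
	printf("%d\n", ranges_overlap(0x1f0, 0x020, 0x100, 0x100)); /* 1 */
	return 0;
}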
775 | 702 | ||
@@ -780,45 +707,92 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
780 | r = -ENOMEM; | 707 | r = -ENOMEM; |
781 | 708 | ||
782 | /* Allocate if a slot is being created */ | 709 | /* Allocate if a slot is being created */ |
783 | if (npages && !old.npages) { | 710 | #ifndef CONFIG_S390 |
711 | if (npages && !new.rmap) { | ||
712 | new.rmap = vzalloc(npages * sizeof(*new.rmap)); | ||
713 | |||
714 | if (!new.rmap) | ||
715 | goto out_free; | ||
716 | |||
784 | new.user_alloc = user_alloc; | 717 | new.user_alloc = user_alloc; |
785 | new.userspace_addr = mem->userspace_addr; | 718 | new.userspace_addr = mem->userspace_addr; |
719 | } | ||
720 | if (!npages) | ||
721 | goto skip_lpage; | ||
722 | |||
723 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
724 | unsigned long ugfn; | ||
725 | unsigned long j; | ||
726 | int lpages; | ||
727 | int level = i + 2; | ||
728 | |||
729 | /* Avoid unused variable warning if no large pages */ | ||
730 | (void)level; | ||
731 | |||
732 | if (new.lpage_info[i]) | ||
733 | continue; | ||
786 | 734 | ||
787 | if (kvm_arch_create_memslot(&new, npages)) | 735 | lpages = 1 + ((base_gfn + npages - 1) |
736 | >> KVM_HPAGE_GFN_SHIFT(level)); | ||
737 | lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); | ||
738 | |||
739 | new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i])); | ||
740 | |||
741 | if (!new.lpage_info[i]) | ||
788 | goto out_free; | 742 | goto out_free; |
743 | |||
744 | if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) | ||
745 | new.lpage_info[i][0].write_count = 1; | ||
746 | if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) | ||
747 | new.lpage_info[i][lpages - 1].write_count = 1; | ||
748 | ugfn = new.userspace_addr >> PAGE_SHIFT; | ||
749 | /* | ||
750 | * If the gfn and userspace address are not aligned wrt each | ||
751 | * other, or if explicitly asked to, disable large page | ||
752 | * support for this slot | ||
753 | */ | ||
754 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || | ||
755 | !largepages_enabled) | ||
756 | for (j = 0; j < lpages; ++j) | ||
757 | new.lpage_info[i][j].write_count = 1; | ||
789 | } | 758 | } |
790 | 759 | ||
760 | skip_lpage: | ||
761 | |||
791 | /* Allocate page dirty bitmap if needed */ | 762 | /* Allocate page dirty bitmap if needed */ |
792 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 763 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
793 | if (kvm_create_dirty_bitmap(&new) < 0) | 764 | if (kvm_create_dirty_bitmap(&new) < 0) |
794 | goto out_free; | 765 | goto out_free; |
795 | /* destroy any largepage mappings for dirty tracking */ | 766 | /* destroy any largepage mappings for dirty tracking */ |
796 | } | 767 | } |
768 | #else /* not defined CONFIG_S390 */ | ||
769 | new.user_alloc = user_alloc; | ||
770 | if (user_alloc) | ||
771 | new.userspace_addr = mem->userspace_addr; | ||
772 | #endif /* not defined CONFIG_S390 */ | ||
797 | 773 | ||
798 | if (!npages || base_gfn != old.base_gfn) { | 774 | if (!npages) { |
799 | struct kvm_memory_slot *slot; | ||
800 | |||
801 | r = -ENOMEM; | 775 | r = -ENOMEM; |
802 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), | 776 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
803 | GFP_KERNEL); | ||
804 | if (!slots) | 777 | if (!slots) |
805 | goto out_free; | 778 | goto out_free; |
806 | slot = id_to_memslot(slots, mem->slot); | 779 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
807 | slot->flags |= KVM_MEMSLOT_INVALID; | 780 | if (mem->slot >= slots->nmemslots) |
808 | 781 | slots->nmemslots = mem->slot + 1; | |
809 | update_memslots(slots, NULL); | 782 | slots->generation++; |
783 | slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; | ||
810 | 784 | ||
811 | old_memslots = kvm->memslots; | 785 | old_memslots = kvm->memslots; |
812 | rcu_assign_pointer(kvm->memslots, slots); | 786 | rcu_assign_pointer(kvm->memslots, slots); |
813 | synchronize_srcu_expedited(&kvm->srcu); | 787 | synchronize_srcu_expedited(&kvm->srcu); |
814 | /* From this point no new shadow pages pointing to a deleted, | 788 | /* From this point no new shadow pages pointing to a deleted |
815 | * or moved, memslot will be created. | 789 | * memslot will be created. |
816 | * | 790 | * |
817 | * validation of sp->gfn happens in: | 791 | * validation of sp->gfn happens in: |
818 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) | 792 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) |
819 | * - kvm_is_visible_gfn (mmu_check_roots) | 793 | * - kvm_is_visible_gfn (mmu_check_roots) |
820 | */ | 794 | */ |
821 | kvm_arch_flush_shadow_memslot(kvm, slot); | 795 | kvm_arch_flush_shadow(kvm); |
822 | kfree(old_memslots); | 796 | kfree(old_memslots); |
823 | } | 797 | } |
824 | 798 | ||
@@ -826,33 +800,44 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
826 | if (r) | 800 | if (r) |
827 | goto out_free; | 801 | goto out_free; |
828 | 802 | ||
829 | /* map/unmap the pages in iommu page table */ | 803 | /* map the pages in iommu page table */ |
830 | if (npages) { | 804 | if (npages) { |
831 | r = kvm_iommu_map_pages(kvm, &new); | 805 | r = kvm_iommu_map_pages(kvm, &new); |
832 | if (r) | 806 | if (r) |
833 | goto out_free; | 807 | goto out_free; |
834 | } else | 808 | } |
835 | kvm_iommu_unmap_pages(kvm, &old); | ||
836 | 809 | ||
837 | r = -ENOMEM; | 810 | r = -ENOMEM; |
838 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), | 811 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
839 | GFP_KERNEL); | ||
840 | if (!slots) | 812 | if (!slots) |
841 | goto out_free; | 813 | goto out_free; |
814 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
815 | if (mem->slot >= slots->nmemslots) | ||
816 | slots->nmemslots = mem->slot + 1; | ||
817 | slots->generation++; | ||
842 | 818 | ||
843 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | 819 | /* actual memory is freed via old in kvm_free_physmem_slot below */ |
844 | if (!npages) { | 820 | if (!npages) { |
821 | new.rmap = NULL; | ||
845 | new.dirty_bitmap = NULL; | 822 | new.dirty_bitmap = NULL; |
846 | memset(&new.arch, 0, sizeof(new.arch)); | 823 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) |
824 | new.lpage_info[i] = NULL; | ||
847 | } | 825 | } |
848 | 826 | ||
849 | update_memslots(slots, &new); | 827 | slots->memslots[mem->slot] = new; |
850 | old_memslots = kvm->memslots; | 828 | old_memslots = kvm->memslots; |
851 | rcu_assign_pointer(kvm->memslots, slots); | 829 | rcu_assign_pointer(kvm->memslots, slots); |
852 | synchronize_srcu_expedited(&kvm->srcu); | 830 | synchronize_srcu_expedited(&kvm->srcu); |
853 | 831 | ||
854 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); | 832 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); |
855 | 833 | ||
834 | /* | ||
835 | * If the new memory slot is created, we need to clear all | ||
836 | * mmio sptes. | ||
837 | */ | ||
838 | if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) | ||
839 | kvm_arch_flush_shadow(kvm); | ||
840 | |||
856 | kvm_free_physmem_slot(&old, &new); | 841 | kvm_free_physmem_slot(&old, &new); |
857 | kfree(old_memslots); | 842 | kfree(old_memslots); |
858 | 843 | ||
@@ -901,7 +886,7 @@ int kvm_get_dirty_log(struct kvm *kvm, | |||
901 | if (log->slot >= KVM_MEMORY_SLOTS) | 886 | if (log->slot >= KVM_MEMORY_SLOTS) |
902 | goto out; | 887 | goto out; |
903 | 888 | ||
904 | memslot = id_to_memslot(kvm->memslots, log->slot); | 889 | memslot = &kvm->memslots->memslots[log->slot]; |
905 | r = -ENOENT; | 890 | r = -ENOENT; |
906 | if (!memslot->dirty_bitmap) | 891 | if (!memslot->dirty_bitmap) |
907 | goto out; | 892 | goto out; |
@@ -923,17 +908,74 @@ out: | |||
923 | return r; | 908 | return r; |
924 | } | 909 | } |
925 | 910 | ||
926 | bool kvm_largepages_enabled(void) | ||
927 | { | ||
928 | return largepages_enabled; | ||
929 | } | ||
930 | |||
931 | void kvm_disable_largepages(void) | 911 | void kvm_disable_largepages(void) |
932 | { | 912 | { |
933 | largepages_enabled = false; | 913 | largepages_enabled = false; |
934 | } | 914 | } |
935 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); | 915 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); |
936 | 916 | ||
917 | int is_error_page(struct page *page) | ||
918 | { | ||
919 | return page == bad_page || page == hwpoison_page || page == fault_page; | ||
920 | } | ||
921 | EXPORT_SYMBOL_GPL(is_error_page); | ||
922 | |||
923 | int is_error_pfn(pfn_t pfn) | ||
924 | { | ||
925 | return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn; | ||
926 | } | ||
927 | EXPORT_SYMBOL_GPL(is_error_pfn); | ||
928 | |||
929 | int is_hwpoison_pfn(pfn_t pfn) | ||
930 | { | ||
931 | return pfn == hwpoison_pfn; | ||
932 | } | ||
933 | EXPORT_SYMBOL_GPL(is_hwpoison_pfn); | ||
934 | |||
935 | int is_fault_pfn(pfn_t pfn) | ||
936 | { | ||
937 | return pfn == fault_pfn; | ||
938 | } | ||
939 | EXPORT_SYMBOL_GPL(is_fault_pfn); | ||
940 | |||
941 | int is_noslot_pfn(pfn_t pfn) | ||
942 | { | ||
943 | return pfn == bad_pfn; | ||
944 | } | ||
945 | EXPORT_SYMBOL_GPL(is_noslot_pfn); | ||
946 | |||
947 | int is_invalid_pfn(pfn_t pfn) | ||
948 | { | ||
949 | return pfn == hwpoison_pfn || pfn == fault_pfn; | ||
950 | } | ||
951 | EXPORT_SYMBOL_GPL(is_invalid_pfn); | ||
952 | |||
953 | static inline unsigned long bad_hva(void) | ||
954 | { | ||
955 | return PAGE_OFFSET; | ||
956 | } | ||
957 | |||
958 | int kvm_is_error_hva(unsigned long addr) | ||
959 | { | ||
960 | return addr == bad_hva(); | ||
961 | } | ||
962 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); | ||
963 | |||
964 | static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, | ||
965 | gfn_t gfn) | ||
966 | { | ||
967 | int i; | ||
968 | |||
969 | for (i = 0; i < slots->nmemslots; ++i) { | ||
970 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | ||
971 | |||
972 | if (gfn >= memslot->base_gfn | ||
973 | && gfn < memslot->base_gfn + memslot->npages) | ||
974 | return memslot; | ||
975 | } | ||
976 | return NULL; | ||
977 | } | ||
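The restored __gfn_to_memslot() above is a plain linear scan over slots->nmemslots for the slot whose gfn range contains the requested gfn. A standalone sketch of that scan on a simplified slot table:

/* Sketch of the linear gfn-to-slot lookup restored above. */
#include <stdio.h>

typedef unsigned long long gfn_t;

struct slot {
	gfn_t base_gfn;
	unsigned long npages;
};

static const struct slot *gfn_to_slot(const struct slot *slots, int nslots,
				      gfn_t gfn)
{
	for (int i = 0; i < nslots; i++) {
		const struct slot *s = &slots[i];

		if (gfn >= s->base_gfn && gfn < s->base_gfn + s->npages)
			return s;
	}
	return NULL;	/* gfn is not backed by any slot */
}

int main(void)
{
	struct slot slots[] = { { 0x000, 0x100 }, { 0x400, 0x200 } };
	const struct slot *s = gfn_to_slot(slots, 2, 0x450);

	printf("found slot at base 0x%llx\n", s ? s->base_gfn : ~0ull);
	return 0;
}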
978 | |||
937 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 979 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) |
938 | { | 980 | { |
939 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); | 981 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); |
@@ -942,13 +984,20 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot); | |||
942 | 984 | ||
943 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 985 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
944 | { | 986 | { |
945 | struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); | 987 | int i; |
988 | struct kvm_memslots *slots = kvm_memslots(kvm); | ||
946 | 989 | ||
947 | if (!memslot || memslot->id >= KVM_MEMORY_SLOTS || | 990 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
948 | memslot->flags & KVM_MEMSLOT_INVALID) | 991 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
949 | return 0; | 992 | |
993 | if (memslot->flags & KVM_MEMSLOT_INVALID) | ||
994 | continue; | ||
950 | 995 | ||
951 | return 1; | 996 | if (gfn >= memslot->base_gfn |
997 | && gfn < memslot->base_gfn + memslot->npages) | ||
998 | return 1; | ||
999 | } | ||
1000 | return 0; | ||
952 | } | 1001 | } |
953 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); | 1002 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); |
954 | 1003 | ||
@@ -976,38 +1025,17 @@ out: | |||
976 | return size; | 1025 | return size; |
977 | } | 1026 | } |
978 | 1027 | ||
979 | static bool memslot_is_readonly(struct kvm_memory_slot *slot) | 1028 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, |
980 | { | 1029 | gfn_t *nr_pages) |
981 | return slot->flags & KVM_MEM_READONLY; | ||
982 | } | ||
983 | |||
984 | static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | ||
985 | gfn_t *nr_pages, bool write) | ||
986 | { | 1030 | { |
987 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | 1031 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
988 | return KVM_HVA_ERR_BAD; | 1032 | return bad_hva(); |
989 | |||
990 | if (memslot_is_readonly(slot) && write) | ||
991 | return KVM_HVA_ERR_RO_BAD; | ||
992 | 1033 | ||
993 | if (nr_pages) | 1034 | if (nr_pages) |
994 | *nr_pages = slot->npages - (gfn - slot->base_gfn); | 1035 | *nr_pages = slot->npages - (gfn - slot->base_gfn); |
995 | 1036 | ||
996 | return __gfn_to_hva_memslot(slot, gfn); | 1037 | return gfn_to_hva_memslot(slot, gfn); |
997 | } | ||
998 | |||
999 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | ||
1000 | gfn_t *nr_pages) | ||
1001 | { | ||
1002 | return __gfn_to_hva_many(slot, gfn, nr_pages, true); | ||
1003 | } | ||
1004 | |||
1005 | unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, | ||
1006 | gfn_t gfn) | ||
1007 | { | ||
1008 | return gfn_to_hva_many(slot, gfn, NULL); | ||
1009 | } | 1038 | } |
1010 | EXPORT_SYMBOL_GPL(gfn_to_hva_memslot); | ||
1011 | 1039 | ||
1012 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 1040 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
1013 | { | 1041 | { |
@@ -1015,23 +1043,10 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
1015 | } | 1043 | } |
1016 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 1044 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
1017 | 1045 | ||
1018 | /* | 1046 | static pfn_t get_fault_pfn(void) |
1019 | * The hva returned by this function is only allowed to be read. | ||
1020 | * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). | ||
1021 | */ | ||
1022 | static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) | ||
1023 | { | 1047 | { |
1024 | return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); | 1048 | get_page(fault_page); |
1025 | } | 1049 | return fault_pfn; |
1026 | |||
1027 | static int kvm_read_hva(void *data, void __user *hva, int len) | ||
1028 | { | ||
1029 | return __copy_from_user(data, hva, len); | ||
1030 | } | ||
1031 | |||
1032 | static int kvm_read_hva_atomic(void *data, void __user *hva, int len) | ||
1033 | { | ||
1034 | return __copy_from_user_inatomic(data, hva, len); | ||
1035 | } | 1050 | } |
1036 | 1051 | ||
1037 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | 1052 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, |
@@ -1054,186 +1069,108 @@ static inline int check_user_page_hwpoison(unsigned long addr) | |||
1054 | return rc == -EHWPOISON; | 1069 | return rc == -EHWPOISON; |
1055 | } | 1070 | } |
1056 | 1071 | ||
1057 | /* | 1072 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, |
1058 | * The atomic path to get the writable pfn which will be stored in @pfn, | 1073 | bool *async, bool write_fault, bool *writable) |
1059 | * true indicates success, otherwise false is returned. | ||
1060 | */ | ||
1061 | static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async, | ||
1062 | bool write_fault, bool *writable, pfn_t *pfn) | ||
1063 | { | 1074 | { |
1064 | struct page *page[1]; | 1075 | struct page *page[1]; |
1065 | int npages; | 1076 | int npages = 0; |
1077 | pfn_t pfn; | ||
1066 | 1078 | ||
1067 | if (!(async || atomic)) | 1079 | /* we can do it either atomically or asynchronously, not both */ |
1068 | return false; | 1080 | BUG_ON(atomic && async); |
1069 | 1081 | ||
1070 | /* | 1082 | BUG_ON(!write_fault && !writable); |
1071 | * Fast pin a writable pfn only if it is a write fault request | ||
1072 | * or the caller allows to map a writable pfn for a read fault | ||
1073 | * request. | ||
1074 | */ | ||
1075 | if (!(write_fault || writable)) | ||
1076 | return false; | ||
1077 | 1083 | ||
1078 | npages = __get_user_pages_fast(addr, 1, 1, page); | 1084 | if (writable) |
1079 | if (npages == 1) { | 1085 | *writable = true; |
1080 | *pfn = page_to_pfn(page[0]); | ||
1081 | 1086 | ||
1082 | if (writable) | 1087 | if (atomic || async) |
1083 | *writable = true; | 1088 | npages = __get_user_pages_fast(addr, 1, 1, page); |
1084 | return true; | ||
1085 | } | ||
1086 | 1089 | ||
1087 | return false; | 1090 | if (unlikely(npages != 1) && !atomic) { |
1088 | } | 1091 | might_sleep(); |
1089 | 1092 | ||
1090 | /* | 1093 | if (writable) |
1091 | * The slow path to get the pfn of the specified host virtual address, | 1094 | *writable = write_fault; |
1092 | * 1 indicates success, -errno is returned if error is detected. | ||
1093 | */ | ||
1094 | static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, | ||
1095 | bool *writable, pfn_t *pfn) | ||
1096 | { | ||
1097 | struct page *page[1]; | ||
1098 | int npages = 0; | ||
1099 | |||
1100 | might_sleep(); | ||
1101 | |||
1102 | if (writable) | ||
1103 | *writable = write_fault; | ||
1104 | 1095 | ||
1105 | if (async) { | 1096 | if (async) { |
1106 | down_read(&current->mm->mmap_sem); | 1097 | down_read(&current->mm->mmap_sem); |
1107 | npages = get_user_page_nowait(current, current->mm, | 1098 | npages = get_user_page_nowait(current, current->mm, |
1108 | addr, write_fault, page); | 1099 | addr, write_fault, page); |
1109 | up_read(&current->mm->mmap_sem); | 1100 | up_read(&current->mm->mmap_sem); |
1110 | } else | 1101 | } else |
1111 | npages = get_user_pages_fast(addr, 1, write_fault, | 1102 | npages = get_user_pages_fast(addr, 1, write_fault, |
1112 | page); | 1103 | page); |
1113 | if (npages != 1) | 1104 | |
1114 | return npages; | 1105 | /* map read fault as writable if possible */ |
1115 | 1106 | if (unlikely(!write_fault) && npages == 1) { | |
1116 | /* map read fault as writable if possible */ | 1107 | struct page *wpage[1]; |
1117 | if (unlikely(!write_fault) && writable) { | 1108 | |
1118 | struct page *wpage[1]; | 1109 | npages = __get_user_pages_fast(addr, 1, 1, wpage); |
1119 | 1110 | if (npages == 1) { | |
1120 | npages = __get_user_pages_fast(addr, 1, 1, wpage); | 1111 | *writable = true; |
1121 | if (npages == 1) { | 1112 | put_page(page[0]); |
1122 | *writable = true; | 1113 | page[0] = wpage[0]; |
1123 | put_page(page[0]); | 1114 | } |
1124 | page[0] = wpage[0]; | 1115 | npages = 1; |
1125 | } | 1116 | } |
1126 | |||
1127 | npages = 1; | ||
1128 | } | 1117 | } |
1129 | *pfn = page_to_pfn(page[0]); | ||
1130 | return npages; | ||
1131 | } | ||
1132 | 1118 | ||
1133 | static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) | 1119 | if (unlikely(npages != 1)) { |
1134 | { | 1120 | struct vm_area_struct *vma; |
1135 | if (unlikely(!(vma->vm_flags & VM_READ))) | ||
1136 | return false; | ||
1137 | |||
1138 | if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) | ||
1139 | return false; | ||
1140 | |||
1141 | return true; | ||
1142 | } | ||
1143 | |||
1144 | /* | ||
1145 | * Pin guest page in memory and return its pfn. | ||
1146 | * @addr: host virtual address which maps memory to the guest | ||
1147 | * @atomic: whether this function can sleep | ||
1148 | * @async: whether this function needs to wait for IO to complete if the | ||
1149 | * host page is not in memory | ||
1150 | * @write_fault: whether we should get a writable host page | ||
1151 | * @writable: whether the caller allows mapping a writable host page for !@write_fault | ||
1152 | * | ||
1153 | * The function will map a writable host page for these two cases: | ||
1154 | * 1): @write_fault = true | ||
1155 | * 2): @write_fault = false && @writable, @writable will tell the caller | ||
1156 | * whether the mapping is writable. | ||
1157 | */ | ||
1158 | static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, | ||
1159 | bool write_fault, bool *writable) | ||
1160 | { | ||
1161 | struct vm_area_struct *vma; | ||
1162 | pfn_t pfn = 0; | ||
1163 | int npages; | ||
1164 | 1121 | ||
1165 | /* we can do it either atomically or asynchronously, not both */ | 1122 | if (atomic) |
1166 | BUG_ON(atomic && async); | 1123 | return get_fault_pfn(); |
1167 | 1124 | ||
1168 | if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn)) | 1125 | down_read(&current->mm->mmap_sem); |
1169 | return pfn; | 1126 | if (npages == -EHWPOISON || |
1170 | 1127 | (!async && check_user_page_hwpoison(addr))) { | |
1171 | if (atomic) | 1128 | up_read(&current->mm->mmap_sem); |
1172 | return KVM_PFN_ERR_FAULT; | 1129 | get_page(hwpoison_page); |
1130 | return page_to_pfn(hwpoison_page); | ||
1131 | } | ||
1173 | 1132 | ||
1174 | npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn); | 1133 | vma = find_vma_intersection(current->mm, addr, addr+1); |
1175 | if (npages == 1) | 1134 | |
1176 | return pfn; | 1135 | if (vma == NULL) |
1136 | pfn = get_fault_pfn(); | ||
1137 | else if ((vma->vm_flags & VM_PFNMAP)) { | ||
1138 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + | ||
1139 | vma->vm_pgoff; | ||
1140 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | ||
1141 | } else { | ||
1142 | if (async && (vma->vm_flags & VM_WRITE)) | ||
1143 | *async = true; | ||
1144 | pfn = get_fault_pfn(); | ||
1145 | } | ||
1146 | up_read(&current->mm->mmap_sem); | ||
1147 | } else | ||
1148 | pfn = page_to_pfn(page[0]); | ||
1177 | 1149 | ||
1178 | down_read(&current->mm->mmap_sem); | ||
1179 | if (npages == -EHWPOISON || | ||
1180 | (!async && check_user_page_hwpoison(addr))) { | ||
1181 | pfn = KVM_PFN_ERR_HWPOISON; | ||
1182 | goto exit; | ||
1183 | } | ||
1184 | |||
1185 | vma = find_vma_intersection(current->mm, addr, addr + 1); | ||
1186 | |||
1187 | if (vma == NULL) | ||
1188 | pfn = KVM_PFN_ERR_FAULT; | ||
1189 | else if ((vma->vm_flags & VM_PFNMAP)) { | ||
1190 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + | ||
1191 | vma->vm_pgoff; | ||
1192 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | ||
1193 | } else { | ||
1194 | if (async && vma_is_valid(vma, write_fault)) | ||
1195 | *async = true; | ||
1196 | pfn = KVM_PFN_ERR_FAULT; | ||
1197 | } | ||
1198 | exit: | ||
1199 | up_read(&current->mm->mmap_sem); | ||
1200 | return pfn; | 1150 | return pfn; |
1201 | } | 1151 | } |
1202 | 1152 | ||
1203 | static pfn_t | 1153 | pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) |
1204 | __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, | ||
1205 | bool *async, bool write_fault, bool *writable) | ||
1206 | { | 1154 | { |
1207 | unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); | 1155 | return hva_to_pfn(kvm, addr, true, NULL, true, NULL); |
1208 | |||
1209 | if (addr == KVM_HVA_ERR_RO_BAD) | ||
1210 | return KVM_PFN_ERR_RO_FAULT; | ||
1211 | |||
1212 | if (kvm_is_error_hva(addr)) | ||
1213 | return KVM_PFN_NOSLOT; | ||
1214 | |||
1215 | /* Do not map writable pfn in the readonly memslot. */ | ||
1216 | if (writable && memslot_is_readonly(slot)) { | ||
1217 | *writable = false; | ||
1218 | writable = NULL; | ||
1219 | } | ||
1220 | |||
1221 | return hva_to_pfn(addr, atomic, async, write_fault, | ||
1222 | writable); | ||
1223 | } | 1156 | } |
1157 | EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); | ||
1224 | 1158 | ||
1225 | static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, | 1159 | static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, |
1226 | bool write_fault, bool *writable) | 1160 | bool write_fault, bool *writable) |
1227 | { | 1161 | { |
1228 | struct kvm_memory_slot *slot; | 1162 | unsigned long addr; |
1229 | 1163 | ||
1230 | if (async) | 1164 | if (async) |
1231 | *async = false; | 1165 | *async = false; |
1232 | 1166 | ||
1233 | slot = gfn_to_memslot(kvm, gfn); | 1167 | addr = gfn_to_hva(kvm, gfn); |
1168 | if (kvm_is_error_hva(addr)) { | ||
1169 | get_page(bad_page); | ||
1170 | return page_to_pfn(bad_page); | ||
1171 | } | ||
1234 | 1172 | ||
1235 | return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault, | 1173 | return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); |
1236 | writable); | ||
1237 | } | 1174 | } |
1238 | 1175 | ||
1239 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) | 1176 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) |
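On the removed side of the hunk above, pinning is split into hva_to_pfn_fast(), which only tries an atomic grab of an already-resident page, and hva_to_pfn_slow(), which may sleep in get_user_pages(); the retained side folds both paths back into one hva_to_pfn(). A compact, self-contained model of that two-path control flow, with invented names and the actual pinning faked out:

#include <stdbool.h>
#include <stdio.h>

/* Fast path: only succeeds when a writable mapping can be grabbed without
 * sleeping (mirrors the write_fault/writable precondition of the removed
 * hva_to_pfn_fast()). */
static bool model_pin_fast(unsigned long addr, bool write_fault,
                           bool *writable, unsigned long *pfn)
{
        if (!write_fault && !writable)
                return false;
        *pfn = addr >> 12;      /* pretend the page was already resident */
        if (writable)
                *writable = true;
        return true;
}

/* Slow path: allowed to sleep; reports how many pages were pinned. */
static int model_pin_slow(unsigned long addr, bool write_fault,
                          bool *writable, unsigned long *pfn)
{
        if (writable)
                *writable = write_fault;
        *pfn = addr >> 12;      /* pretend get_user_pages() succeeded */
        return 1;
}

static unsigned long model_hva_to_pfn(unsigned long addr, bool atomic,
                                      bool write_fault, bool *writable)
{
        unsigned long pfn;

        if (model_pin_fast(addr, write_fault, writable, &pfn))
                return pfn;
        if (atomic)             /* the atomic caller must never sleep */
                return (unsigned long)-1;
        if (model_pin_slow(addr, write_fault, writable, &pfn) == 1)
                return pfn;
        return (unsigned long)-1;
}

int main(void)
{
        bool writable = false;
        unsigned long pfn = model_hva_to_pfn(0x1000, false, true, &writable);

        printf("pfn=%lu writable=%d\n", pfn, writable);
        return 0;
}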
@@ -1262,17 +1199,13 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | |||
1262 | } | 1199 | } |
1263 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); | 1200 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); |
1264 | 1201 | ||
1265 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | 1202 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, |
1203 | struct kvm_memory_slot *slot, gfn_t gfn) | ||
1266 | { | 1204 | { |
1267 | return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); | 1205 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); |
1206 | return hva_to_pfn(kvm, addr, false, NULL, true, NULL); | ||
1268 | } | 1207 | } |
1269 | 1208 | ||
1270 | pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) | ||
1271 | { | ||
1272 | return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); | ||
1273 | } | ||
1274 | EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); | ||
1275 | |||
1276 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | 1209 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, |
1277 | int nr_pages) | 1210 | int nr_pages) |
1278 | { | 1211 | { |
@@ -1290,49 +1223,37 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | |||
1290 | } | 1223 | } |
1291 | EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); | 1224 | EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); |
1292 | 1225 | ||
1293 | static struct page *kvm_pfn_to_page(pfn_t pfn) | ||
1294 | { | ||
1295 | if (is_error_noslot_pfn(pfn)) | ||
1296 | return KVM_ERR_PTR_BAD_PAGE; | ||
1297 | |||
1298 | if (kvm_is_mmio_pfn(pfn)) { | ||
1299 | WARN_ON(1); | ||
1300 | return KVM_ERR_PTR_BAD_PAGE; | ||
1301 | } | ||
1302 | |||
1303 | return pfn_to_page(pfn); | ||
1304 | } | ||
1305 | |||
1306 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 1226 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) |
1307 | { | 1227 | { |
1308 | pfn_t pfn; | 1228 | pfn_t pfn; |
1309 | 1229 | ||
1310 | pfn = gfn_to_pfn(kvm, gfn); | 1230 | pfn = gfn_to_pfn(kvm, gfn); |
1231 | if (!kvm_is_mmio_pfn(pfn)) | ||
1232 | return pfn_to_page(pfn); | ||
1233 | |||
1234 | WARN_ON(kvm_is_mmio_pfn(pfn)); | ||
1311 | 1235 | ||
1312 | return kvm_pfn_to_page(pfn); | 1236 | get_page(bad_page); |
1237 | return bad_page; | ||
1313 | } | 1238 | } |
1314 | 1239 | ||
1315 | EXPORT_SYMBOL_GPL(gfn_to_page); | 1240 | EXPORT_SYMBOL_GPL(gfn_to_page); |
1316 | 1241 | ||
1317 | void kvm_release_page_clean(struct page *page) | 1242 | void kvm_release_page_clean(struct page *page) |
1318 | { | 1243 | { |
1319 | WARN_ON(is_error_page(page)); | ||
1320 | |||
1321 | kvm_release_pfn_clean(page_to_pfn(page)); | 1244 | kvm_release_pfn_clean(page_to_pfn(page)); |
1322 | } | 1245 | } |
1323 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); | 1246 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); |
1324 | 1247 | ||
1325 | void kvm_release_pfn_clean(pfn_t pfn) | 1248 | void kvm_release_pfn_clean(pfn_t pfn) |
1326 | { | 1249 | { |
1327 | if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) | 1250 | if (!kvm_is_mmio_pfn(pfn)) |
1328 | put_page(pfn_to_page(pfn)); | 1251 | put_page(pfn_to_page(pfn)); |
1329 | } | 1252 | } |
1330 | EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); | 1253 | EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); |
1331 | 1254 | ||
1332 | void kvm_release_page_dirty(struct page *page) | 1255 | void kvm_release_page_dirty(struct page *page) |
1333 | { | 1256 | { |
1334 | WARN_ON(is_error_page(page)); | ||
1335 | |||
1336 | kvm_release_pfn_dirty(page_to_pfn(page)); | 1257 | kvm_release_pfn_dirty(page_to_pfn(page)); |
1337 | } | 1258 | } |
1338 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); | 1259 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); |
@@ -1388,10 +1309,10 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
1388 | int r; | 1309 | int r; |
1389 | unsigned long addr; | 1310 | unsigned long addr; |
1390 | 1311 | ||
1391 | addr = gfn_to_hva_read(kvm, gfn); | 1312 | addr = gfn_to_hva(kvm, gfn); |
1392 | if (kvm_is_error_hva(addr)) | 1313 | if (kvm_is_error_hva(addr)) |
1393 | return -EFAULT; | 1314 | return -EFAULT; |
1394 | r = kvm_read_hva(data, (void __user *)addr + offset, len); | 1315 | r = __copy_from_user(data, (void __user *)addr + offset, len); |
1395 | if (r) | 1316 | if (r) |
1396 | return -EFAULT; | 1317 | return -EFAULT; |
1397 | return 0; | 1318 | return 0; |
@@ -1426,11 +1347,11 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | |||
1426 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1347 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1427 | int offset = offset_in_page(gpa); | 1348 | int offset = offset_in_page(gpa); |
1428 | 1349 | ||
1429 | addr = gfn_to_hva_read(kvm, gfn); | 1350 | addr = gfn_to_hva(kvm, gfn); |
1430 | if (kvm_is_error_hva(addr)) | 1351 | if (kvm_is_error_hva(addr)) |
1431 | return -EFAULT; | 1352 | return -EFAULT; |
1432 | pagefault_disable(); | 1353 | pagefault_disable(); |
1433 | r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len); | 1354 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); |
1434 | pagefault_enable(); | 1355 | pagefault_enable(); |
1435 | if (r) | 1356 | if (r) |
1436 | return -EFAULT; | 1357 | return -EFAULT; |
@@ -1484,7 +1405,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
1484 | 1405 | ||
1485 | ghc->gpa = gpa; | 1406 | ghc->gpa = gpa; |
1486 | ghc->generation = slots->generation; | 1407 | ghc->generation = slots->generation; |
1487 | ghc->memslot = gfn_to_memslot(kvm, gfn); | 1408 | ghc->memslot = __gfn_to_memslot(slots, gfn); |
1488 | ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); | 1409 | ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); |
1489 | if (!kvm_is_error_hva(ghc->hva)) | 1410 | if (!kvm_is_error_hva(ghc->hva)) |
1490 | ghc->hva += offset; | 1411 | ghc->hva += offset; |
@@ -1568,7 +1489,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
1568 | if (memslot && memslot->dirty_bitmap) { | 1489 | if (memslot && memslot->dirty_bitmap) { |
1569 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1490 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
1570 | 1491 | ||
1571 | set_bit_le(rel_gfn, memslot->dirty_bitmap); | 1492 | __set_bit_le(rel_gfn, memslot->dirty_bitmap); |
1572 | } | 1493 | } |
1573 | } | 1494 | } |
1574 | 1495 | ||
@@ -1605,30 +1526,6 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
1605 | finish_wait(&vcpu->wq, &wait); | 1526 | finish_wait(&vcpu->wq, &wait); |
1606 | } | 1527 | } |
1607 | 1528 | ||
1608 | #ifndef CONFIG_S390 | ||
1609 | /* | ||
1610 | * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. | ||
1611 | */ | ||
1612 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | ||
1613 | { | ||
1614 | int me; | ||
1615 | int cpu = vcpu->cpu; | ||
1616 | wait_queue_head_t *wqp; | ||
1617 | |||
1618 | wqp = kvm_arch_vcpu_wq(vcpu); | ||
1619 | if (waitqueue_active(wqp)) { | ||
1620 | wake_up_interruptible(wqp); | ||
1621 | ++vcpu->stat.halt_wakeup; | ||
1622 | } | ||
1623 | |||
1624 | me = get_cpu(); | ||
1625 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | ||
1626 | if (kvm_arch_vcpu_should_kick(vcpu)) | ||
1627 | smp_send_reschedule(cpu); | ||
1628 | put_cpu(); | ||
1629 | } | ||
1630 | #endif /* !CONFIG_S390 */ | ||
1631 | |||
1632 | void kvm_resched(struct kvm_vcpu *vcpu) | 1529 | void kvm_resched(struct kvm_vcpu *vcpu) |
1633 | { | 1530 | { |
1634 | if (!need_resched()) | 1531 | if (!need_resched()) |
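The kvm_vcpu_kick() removed in the hunk above combines two actions: wake the vCPU if it is halted on its wait queue, and send a reschedule IPI if it is currently running in guest mode on another CPU. A toy, self-contained model of that decision (all names and state are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

struct model_vcpu {
        bool sleeping;   /* waitqueue_active() on the vCPU's wq in the kernel */
        int cpu;         /* physical CPU the vCPU last ran on */
};

static void model_send_ipi(int cpu)
{
        printf("IPI to cpu %d (smp_send_reschedule)\n", cpu);
}

static void model_kick(struct model_vcpu *v, int this_cpu)
{
        if (v->sleeping) {                 /* wake_up_interruptible() */
                v->sleeping = false;
                printf("woken from halt\n");
        }
        if (v->cpu != this_cpu)            /* a vCPU in guest mode on another
                                              CPU needs an IPI to exit */
                model_send_ipi(v->cpu);
}

int main(void)
{
        struct model_vcpu halted  = { .sleeping = true,  .cpu = 2 };
        struct model_vcpu running = { .sleeping = false, .cpu = 3 };

        model_kick(&halted, 0);
        model_kick(&running, 0);
        return 0;
}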
@@ -1637,68 +1534,6 @@ void kvm_resched(struct kvm_vcpu *vcpu) | |||
1637 | } | 1534 | } |
1638 | EXPORT_SYMBOL_GPL(kvm_resched); | 1535 | EXPORT_SYMBOL_GPL(kvm_resched); |
1639 | 1536 | ||
1640 | bool kvm_vcpu_yield_to(struct kvm_vcpu *target) | ||
1641 | { | ||
1642 | struct pid *pid; | ||
1643 | struct task_struct *task = NULL; | ||
1644 | |||
1645 | rcu_read_lock(); | ||
1646 | pid = rcu_dereference(target->pid); | ||
1647 | if (pid) | ||
1648 | task = get_pid_task(target->pid, PIDTYPE_PID); | ||
1649 | rcu_read_unlock(); | ||
1650 | if (!task) | ||
1651 | return false; | ||
1652 | if (task->flags & PF_VCPU) { | ||
1653 | put_task_struct(task); | ||
1654 | return false; | ||
1655 | } | ||
1656 | if (yield_to(task, 1)) { | ||
1657 | put_task_struct(task); | ||
1658 | return true; | ||
1659 | } | ||
1660 | put_task_struct(task); | ||
1661 | return false; | ||
1662 | } | ||
1663 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); | ||
1664 | |||
1665 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | ||
1666 | /* | ||
1667 | * Helper that checks whether a VCPU is eligible for directed yield. | ||
1668 | * Most eligible candidate to yield is decided by following heuristics: | ||
1669 | * | ||
1670 | * (a) VCPU which has not done pl-exit or cpu relax intercepted recently | ||
1671 | * (preempted lock holder), indicated by @in_spin_loop. | ||
1672 | * Set at the beginning and cleared at the end of interception/PLE handler. | ||
1673 | * | ||
1674 | * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get | ||
1675 | * chance last time (mostly it has become eligible now since we have probably | ||
1676 | * yielded to lockholder in last iteration. This is done by toggling | ||
1677 | * @dy_eligible each time a VCPU checked for eligibility.) | ||
1678 | * | ||
1679 | * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding | ||
1680 | * to preempted lock-holder could result in wrong VCPU selection and CPU | ||
1681 | * burning. Giving priority for a potential lock-holder increases lock | ||
1682 | * progress. | ||
1683 | * | ||
1684 | * Since algorithm is based on heuristics, accessing another VCPU data without | ||
1685 | * locking does not harm. It may result in trying to yield to same VCPU, fail | ||
1686 | * and continue with next VCPU and so on. | ||
1687 | */ | ||
1688 | bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | ||
1689 | { | ||
1690 | bool eligible; | ||
1691 | |||
1692 | eligible = !vcpu->spin_loop.in_spin_loop || | ||
1693 | (vcpu->spin_loop.in_spin_loop && | ||
1694 | vcpu->spin_loop.dy_eligible); | ||
1695 | |||
1696 | if (vcpu->spin_loop.in_spin_loop) | ||
1697 | kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); | ||
1698 | |||
1699 | return eligible; | ||
1700 | } | ||
1701 | #endif | ||
1702 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) | 1537 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) |
1703 | { | 1538 | { |
1704 | struct kvm *kvm = me->kvm; | 1539 | struct kvm *kvm = me->kvm; |
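The comment block removed above spells out the directed-yield heuristic: a vCPU that is itself spinning (in_spin_loop) is only considered every other time, via the dy_eligible toggle, so yields are biased toward a preempted lock holder rather than another spinner. A minimal stand-alone model of that toggle (struct and names invented for illustration):

#include <stdbool.h>
#include <stdio.h>

struct model_vcpu {
        bool in_spin_loop;   /* set while handling a PLE/cpu-relax exit */
        bool dy_eligible;    /* toggled on every eligibility check */
};

/* Mirrors the removed kvm_vcpu_eligible_for_directed_yield() logic:
 * a vCPU that is spinning right now is only picked every other check. */
static bool model_eligible(struct model_vcpu *v)
{
        bool eligible = !v->in_spin_loop || v->dy_eligible;

        if (v->in_spin_loop)
                v->dy_eligible = !v->dy_eligible;
        return eligible;
}

int main(void)
{
        struct model_vcpu v = { .in_spin_loop = true, .dy_eligible = false };
        int i;

        for (i = 0; i < 4; i++)
                printf("check %d: %s\n", i,
                       model_eligible(&v) ? "yield to it" : "skip");
        return 0;
}

The toggle is the part the retained code loses; as the removed comment warns, yielding to a vCPU that is itself PLE-exiting can pick the wrong target and burn CPU instead of helping the lock holder make progress.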
@@ -1708,7 +1543,6 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
1708 | int pass; | 1543 | int pass; |
1709 | int i; | 1544 | int i; |
1710 | 1545 | ||
1711 | kvm_vcpu_set_in_spin_loop(me, true); | ||
1712 | /* | 1546 | /* |
1713 | * We boost the priority of a VCPU that is runnable but not | 1547 | * We boost the priority of a VCPU that is runnable but not |
1714 | * currently running, because it got preempted by something | 1548 | * currently running, because it got preempted by something |
@@ -1718,7 +1552,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
1718 | */ | 1552 | */ |
1719 | for (pass = 0; pass < 2 && !yielded; pass++) { | 1553 | for (pass = 0; pass < 2 && !yielded; pass++) { |
1720 | kvm_for_each_vcpu(i, vcpu, kvm) { | 1554 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1721 | if (!pass && i <= last_boosted_vcpu) { | 1555 | struct task_struct *task = NULL; |
1556 | struct pid *pid; | ||
1557 | if (!pass && i < last_boosted_vcpu) { | ||
1722 | i = last_boosted_vcpu; | 1558 | i = last_boosted_vcpu; |
1723 | continue; | 1559 | continue; |
1724 | } else if (pass && i > last_boosted_vcpu) | 1560 | } else if (pass && i > last_boosted_vcpu) |
@@ -1727,19 +1563,26 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
1727 | continue; | 1563 | continue; |
1728 | if (waitqueue_active(&vcpu->wq)) | 1564 | if (waitqueue_active(&vcpu->wq)) |
1729 | continue; | 1565 | continue; |
1730 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) | 1566 | rcu_read_lock(); |
1567 | pid = rcu_dereference(vcpu->pid); | ||
1568 | if (pid) | ||
1569 | task = get_pid_task(vcpu->pid, PIDTYPE_PID); | ||
1570 | rcu_read_unlock(); | ||
1571 | if (!task) | ||
1731 | continue; | 1572 | continue; |
1732 | if (kvm_vcpu_yield_to(vcpu)) { | 1573 | if (task->flags & PF_VCPU) { |
1574 | put_task_struct(task); | ||
1575 | continue; | ||
1576 | } | ||
1577 | if (yield_to(task, 1)) { | ||
1578 | put_task_struct(task); | ||
1733 | kvm->last_boosted_vcpu = i; | 1579 | kvm->last_boosted_vcpu = i; |
1734 | yielded = 1; | 1580 | yielded = 1; |
1735 | break; | 1581 | break; |
1736 | } | 1582 | } |
1583 | put_task_struct(task); | ||
1737 | } | 1584 | } |
1738 | } | 1585 | } |
1739 | kvm_vcpu_set_in_spin_loop(me, false); | ||
1740 | |||
1741 | /* Ensure vcpu is not eligible during next spinloop */ | ||
1742 | kvm_vcpu_set_dy_eligible(me, false); | ||
1743 | } | 1586 | } |
1744 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | 1587 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); |
1745 | 1588 | ||
@@ -1759,7 +1602,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1759 | page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); | 1602 | page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); |
1760 | #endif | 1603 | #endif |
1761 | else | 1604 | else |
1762 | return kvm_arch_vcpu_fault(vcpu, vmf); | 1605 | return VM_FAULT_SIGBUS; |
1763 | get_page(page); | 1606 | get_page(page); |
1764 | vmf->page = page; | 1607 | vmf->page = page; |
1765 | return 0; | 1608 | return 0; |
@@ -1820,10 +1663,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) | |||
1820 | goto vcpu_destroy; | 1663 | goto vcpu_destroy; |
1821 | 1664 | ||
1822 | mutex_lock(&kvm->lock); | 1665 | mutex_lock(&kvm->lock); |
1823 | if (!kvm_vcpu_compatible(vcpu)) { | ||
1824 | r = -EINVAL; | ||
1825 | goto unlock_vcpu_destroy; | ||
1826 | } | ||
1827 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { | 1666 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { |
1828 | r = -EINVAL; | 1667 | r = -EINVAL; |
1829 | goto unlock_vcpu_destroy; | 1668 | goto unlock_vcpu_destroy; |
@@ -1849,8 +1688,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) | |||
1849 | smp_wmb(); | 1688 | smp_wmb(); |
1850 | atomic_inc(&kvm->online_vcpus); | 1689 | atomic_inc(&kvm->online_vcpus); |
1851 | 1690 | ||
1691 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
1692 | if (kvm->bsp_vcpu_id == id) | ||
1693 | kvm->bsp_vcpu = vcpu; | ||
1694 | #endif | ||
1852 | mutex_unlock(&kvm->lock); | 1695 | mutex_unlock(&kvm->lock); |
1853 | kvm_arch_vcpu_postcreate(vcpu); | ||
1854 | return r; | 1696 | return r; |
1855 | 1697 | ||
1856 | unlock_vcpu_destroy: | 1698 | unlock_vcpu_destroy: |
@@ -1893,9 +1735,7 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
1893 | #endif | 1735 | #endif |
1894 | 1736 | ||
1895 | 1737 | ||
1896 | r = vcpu_load(vcpu); | 1738 | vcpu_load(vcpu); |
1897 | if (r) | ||
1898 | return r; | ||
1899 | switch (ioctl) { | 1739 | switch (ioctl) { |
1900 | case KVM_RUN: | 1740 | case KVM_RUN: |
1901 | r = -EINVAL; | 1741 | r = -EINVAL; |
@@ -1926,12 +1766,17 @@ out_free1: | |||
1926 | struct kvm_regs *kvm_regs; | 1766 | struct kvm_regs *kvm_regs; |
1927 | 1767 | ||
1928 | r = -ENOMEM; | 1768 | r = -ENOMEM; |
1929 | kvm_regs = memdup_user(argp, sizeof(*kvm_regs)); | 1769 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); |
1930 | if (IS_ERR(kvm_regs)) { | 1770 | if (!kvm_regs) |
1931 | r = PTR_ERR(kvm_regs); | ||
1932 | goto out; | 1771 | goto out; |
1933 | } | 1772 | r = -EFAULT; |
1773 | if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) | ||
1774 | goto out_free2; | ||
1934 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); | 1775 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); |
1776 | if (r) | ||
1777 | goto out_free2; | ||
1778 | r = 0; | ||
1779 | out_free2: | ||
1935 | kfree(kvm_regs); | 1780 | kfree(kvm_regs); |
1936 | break; | 1781 | break; |
1937 | } | 1782 | } |
@@ -1950,13 +1795,17 @@ out_free1: | |||
1950 | break; | 1795 | break; |
1951 | } | 1796 | } |
1952 | case KVM_SET_SREGS: { | 1797 | case KVM_SET_SREGS: { |
1953 | kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs)); | 1798 | kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); |
1954 | if (IS_ERR(kvm_sregs)) { | 1799 | r = -ENOMEM; |
1955 | r = PTR_ERR(kvm_sregs); | 1800 | if (!kvm_sregs) |
1956 | kvm_sregs = NULL; | 1801 | goto out; |
1802 | r = -EFAULT; | ||
1803 | if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) | ||
1957 | goto out; | 1804 | goto out; |
1958 | } | ||
1959 | r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); | 1805 | r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); |
1806 | if (r) | ||
1807 | goto out; | ||
1808 | r = 0; | ||
1960 | break; | 1809 | break; |
1961 | } | 1810 | } |
1962 | case KVM_GET_MP_STATE: { | 1811 | case KVM_GET_MP_STATE: { |
@@ -1978,6 +1827,9 @@ out_free1: | |||
1978 | if (copy_from_user(&mp_state, argp, sizeof mp_state)) | 1827 | if (copy_from_user(&mp_state, argp, sizeof mp_state)) |
1979 | goto out; | 1828 | goto out; |
1980 | r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); | 1829 | r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); |
1830 | if (r) | ||
1831 | goto out; | ||
1832 | r = 0; | ||
1981 | break; | 1833 | break; |
1982 | } | 1834 | } |
1983 | case KVM_TRANSLATE: { | 1835 | case KVM_TRANSLATE: { |
@@ -2002,6 +1854,9 @@ out_free1: | |||
2002 | if (copy_from_user(&dbg, argp, sizeof dbg)) | 1854 | if (copy_from_user(&dbg, argp, sizeof dbg)) |
2003 | goto out; | 1855 | goto out; |
2004 | r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); | 1856 | r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); |
1857 | if (r) | ||
1858 | goto out; | ||
1859 | r = 0; | ||
2005 | break; | 1860 | break; |
2006 | } | 1861 | } |
2007 | case KVM_SET_SIGNAL_MASK: { | 1862 | case KVM_SET_SIGNAL_MASK: { |
@@ -2042,13 +1897,17 @@ out_free1: | |||
2042 | break; | 1897 | break; |
2043 | } | 1898 | } |
2044 | case KVM_SET_FPU: { | 1899 | case KVM_SET_FPU: { |
2045 | fpu = memdup_user(argp, sizeof(*fpu)); | 1900 | fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); |
2046 | if (IS_ERR(fpu)) { | 1901 | r = -ENOMEM; |
2047 | r = PTR_ERR(fpu); | 1902 | if (!fpu) |
2048 | fpu = NULL; | 1903 | goto out; |
1904 | r = -EFAULT; | ||
1905 | if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) | ||
2049 | goto out; | 1906 | goto out; |
2050 | } | ||
2051 | r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); | 1907 | r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); |
1908 | if (r) | ||
1909 | goto out; | ||
1910 | r = 0; | ||
2052 | break; | 1911 | break; |
2053 | } | 1912 | } |
2054 | default: | 1913 | default: |
@@ -2091,10 +1950,9 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, | |||
2091 | if (copy_from_user(&csigset, sigmask_arg->sigset, | 1950 | if (copy_from_user(&csigset, sigmask_arg->sigset, |
2092 | sizeof csigset)) | 1951 | sizeof csigset)) |
2093 | goto out; | 1952 | goto out; |
2094 | sigset_from_compat(&sigset, &csigset); | 1953 | } |
2095 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); | 1954 | sigset_from_compat(&sigset, &csigset); |
2096 | } else | 1955 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); |
2097 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, NULL); | ||
2098 | break; | 1956 | break; |
2099 | } | 1957 | } |
2100 | default: | 1958 | default: |
@@ -2118,6 +1976,8 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2118 | switch (ioctl) { | 1976 | switch (ioctl) { |
2119 | case KVM_CREATE_VCPU: | 1977 | case KVM_CREATE_VCPU: |
2120 | r = kvm_vm_ioctl_create_vcpu(kvm, arg); | 1978 | r = kvm_vm_ioctl_create_vcpu(kvm, arg); |
1979 | if (r < 0) | ||
1980 | goto out; | ||
2121 | break; | 1981 | break; |
2122 | case KVM_SET_USER_MEMORY_REGION: { | 1982 | case KVM_SET_USER_MEMORY_REGION: { |
2123 | struct kvm_userspace_memory_region kvm_userspace_mem; | 1983 | struct kvm_userspace_memory_region kvm_userspace_mem; |
@@ -2128,6 +1988,8 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2128 | goto out; | 1988 | goto out; |
2129 | 1989 | ||
2130 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); | 1990 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); |
1991 | if (r) | ||
1992 | goto out; | ||
2131 | break; | 1993 | break; |
2132 | } | 1994 | } |
2133 | case KVM_GET_DIRTY_LOG: { | 1995 | case KVM_GET_DIRTY_LOG: { |
@@ -2137,6 +1999,8 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2137 | if (copy_from_user(&log, argp, sizeof log)) | 1999 | if (copy_from_user(&log, argp, sizeof log)) |
2138 | goto out; | 2000 | goto out; |
2139 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | 2001 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); |
2002 | if (r) | ||
2003 | goto out; | ||
2140 | break; | 2004 | break; |
2141 | } | 2005 | } |
2142 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 2006 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
@@ -2146,6 +2010,9 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2146 | if (copy_from_user(&zone, argp, sizeof zone)) | 2010 | if (copy_from_user(&zone, argp, sizeof zone)) |
2147 | goto out; | 2011 | goto out; |
2148 | r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); | 2012 | r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); |
2013 | if (r) | ||
2014 | goto out; | ||
2015 | r = 0; | ||
2149 | break; | 2016 | break; |
2150 | } | 2017 | } |
2151 | case KVM_UNREGISTER_COALESCED_MMIO: { | 2018 | case KVM_UNREGISTER_COALESCED_MMIO: { |
@@ -2154,6 +2021,9 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2154 | if (copy_from_user(&zone, argp, sizeof zone)) | 2021 | if (copy_from_user(&zone, argp, sizeof zone)) |
2155 | goto out; | 2022 | goto out; |
2156 | r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); | 2023 | r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); |
2024 | if (r) | ||
2025 | goto out; | ||
2026 | r = 0; | ||
2157 | break; | 2027 | break; |
2158 | } | 2028 | } |
2159 | #endif | 2029 | #endif |
@@ -2163,7 +2033,7 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2163 | r = -EFAULT; | 2033 | r = -EFAULT; |
2164 | if (copy_from_user(&data, argp, sizeof data)) | 2034 | if (copy_from_user(&data, argp, sizeof data)) |
2165 | goto out; | 2035 | goto out; |
2166 | r = kvm_irqfd(kvm, &data); | 2036 | r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); |
2167 | break; | 2037 | break; |
2168 | } | 2038 | } |
2169 | case KVM_IOEVENTFD: { | 2039 | case KVM_IOEVENTFD: { |
@@ -2186,40 +2056,6 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2186 | mutex_unlock(&kvm->lock); | 2056 | mutex_unlock(&kvm->lock); |
2187 | break; | 2057 | break; |
2188 | #endif | 2058 | #endif |
2189 | #ifdef CONFIG_HAVE_KVM_MSI | ||
2190 | case KVM_SIGNAL_MSI: { | ||
2191 | struct kvm_msi msi; | ||
2192 | |||
2193 | r = -EFAULT; | ||
2194 | if (copy_from_user(&msi, argp, sizeof msi)) | ||
2195 | goto out; | ||
2196 | r = kvm_send_userspace_msi(kvm, &msi); | ||
2197 | break; | ||
2198 | } | ||
2199 | #endif | ||
2200 | #ifdef __KVM_HAVE_IRQ_LINE | ||
2201 | case KVM_IRQ_LINE_STATUS: | ||
2202 | case KVM_IRQ_LINE: { | ||
2203 | struct kvm_irq_level irq_event; | ||
2204 | |||
2205 | r = -EFAULT; | ||
2206 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | ||
2207 | goto out; | ||
2208 | |||
2209 | r = kvm_vm_ioctl_irq_line(kvm, &irq_event); | ||
2210 | if (r) | ||
2211 | goto out; | ||
2212 | |||
2213 | r = -EFAULT; | ||
2214 | if (ioctl == KVM_IRQ_LINE_STATUS) { | ||
2215 | if (copy_to_user(argp, &irq_event, sizeof irq_event)) | ||
2216 | goto out; | ||
2217 | } | ||
2218 | |||
2219 | r = 0; | ||
2220 | break; | ||
2221 | } | ||
2222 | #endif | ||
2223 | default: | 2059 | default: |
2224 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 2060 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
2225 | if (r == -ENOTTY) | 2061 | if (r == -ENOTTY) |
@@ -2262,6 +2098,8 @@ static long kvm_vm_compat_ioctl(struct file *filp, | |||
2262 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); | 2098 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); |
2263 | 2099 | ||
2264 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | 2100 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); |
2101 | if (r) | ||
2102 | goto out; | ||
2265 | break; | 2103 | break; |
2266 | } | 2104 | } |
2267 | default: | 2105 | default: |
@@ -2314,12 +2152,12 @@ static struct file_operations kvm_vm_fops = { | |||
2314 | .llseek = noop_llseek, | 2152 | .llseek = noop_llseek, |
2315 | }; | 2153 | }; |
2316 | 2154 | ||
2317 | static int kvm_dev_ioctl_create_vm(unsigned long type) | 2155 | static int kvm_dev_ioctl_create_vm(void) |
2318 | { | 2156 | { |
2319 | int r; | 2157 | int r; |
2320 | struct kvm *kvm; | 2158 | struct kvm *kvm; |
2321 | 2159 | ||
2322 | kvm = kvm_create_vm(type); | 2160 | kvm = kvm_create_vm(); |
2323 | if (IS_ERR(kvm)) | 2161 | if (IS_ERR(kvm)) |
2324 | return PTR_ERR(kvm); | 2162 | return PTR_ERR(kvm); |
2325 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 2163 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
@@ -2346,11 +2184,8 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) | |||
2346 | case KVM_CAP_SET_BOOT_CPU_ID: | 2184 | case KVM_CAP_SET_BOOT_CPU_ID: |
2347 | #endif | 2185 | #endif |
2348 | case KVM_CAP_INTERNAL_ERROR_DATA: | 2186 | case KVM_CAP_INTERNAL_ERROR_DATA: |
2349 | #ifdef CONFIG_HAVE_KVM_MSI | ||
2350 | case KVM_CAP_SIGNAL_MSI: | ||
2351 | #endif | ||
2352 | return 1; | 2187 | return 1; |
2353 | #ifdef KVM_CAP_IRQ_ROUTING | 2188 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
2354 | case KVM_CAP_IRQ_ROUTING: | 2189 | case KVM_CAP_IRQ_ROUTING: |
2355 | return KVM_MAX_IRQ_ROUTES; | 2190 | return KVM_MAX_IRQ_ROUTES; |
2356 | #endif | 2191 | #endif |
@@ -2373,7 +2208,10 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2373 | r = KVM_API_VERSION; | 2208 | r = KVM_API_VERSION; |
2374 | break; | 2209 | break; |
2375 | case KVM_CREATE_VM: | 2210 | case KVM_CREATE_VM: |
2376 | r = kvm_dev_ioctl_create_vm(arg); | 2211 | r = -EINVAL; |
2212 | if (arg) | ||
2213 | goto out; | ||
2214 | r = kvm_dev_ioctl_create_vm(); | ||
2377 | break; | 2215 | break; |
2378 | case KVM_CHECK_EXTENSION: | 2216 | case KVM_CHECK_EXTENSION: |
2379 | r = kvm_dev_ioctl_check_extension_generic(arg); | 2217 | r = kvm_dev_ioctl_check_extension_generic(arg); |
@@ -2553,89 +2391,24 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
2553 | int i; | 2391 | int i; |
2554 | 2392 | ||
2555 | for (i = 0; i < bus->dev_count; i++) { | 2393 | for (i = 0; i < bus->dev_count; i++) { |
2556 | struct kvm_io_device *pos = bus->range[i].dev; | 2394 | struct kvm_io_device *pos = bus->devs[i]; |
2557 | 2395 | ||
2558 | kvm_iodevice_destructor(pos); | 2396 | kvm_iodevice_destructor(pos); |
2559 | } | 2397 | } |
2560 | kfree(bus); | 2398 | kfree(bus); |
2561 | } | 2399 | } |
2562 | 2400 | ||
2563 | int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | ||
2564 | { | ||
2565 | const struct kvm_io_range *r1 = p1; | ||
2566 | const struct kvm_io_range *r2 = p2; | ||
2567 | |||
2568 | if (r1->addr < r2->addr) | ||
2569 | return -1; | ||
2570 | if (r1->addr + r1->len > r2->addr + r2->len) | ||
2571 | return 1; | ||
2572 | return 0; | ||
2573 | } | ||
2574 | |||
2575 | int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | ||
2576 | gpa_t addr, int len) | ||
2577 | { | ||
2578 | bus->range[bus->dev_count++] = (struct kvm_io_range) { | ||
2579 | .addr = addr, | ||
2580 | .len = len, | ||
2581 | .dev = dev, | ||
2582 | }; | ||
2583 | |||
2584 | sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range), | ||
2585 | kvm_io_bus_sort_cmp, NULL); | ||
2586 | |||
2587 | return 0; | ||
2588 | } | ||
2589 | |||
2590 | int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, | ||
2591 | gpa_t addr, int len) | ||
2592 | { | ||
2593 | struct kvm_io_range *range, key; | ||
2594 | int off; | ||
2595 | |||
2596 | key = (struct kvm_io_range) { | ||
2597 | .addr = addr, | ||
2598 | .len = len, | ||
2599 | }; | ||
2600 | |||
2601 | range = bsearch(&key, bus->range, bus->dev_count, | ||
2602 | sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp); | ||
2603 | if (range == NULL) | ||
2604 | return -ENOENT; | ||
2605 | |||
2606 | off = range - bus->range; | ||
2607 | |||
2608 | while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) | ||
2609 | off--; | ||
2610 | |||
2611 | return off; | ||
2612 | } | ||
2613 | |||
2614 | /* kvm_io_bus_write - called under kvm->slots_lock */ | 2401 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
2615 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2402 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2616 | int len, const void *val) | 2403 | int len, const void *val) |
2617 | { | 2404 | { |
2618 | int idx; | 2405 | int i; |
2619 | struct kvm_io_bus *bus; | 2406 | struct kvm_io_bus *bus; |
2620 | struct kvm_io_range range; | ||
2621 | |||
2622 | range = (struct kvm_io_range) { | ||
2623 | .addr = addr, | ||
2624 | .len = len, | ||
2625 | }; | ||
2626 | 2407 | ||
2627 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2408 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
2628 | idx = kvm_io_bus_get_first_dev(bus, addr, len); | 2409 | for (i = 0; i < bus->dev_count; i++) |
2629 | if (idx < 0) | 2410 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) |
2630 | return -EOPNOTSUPP; | ||
2631 | |||
2632 | while (idx < bus->dev_count && | ||
2633 | kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | ||
2634 | if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val)) | ||
2635 | return 0; | 2411 | return 0; |
2636 | idx++; | ||
2637 | } | ||
2638 | |||
2639 | return -EOPNOTSUPP; | 2412 | return -EOPNOTSUPP; |
2640 | } | 2413 | } |
2641 | 2414 | ||
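On the removed side of the hunk above, bus devices live in an array of (addr, len, dev) ranges kept sorted by kvm_io_bus_sort_cmp(); kvm_io_bus_get_first_dev() then bsearch()es for the first matching range before walking forward over equal keys, whereas the retained code simply scans every device. A small self-contained sketch of that sorted-range lookup idea (types and sample data made up for the example):

#include <stdio.h>
#include <stdlib.h>

struct model_range {
        unsigned long addr;
        int len;
        const char *dev;
};

/* Same ordering rule as the removed kvm_io_bus_sort_cmp(): order by start
 * address; a range whose end lies past the key's end sorts after it, and
 * anything else compares equal (i.e. it contains the key). */
static int model_cmp(const void *p1, const void *p2)
{
        const struct model_range *r1 = p1, *r2 = p2;

        if (r1->addr < r2->addr)
                return -1;
        if (r1->addr + r1->len > r2->addr + r2->len)
                return 1;
        return 0;
}

int main(void)
{
        struct model_range bus[] = {
                { 0x3f8, 8, "serial" },
                { 0x60,  1, "kbd"    },
                { 0xcf8, 8, "pci"    },
        };
        size_t n = sizeof(bus) / sizeof(bus[0]);
        struct model_range key = { .addr = 0x3f8, .len = 1, .dev = NULL };
        struct model_range *hit;

        qsort(bus, n, sizeof(bus[0]), model_cmp);               /* insert time */
        hit = bsearch(&key, bus, n, sizeof(bus[0]), model_cmp); /* lookup time */
        printf("%s\n", hit ? hit->dev : "no device");
        return 0;
}

The sorted layout is what keeps the lookup roughly logarithmic as more devices are registered; the linear scan on the retained side trades that for simplicity.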
@@ -2643,47 +2416,31 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
2643 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2416 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2644 | int len, void *val) | 2417 | int len, void *val) |
2645 | { | 2418 | { |
2646 | int idx; | 2419 | int i; |
2647 | struct kvm_io_bus *bus; | 2420 | struct kvm_io_bus *bus; |
2648 | struct kvm_io_range range; | ||
2649 | |||
2650 | range = (struct kvm_io_range) { | ||
2651 | .addr = addr, | ||
2652 | .len = len, | ||
2653 | }; | ||
2654 | 2421 | ||
2655 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2422 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
2656 | idx = kvm_io_bus_get_first_dev(bus, addr, len); | 2423 | for (i = 0; i < bus->dev_count; i++) |
2657 | if (idx < 0) | 2424 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) |
2658 | return -EOPNOTSUPP; | ||
2659 | |||
2660 | while (idx < bus->dev_count && | ||
2661 | kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | ||
2662 | if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val)) | ||
2663 | return 0; | 2425 | return 0; |
2664 | idx++; | ||
2665 | } | ||
2666 | |||
2667 | return -EOPNOTSUPP; | 2426 | return -EOPNOTSUPP; |
2668 | } | 2427 | } |
2669 | 2428 | ||
2670 | /* Caller must hold slots_lock. */ | 2429 | /* Caller must hold slots_lock. */ |
2671 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2430 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
2672 | int len, struct kvm_io_device *dev) | 2431 | struct kvm_io_device *dev) |
2673 | { | 2432 | { |
2674 | struct kvm_io_bus *new_bus, *bus; | 2433 | struct kvm_io_bus *new_bus, *bus; |
2675 | 2434 | ||
2676 | bus = kvm->buses[bus_idx]; | 2435 | bus = kvm->buses[bus_idx]; |
2677 | if (bus->dev_count > NR_IOBUS_DEVS - 1) | 2436 | if (bus->dev_count > NR_IOBUS_DEVS-1) |
2678 | return -ENOSPC; | 2437 | return -ENOSPC; |
2679 | 2438 | ||
2680 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * | 2439 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); |
2681 | sizeof(struct kvm_io_range)), GFP_KERNEL); | ||
2682 | if (!new_bus) | 2440 | if (!new_bus) |
2683 | return -ENOMEM; | 2441 | return -ENOMEM; |
2684 | memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count * | 2442 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); |
2685 | sizeof(struct kvm_io_range))); | 2443 | new_bus->devs[new_bus->dev_count++] = dev; |
2686 | kvm_io_bus_insert_dev(new_bus, dev, addr, len); | ||
2687 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 2444 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
2688 | synchronize_srcu_expedited(&kvm->srcu); | 2445 | synchronize_srcu_expedited(&kvm->srcu); |
2689 | kfree(bus); | 2446 | kfree(bus); |
@@ -2698,26 +2455,25 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
2698 | int i, r; | 2455 | int i, r; |
2699 | struct kvm_io_bus *new_bus, *bus; | 2456 | struct kvm_io_bus *new_bus, *bus; |
2700 | 2457 | ||
2458 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); | ||
2459 | if (!new_bus) | ||
2460 | return -ENOMEM; | ||
2461 | |||
2701 | bus = kvm->buses[bus_idx]; | 2462 | bus = kvm->buses[bus_idx]; |
2463 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | ||
2464 | |||
2702 | r = -ENOENT; | 2465 | r = -ENOENT; |
2703 | for (i = 0; i < bus->dev_count; i++) | 2466 | for (i = 0; i < new_bus->dev_count; i++) |
2704 | if (bus->range[i].dev == dev) { | 2467 | if (new_bus->devs[i] == dev) { |
2705 | r = 0; | 2468 | r = 0; |
2469 | new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; | ||
2706 | break; | 2470 | break; |
2707 | } | 2471 | } |
2708 | 2472 | ||
2709 | if (r) | 2473 | if (r) { |
2474 | kfree(new_bus); | ||
2710 | return r; | 2475 | return r; |
2711 | 2476 | } | |
2712 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * | ||
2713 | sizeof(struct kvm_io_range)), GFP_KERNEL); | ||
2714 | if (!new_bus) | ||
2715 | return -ENOMEM; | ||
2716 | |||
2717 | memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); | ||
2718 | new_bus->dev_count--; | ||
2719 | memcpy(new_bus->range + i, bus->range + i + 1, | ||
2720 | (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); | ||
2721 | 2477 | ||
2722 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 2478 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
2723 | synchronize_srcu_expedited(&kvm->srcu); | 2479 | synchronize_srcu_expedited(&kvm->srcu); |
@@ -2768,29 +2524,15 @@ static const struct file_operations *stat_fops[] = { | |||
2768 | [KVM_STAT_VM] = &vm_stat_fops, | 2524 | [KVM_STAT_VM] = &vm_stat_fops, |
2769 | }; | 2525 | }; |
2770 | 2526 | ||
2771 | static int kvm_init_debug(void) | 2527 | static void kvm_init_debug(void) |
2772 | { | 2528 | { |
2773 | int r = -EFAULT; | ||
2774 | struct kvm_stats_debugfs_item *p; | 2529 | struct kvm_stats_debugfs_item *p; |
2775 | 2530 | ||
2776 | kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); | 2531 | kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); |
2777 | if (kvm_debugfs_dir == NULL) | 2532 | for (p = debugfs_entries; p->name; ++p) |
2778 | goto out; | ||
2779 | |||
2780 | for (p = debugfs_entries; p->name; ++p) { | ||
2781 | p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, | 2533 | p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, |
2782 | (void *)(long)p->offset, | 2534 | (void *)(long)p->offset, |
2783 | stat_fops[p->kind]); | 2535 | stat_fops[p->kind]); |
2784 | if (p->dentry == NULL) | ||
2785 | goto out_dir; | ||
2786 | } | ||
2787 | |||
2788 | return 0; | ||
2789 | |||
2790 | out_dir: | ||
2791 | debugfs_remove_recursive(kvm_debugfs_dir); | ||
2792 | out: | ||
2793 | return r; | ||
2794 | } | 2536 | } |
2795 | 2537 | ||
2796 | static void kvm_exit_debug(void) | 2538 | static void kvm_exit_debug(void) |
@@ -2822,6 +2564,9 @@ static struct syscore_ops kvm_syscore_ops = { | |||
2822 | .resume = kvm_resume, | 2564 | .resume = kvm_resume, |
2823 | }; | 2565 | }; |
2824 | 2566 | ||
2567 | struct page *bad_page; | ||
2568 | pfn_t bad_pfn; | ||
2569 | |||
2825 | static inline | 2570 | static inline |
2826 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | 2571 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) |
2827 | { | 2572 | { |
@@ -2853,6 +2598,33 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
2853 | if (r) | 2598 | if (r) |
2854 | goto out_fail; | 2599 | goto out_fail; |
2855 | 2600 | ||
2601 | bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2602 | |||
2603 | if (bad_page == NULL) { | ||
2604 | r = -ENOMEM; | ||
2605 | goto out; | ||
2606 | } | ||
2607 | |||
2608 | bad_pfn = page_to_pfn(bad_page); | ||
2609 | |||
2610 | hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2611 | |||
2612 | if (hwpoison_page == NULL) { | ||
2613 | r = -ENOMEM; | ||
2614 | goto out_free_0; | ||
2615 | } | ||
2616 | |||
2617 | hwpoison_pfn = page_to_pfn(hwpoison_page); | ||
2618 | |||
2619 | fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2620 | |||
2621 | if (fault_page == NULL) { | ||
2622 | r = -ENOMEM; | ||
2623 | goto out_free_0; | ||
2624 | } | ||
2625 | |||
2626 | fault_pfn = page_to_pfn(fault_page); | ||
2627 | |||
2856 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { | 2628 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { |
2857 | r = -ENOMEM; | 2629 | r = -ENOMEM; |
2858 | goto out_free_0; | 2630 | goto out_free_0; |
@@ -2904,16 +2676,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
2904 | kvm_preempt_ops.sched_in = kvm_sched_in; | 2676 | kvm_preempt_ops.sched_in = kvm_sched_in; |
2905 | kvm_preempt_ops.sched_out = kvm_sched_out; | 2677 | kvm_preempt_ops.sched_out = kvm_sched_out; |
2906 | 2678 | ||
2907 | r = kvm_init_debug(); | 2679 | kvm_init_debug(); |
2908 | if (r) { | ||
2909 | printk(KERN_ERR "kvm: create debugfs files failed\n"); | ||
2910 | goto out_undebugfs; | ||
2911 | } | ||
2912 | 2680 | ||
2913 | return 0; | 2681 | return 0; |
2914 | 2682 | ||
2915 | out_undebugfs: | ||
2916 | unregister_syscore_ops(&kvm_syscore_ops); | ||
2917 | out_unreg: | 2683 | out_unreg: |
2918 | kvm_async_pf_deinit(); | 2684 | kvm_async_pf_deinit(); |
2919 | out_free: | 2685 | out_free: |
@@ -2927,6 +2693,12 @@ out_free_1: | |||
2927 | out_free_0a: | 2693 | out_free_0a: |
2928 | free_cpumask_var(cpus_hardware_enabled); | 2694 | free_cpumask_var(cpus_hardware_enabled); |
2929 | out_free_0: | 2695 | out_free_0: |
2696 | if (fault_page) | ||
2697 | __free_page(fault_page); | ||
2698 | if (hwpoison_page) | ||
2699 | __free_page(hwpoison_page); | ||
2700 | __free_page(bad_page); | ||
2701 | out: | ||
2930 | kvm_arch_exit(); | 2702 | kvm_arch_exit(); |
2931 | out_fail: | 2703 | out_fail: |
2932 | return r; | 2704 | return r; |
@@ -2946,5 +2718,7 @@ void kvm_exit(void) | |||
2946 | kvm_arch_hardware_unsetup(); | 2718 | kvm_arch_hardware_unsetup(); |
2947 | kvm_arch_exit(); | 2719 | kvm_arch_exit(); |
2948 | free_cpumask_var(cpus_hardware_enabled); | 2720 | free_cpumask_var(cpus_hardware_enabled); |
2721 | __free_page(hwpoison_page); | ||
2722 | __free_page(bad_page); | ||
2949 | } | 2723 | } |
2950 | EXPORT_SYMBOL_GPL(kvm_exit); | 2724 | EXPORT_SYMBOL_GPL(kvm_exit); |