Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/Kconfig        |   3
-rw-r--r--  virt/kvm/assigned-dev.c | 135
-rw-r--r--  virt/kvm/async_pf.c     | 216
-rw-r--r--  virt/kvm/async_pf.h     |  36
-rw-r--r--  virt/kvm/eventfd.c      |  96
-rw-r--r--  virt/kvm/ioapic.c       |   2
-rw-r--r--  virt/kvm/irq_comm.c     |   9
-rw-r--r--  virt/kvm/kvm_main.c     | 630
8 files changed, 857 insertions, 270 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 7f1178f6b839..f63ccb0a5982 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -15,3 +15,6 @@ config KVM_APIC_ARCHITECTURE
 
 config KVM_MMIO
        bool
+
+config KVM_ASYNC_PF
+       bool
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 7c98928b09d9..6cc4b97ec458 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -55,58 +55,31 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | |||
55 | return index; | 55 | return index; |
56 | } | 56 | } |
57 | 57 | ||
58 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | 58 | static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) |
59 | { | 59 | { |
60 | struct kvm_assigned_dev_kernel *assigned_dev; | 60 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
61 | int i; | 61 | u32 vector; |
62 | int index; | ||
62 | 63 | ||
63 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | 64 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { |
64 | interrupt_work); | 65 | spin_lock(&assigned_dev->intx_lock); |
66 | disable_irq_nosync(irq); | ||
67 | assigned_dev->host_irq_disabled = true; | ||
68 | spin_unlock(&assigned_dev->intx_lock); | ||
69 | } | ||
65 | 70 | ||
66 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
67 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | 71 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
68 | struct kvm_guest_msix_entry *guest_entries = | 72 | index = find_index_from_host_irq(assigned_dev, irq); |
69 | assigned_dev->guest_msix_entries; | 73 | if (index >= 0) { |
70 | for (i = 0; i < assigned_dev->entries_nr; i++) { | 74 | vector = assigned_dev-> |
71 | if (!(guest_entries[i].flags & | 75 | guest_msix_entries[index].vector; |
72 | KVM_ASSIGNED_MSIX_PENDING)) | ||
73 | continue; | ||
74 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
75 | kvm_set_irq(assigned_dev->kvm, | 76 | kvm_set_irq(assigned_dev->kvm, |
76 | assigned_dev->irq_source_id, | 77 | assigned_dev->irq_source_id, vector, 1); |
77 | guest_entries[i].vector, 1); | ||
78 | } | 78 | } |
79 | } else | 79 | } else |
80 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 80 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
81 | assigned_dev->guest_irq, 1); | 81 | assigned_dev->guest_irq, 1); |
82 | 82 | ||
83 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
84 | } | ||
85 | |||
86 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
87 | { | ||
88 | unsigned long flags; | ||
89 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
90 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
91 | |||
92 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
93 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
94 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
95 | if (index < 0) | ||
96 | goto out; | ||
97 | assigned_dev->guest_msix_entries[index].flags |= | ||
98 | KVM_ASSIGNED_MSIX_PENDING; | ||
99 | } | ||
100 | |||
101 | schedule_work(&assigned_dev->interrupt_work); | ||
102 | |||
103 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
104 | disable_irq_nosync(irq); | ||
105 | assigned_dev->host_irq_disabled = true; | ||
106 | } | ||
107 | |||
108 | out: | ||
109 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
110 | return IRQ_HANDLED; | 83 | return IRQ_HANDLED; |
111 | } | 84 | } |
112 | 85 | ||
@@ -114,7 +87,6 @@ out: | |||
114 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | 87 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) |
115 | { | 88 | { |
116 | struct kvm_assigned_dev_kernel *dev; | 89 | struct kvm_assigned_dev_kernel *dev; |
117 | unsigned long flags; | ||
118 | 90 | ||
119 | if (kian->gsi == -1) | 91 | if (kian->gsi == -1) |
120 | return; | 92 | return; |
@@ -127,12 +99,12 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
127 | /* The guest irq may be shared so this ack may be | 99 | /* The guest irq may be shared so this ack may be |
128 | * from another device. | 100 | * from another device. |
129 | */ | 101 | */ |
130 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | 102 | spin_lock(&dev->intx_lock); |
131 | if (dev->host_irq_disabled) { | 103 | if (dev->host_irq_disabled) { |
132 | enable_irq(dev->host_irq); | 104 | enable_irq(dev->host_irq); |
133 | dev->host_irq_disabled = false; | 105 | dev->host_irq_disabled = false; |
134 | } | 106 | } |
135 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | 107 | spin_unlock(&dev->intx_lock); |
136 | } | 108 | } |
137 | 109 | ||
138 | static void deassign_guest_irq(struct kvm *kvm, | 110 | static void deassign_guest_irq(struct kvm *kvm, |
@@ -141,6 +113,9 @@ static void deassign_guest_irq(struct kvm *kvm, | |||
141 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | 113 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); |
142 | assigned_dev->ack_notifier.gsi = -1; | 114 | assigned_dev->ack_notifier.gsi = -1; |
143 | 115 | ||
116 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
117 | assigned_dev->guest_irq, 0); | ||
118 | |||
144 | if (assigned_dev->irq_source_id != -1) | 119 | if (assigned_dev->irq_source_id != -1) |
145 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | 120 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); |
146 | assigned_dev->irq_source_id = -1; | 121 | assigned_dev->irq_source_id = -1; |
@@ -152,28 +127,19 @@ static void deassign_host_irq(struct kvm *kvm, | |||
152 | struct kvm_assigned_dev_kernel *assigned_dev) | 127 | struct kvm_assigned_dev_kernel *assigned_dev) |
153 | { | 128 | { |
154 | /* | 129 | /* |
155 | * In kvm_free_device_irq, cancel_work_sync return true if: | 130 | * We disable irq here to prevent further events. |
156 | * 1. work is scheduled, and then cancelled. | ||
157 | * 2. work callback is executed. | ||
158 | * | ||
159 | * The first one ensured that the irq is disabled and no more events | ||
160 | * would happen. But for the second one, the irq may be enabled (e.g. | ||
161 | * for MSI). So we disable irq here to prevent further events. | ||
162 | * | 131 | * |
163 | * Notice this maybe result in nested disable if the interrupt type is | 132 | * Notice this maybe result in nested disable if the interrupt type is |
164 | * INTx, but it's OK for we are going to free it. | 133 | * INTx, but it's OK for we are going to free it. |
165 | * | 134 | * |
166 | * If this function is a part of VM destroy, please ensure that till | 135 | * If this function is a part of VM destroy, please ensure that till |
167 | * now, the kvm state is still legal for probably we also have to wait | 136 | * now, the kvm state is still legal for probably we also have to wait |
168 | * interrupt_work done. | 137 | * on a currently running IRQ handler. |
169 | */ | 138 | */ |
170 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | 139 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
171 | int i; | 140 | int i; |
172 | for (i = 0; i < assigned_dev->entries_nr; i++) | 141 | for (i = 0; i < assigned_dev->entries_nr; i++) |
173 | disable_irq_nosync(assigned_dev-> | 142 | disable_irq(assigned_dev->host_msix_entries[i].vector); |
174 | host_msix_entries[i].vector); | ||
175 | |||
176 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
177 | 143 | ||
178 | for (i = 0; i < assigned_dev->entries_nr; i++) | 144 | for (i = 0; i < assigned_dev->entries_nr; i++) |
179 | free_irq(assigned_dev->host_msix_entries[i].vector, | 145 | free_irq(assigned_dev->host_msix_entries[i].vector, |
@@ -185,8 +151,7 @@ static void deassign_host_irq(struct kvm *kvm, | |||
185 | pci_disable_msix(assigned_dev->dev); | 151 | pci_disable_msix(assigned_dev->dev); |
186 | } else { | 152 | } else { |
187 | /* Deal with MSI and INTx */ | 153 | /* Deal with MSI and INTx */ |
188 | disable_irq_nosync(assigned_dev->host_irq); | 154 | disable_irq(assigned_dev->host_irq); |
189 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
190 | 155 | ||
191 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | 156 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); |
192 | 157 | ||
@@ -233,6 +198,12 @@ static void kvm_free_assigned_device(struct kvm *kvm, | |||
233 | kvm_free_assigned_irq(kvm, assigned_dev); | 198 | kvm_free_assigned_irq(kvm, assigned_dev); |
234 | 199 | ||
235 | pci_reset_function(assigned_dev->dev); | 200 | pci_reset_function(assigned_dev->dev); |
201 | if (pci_load_and_free_saved_state(assigned_dev->dev, | ||
202 | &assigned_dev->pci_saved_state)) | ||
203 | printk(KERN_INFO "%s: Couldn't reload %s saved state\n", | ||
204 | __func__, dev_name(&assigned_dev->dev->dev)); | ||
205 | else | ||
206 | pci_restore_state(assigned_dev->dev); | ||
236 | 207 | ||
237 | pci_release_regions(assigned_dev->dev); | 208 | pci_release_regions(assigned_dev->dev); |
238 | pci_disable_device(assigned_dev->dev); | 209 | pci_disable_device(assigned_dev->dev); |
@@ -265,8 +236,8 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, | |||
265 | * on the same interrupt line is not a happy situation: there | 236 | * on the same interrupt line is not a happy situation: there |
266 | * are going to be long delays in accepting, acking, etc. | 237 | * are going to be long delays in accepting, acking, etc. |
267 | */ | 238 | */ |
268 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | 239 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
269 | 0, "kvm_assigned_intx_device", (void *)dev)) | 240 | IRQF_ONESHOT, dev->irq_name, (void *)dev)) |
270 | return -EIO; | 241 | return -EIO; |
271 | return 0; | 242 | return 0; |
272 | } | 243 | } |
@@ -284,8 +255,8 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, | |||
284 | } | 255 | } |
285 | 256 | ||
286 | dev->host_irq = dev->dev->irq; | 257 | dev->host_irq = dev->dev->irq; |
287 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | 258 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
288 | "kvm_assigned_msi_device", (void *)dev)) { | 259 | 0, dev->irq_name, (void *)dev)) { |
289 | pci_disable_msi(dev->dev); | 260 | pci_disable_msi(dev->dev); |
290 | return -EIO; | 261 | return -EIO; |
291 | } | 262 | } |
@@ -310,10 +281,9 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, | |||
310 | return r; | 281 | return r; |
311 | 282 | ||
312 | for (i = 0; i < dev->entries_nr; i++) { | 283 | for (i = 0; i < dev->entries_nr; i++) { |
313 | r = request_irq(dev->host_msix_entries[i].vector, | 284 | r = request_threaded_irq(dev->host_msix_entries[i].vector, |
314 | kvm_assigned_dev_intr, 0, | 285 | NULL, kvm_assigned_dev_thread, |
315 | "kvm_assigned_msix_device", | 286 | 0, dev->irq_name, (void *)dev); |
316 | (void *)dev); | ||
317 | if (r) | 287 | if (r) |
318 | goto err; | 288 | goto err; |
319 | } | 289 | } |
@@ -370,6 +340,9 @@ static int assign_host_irq(struct kvm *kvm, | |||
370 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | 340 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) |
371 | return r; | 341 | return r; |
372 | 342 | ||
343 | snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s", | ||
344 | pci_name(dev->dev)); | ||
345 | |||
373 | switch (host_irq_type) { | 346 | switch (host_irq_type) { |
374 | case KVM_DEV_IRQ_HOST_INTX: | 347 | case KVM_DEV_IRQ_HOST_INTX: |
375 | r = assigned_device_enable_host_intx(kvm, dev); | 348 | r = assigned_device_enable_host_intx(kvm, dev); |
@@ -547,19 +520,21 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
547 | } | 520 | } |
548 | 521 | ||
549 | pci_reset_function(dev); | 522 | pci_reset_function(dev); |
550 | 523 | pci_save_state(dev); | |
524 | match->pci_saved_state = pci_store_saved_state(dev); | ||
525 | if (!match->pci_saved_state) | ||
526 | printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", | ||
527 | __func__, dev_name(&dev->dev)); | ||
551 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | 528 | match->assigned_dev_id = assigned_dev->assigned_dev_id; |
552 | match->host_segnr = assigned_dev->segnr; | 529 | match->host_segnr = assigned_dev->segnr; |
553 | match->host_busnr = assigned_dev->busnr; | 530 | match->host_busnr = assigned_dev->busnr; |
554 | match->host_devfn = assigned_dev->devfn; | 531 | match->host_devfn = assigned_dev->devfn; |
555 | match->flags = assigned_dev->flags; | 532 | match->flags = assigned_dev->flags; |
556 | match->dev = dev; | 533 | match->dev = dev; |
557 | spin_lock_init(&match->assigned_dev_lock); | 534 | spin_lock_init(&match->intx_lock); |
558 | match->irq_source_id = -1; | 535 | match->irq_source_id = -1; |
559 | match->kvm = kvm; | 536 | match->kvm = kvm; |
560 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | 537 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; |
561 | INIT_WORK(&match->interrupt_work, | ||
562 | kvm_assigned_dev_interrupt_work_handler); | ||
563 | 538 | ||
564 | list_add(&match->list, &kvm->arch.assigned_dev_head); | 539 | list_add(&match->list, &kvm->arch.assigned_dev_head); |
565 | 540 | ||
@@ -579,6 +554,9 @@ out: | |||
579 | mutex_unlock(&kvm->lock); | 554 | mutex_unlock(&kvm->lock); |
580 | return r; | 555 | return r; |
581 | out_list_del: | 556 | out_list_del: |
557 | if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) | ||
558 | printk(KERN_INFO "%s: Couldn't reload %s saved state\n", | ||
559 | __func__, dev_name(&dev->dev)); | ||
582 | list_del(&match->list); | 560 | list_del(&match->list); |
583 | pci_release_regions(dev); | 561 | pci_release_regions(dev); |
584 | out_disable: | 562 | out_disable: |
@@ -651,9 +629,9 @@ static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | |||
651 | r = -ENOMEM; | 629 | r = -ENOMEM; |
652 | goto msix_nr_out; | 630 | goto msix_nr_out; |
653 | } | 631 | } |
654 | adev->guest_msix_entries = kzalloc( | 632 | adev->guest_msix_entries = |
655 | sizeof(struct kvm_guest_msix_entry) * | 633 | kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr, |
656 | entry_nr->entry_nr, GFP_KERNEL); | 634 | GFP_KERNEL); |
657 | if (!adev->guest_msix_entries) { | 635 | if (!adev->guest_msix_entries) { |
658 | kfree(adev->host_msix_entries); | 636 | kfree(adev->host_msix_entries); |
659 | r = -ENOMEM; | 637 | r = -ENOMEM; |
@@ -706,7 +684,7 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
706 | unsigned long arg) | 684 | unsigned long arg) |
707 | { | 685 | { |
708 | void __user *argp = (void __user *)arg; | 686 | void __user *argp = (void __user *)arg; |
709 | int r = -ENOTTY; | 687 | int r; |
710 | 688 | ||
711 | switch (ioctl) { | 689 | switch (ioctl) { |
712 | case KVM_ASSIGN_PCI_DEVICE: { | 690 | case KVM_ASSIGN_PCI_DEVICE: { |
@@ -724,7 +702,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
724 | r = -EOPNOTSUPP; | 702 | r = -EOPNOTSUPP; |
725 | break; | 703 | break; |
726 | } | 704 | } |
727 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
728 | case KVM_ASSIGN_DEV_IRQ: { | 705 | case KVM_ASSIGN_DEV_IRQ: { |
729 | struct kvm_assigned_irq assigned_irq; | 706 | struct kvm_assigned_irq assigned_irq; |
730 | 707 | ||
@@ -747,8 +724,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
747 | goto out; | 724 | goto out; |
748 | break; | 725 | break; |
749 | } | 726 | } |
750 | #endif | ||
751 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
752 | case KVM_DEASSIGN_PCI_DEVICE: { | 727 | case KVM_DEASSIGN_PCI_DEVICE: { |
753 | struct kvm_assigned_pci_dev assigned_dev; | 728 | struct kvm_assigned_pci_dev assigned_dev; |
754 | 729 | ||
@@ -760,7 +735,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
760 | goto out; | 735 | goto out; |
761 | break; | 736 | break; |
762 | } | 737 | } |
763 | #endif | ||
764 | #ifdef KVM_CAP_IRQ_ROUTING | 738 | #ifdef KVM_CAP_IRQ_ROUTING |
765 | case KVM_SET_GSI_ROUTING: { | 739 | case KVM_SET_GSI_ROUTING: { |
766 | struct kvm_irq_routing routing; | 740 | struct kvm_irq_routing routing; |
@@ -813,6 +787,9 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
813 | break; | 787 | break; |
814 | } | 788 | } |
815 | #endif | 789 | #endif |
790 | default: | ||
791 | r = -ENOTTY; | ||
792 | break; | ||
816 | } | 793 | } |
817 | out: | 794 | out: |
818 | return r; | 795 | return r; |
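The assigned-dev.c hunks above drop the schedule_work()-based bottom half and move guest injection into a threaded interrupt handler registered with request_threaded_irq(); the INTx path additionally masks the host line, relying on IRQF_ONESHOT semantics until the guest acks. A minimal sketch of the same pattern outside KVM, with a hypothetical driver and device structure (all "my_dev" names are made up for illustration):

/* Illustrative threaded-IRQ pattern; "my_dev" and its fields are invented. */
#include <linux/interrupt.h>
#include <linux/spinlock.h>

struct my_dev {
        int host_irq;
        bool host_irq_disabled;
        spinlock_t intx_lock;
};

/* Runs in its own kernel thread, so it may sleep and take ordinary locks. */
static irqreturn_t my_dev_irq_thread(int irq, void *dev_id)
{
        struct my_dev *dev = dev_id;

        spin_lock(&dev->intx_lock);
        disable_irq_nosync(irq);        /* keep the line quiet until acked */
        dev->host_irq_disabled = true;
        spin_unlock(&dev->intx_lock);

        /* ...forward the event to whoever consumes it... */
        return IRQ_HANDLED;
}

static int my_dev_setup_irq(struct my_dev *dev)
{
        spin_lock_init(&dev->intx_lock);
        /* NULL primary handler + IRQF_ONESHOT: the core wakes the thread and
         * keeps the interrupt line masked until the thread returns. */
        return request_threaded_irq(dev->host_irq, NULL, my_dev_irq_thread,
                                    IRQF_ONESHOT, "my_dev", dev);
}

The MSI and MSI-X paths in the patch pass 0 instead of IRQF_ONESHOT because message-signalled interrupts do not need the line held masked across the threaded part.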
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
new file mode 100644
index 000000000000..74268b4c2ee1
--- /dev/null
+++ b/virt/kvm/async_pf.c
@@ -0,0 +1,216 @@
+/*
+ * kvm asynchronous fault support
+ *
+ * Copyright 2010 Red Hat, Inc.
+ *
+ * Author:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mmu_context.h>
+
+#include "async_pf.h"
+#include <trace/events/kvm.h>
+
+static struct kmem_cache *async_pf_cache;
+
+int kvm_async_pf_init(void)
+{
+        async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);
+
+        if (!async_pf_cache)
+                return -ENOMEM;
+
+        return 0;
+}
+
+void kvm_async_pf_deinit(void)
+{
+        if (async_pf_cache)
+                kmem_cache_destroy(async_pf_cache);
+        async_pf_cache = NULL;
+}
+
+void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
+{
+        INIT_LIST_HEAD(&vcpu->async_pf.done);
+        INIT_LIST_HEAD(&vcpu->async_pf.queue);
+        spin_lock_init(&vcpu->async_pf.lock);
+}
+
+static void async_pf_execute(struct work_struct *work)
+{
+        struct page *page = NULL;
+        struct kvm_async_pf *apf =
+                container_of(work, struct kvm_async_pf, work);
+        struct mm_struct *mm = apf->mm;
+        struct kvm_vcpu *vcpu = apf->vcpu;
+        unsigned long addr = apf->addr;
+        gva_t gva = apf->gva;
+
+        might_sleep();
+
+        use_mm(mm);
+        down_read(&mm->mmap_sem);
+        get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+        up_read(&mm->mmap_sem);
+        unuse_mm(mm);
+
+        spin_lock(&vcpu->async_pf.lock);
+        list_add_tail(&apf->link, &vcpu->async_pf.done);
+        apf->page = page;
+        apf->done = true;
+        spin_unlock(&vcpu->async_pf.lock);
+
+        /*
+         * apf may be freed by kvm_check_async_pf_completion() after
+         * this point
+         */
+
+        trace_kvm_async_pf_completed(addr, page, gva);
+
+        if (waitqueue_active(&vcpu->wq))
+                wake_up_interruptible(&vcpu->wq);
+
+        mmdrop(mm);
+        kvm_put_kvm(vcpu->kvm);
+}
+
+void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
+{
+        /* cancel outstanding work queue item */
+        while (!list_empty(&vcpu->async_pf.queue)) {
+                struct kvm_async_pf *work =
+                        list_entry(vcpu->async_pf.queue.next,
+                                   typeof(*work), queue);
+                cancel_work_sync(&work->work);
+                list_del(&work->queue);
+                if (!work->done) /* work was canceled */
+                        kmem_cache_free(async_pf_cache, work);
+        }
+
+        spin_lock(&vcpu->async_pf.lock);
+        while (!list_empty(&vcpu->async_pf.done)) {
+                struct kvm_async_pf *work =
+                        list_entry(vcpu->async_pf.done.next,
+                                   typeof(*work), link);
+                list_del(&work->link);
+                if (work->page)
+                        put_page(work->page);
+                kmem_cache_free(async_pf_cache, work);
+        }
+        spin_unlock(&vcpu->async_pf.lock);
+
+        vcpu->async_pf.queued = 0;
+}
+
+void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
+{
+        struct kvm_async_pf *work;
+
+        while (!list_empty_careful(&vcpu->async_pf.done) &&
+               kvm_arch_can_inject_async_page_present(vcpu)) {
+                spin_lock(&vcpu->async_pf.lock);
+                work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
+                                        link);
+                list_del(&work->link);
+                spin_unlock(&vcpu->async_pf.lock);
+
+                if (work->page)
+                        kvm_arch_async_page_ready(vcpu, work);
+                kvm_arch_async_page_present(vcpu, work);
+
+                list_del(&work->queue);
+                vcpu->async_pf.queued--;
+                if (work->page)
+                        put_page(work->page);
+                kmem_cache_free(async_pf_cache, work);
+        }
+}
+
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+                       struct kvm_arch_async_pf *arch)
+{
+        struct kvm_async_pf *work;
+
+        if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
+                return 0;
+
+        /* setup delayed work */
+
+        /*
+         * do alloc nowait since if we are going to sleep anyway we
+         * may as well sleep faulting in page
+         */
+        work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
+        if (!work)
+                return 0;
+
+        work->page = NULL;
+        work->done = false;
+        work->vcpu = vcpu;
+        work->gva = gva;
+        work->addr = gfn_to_hva(vcpu->kvm, gfn);
+        work->arch = *arch;
+        work->mm = current->mm;
+        atomic_inc(&work->mm->mm_count);
+        kvm_get_kvm(work->vcpu->kvm);
+
+        /* this can't really happen otherwise gfn_to_pfn_async
+           would succeed */
+        if (unlikely(kvm_is_error_hva(work->addr)))
+                goto retry_sync;
+
+        INIT_WORK(&work->work, async_pf_execute);
+        if (!schedule_work(&work->work))
+                goto retry_sync;
+
+        list_add_tail(&work->queue, &vcpu->async_pf.queue);
+        vcpu->async_pf.queued++;
+        kvm_arch_async_page_not_present(vcpu, work);
+        return 1;
+retry_sync:
+        kvm_put_kvm(work->vcpu->kvm);
+        mmdrop(work->mm);
+        kmem_cache_free(async_pf_cache, work);
+        return 0;
+}
+
+int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
+{
+        struct kvm_async_pf *work;
+
+        if (!list_empty_careful(&vcpu->async_pf.done))
+                return 0;
+
+        work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
+        if (!work)
+                return -ENOMEM;
+
+        work->page = bad_page;
+        get_page(bad_page);
+        INIT_LIST_HEAD(&work->queue); /* for list_del to work */
+
+        spin_lock(&vcpu->async_pf.lock);
+        list_add_tail(&work->link, &vcpu->async_pf.done);
+        spin_unlock(&vcpu->async_pf.lock);
+
+        vcpu->async_pf.queued++;
+        return 0;
+}
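The new async_pf.c gives architecture code a small queueing API: kvm_setup_async_pf() takes a reference on current->mm and schedules a work item that faults the page in through get_user_pages(), and kvm_check_async_pf_completion() later hands completed faults back to the architecture via kvm_arch_async_page_ready() and kvm_arch_async_page_present(). A rough sketch of how an architecture fault path might drive it; the arch_* helpers and the run-loop hook below are illustrative assumptions, not part of this patch:

/* Illustrative arch-side use of the async_pf queue; arch_* names are invented. */
static int arch_handle_guest_fault(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
{
        struct kvm_arch_async_pf arch_data = {};        /* arch token for this fault */

        /*
         * Returns 1 if the fault was queued: the guest is told "page not
         * present" and can run another task while the worker faults the
         * page in from the saved mm.
         */
        if (kvm_setup_async_pf(vcpu, gva, gfn, &arch_data))
                return 0;

        /* Queue full or allocation failed: fall back to a synchronous fault. */
        return arch_sync_fault_in(vcpu, gfn);
}

/* Called from the vcpu run loop, outside guest mode. */
static void arch_before_guest_entry(struct kvm_vcpu *vcpu)
{
        /* Injects "page ready" events for work items that have completed. */
        kvm_check_async_pf_completion(vcpu);
}

kvm_clear_async_pf_completion_queue() is the teardown counterpart and is expected to run when the vcpu is reset or destroyed, so no queued work item outlives the vcpu it references.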
diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h
new file mode 100644
index 000000000000..e7ef6447cb82
--- /dev/null
+++ b/virt/kvm/async_pf.h
@@ -0,0 +1,36 @@
+/*
+ * kvm asynchronous fault support
+ *
+ * Copyright 2010 Red Hat, Inc.
+ *
+ * Author:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __KVM_ASYNC_PF_H__
+#define __KVM_ASYNC_PF_H__
+
+#ifdef CONFIG_KVM_ASYNC_PF
+int kvm_async_pf_init(void);
+void kvm_async_pf_deinit(void);
+void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
+#else
+#define kvm_async_pf_init() (0)
+#define kvm_async_pf_deinit() do{}while(0)
+#define kvm_async_pf_vcpu_init(C) do{}while(0)
+#endif
+
+#endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c1f1e3c62984..73358d256fa2 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -44,14 +44,19 @@ | |||
44 | */ | 44 | */ |
45 | 45 | ||
46 | struct _irqfd { | 46 | struct _irqfd { |
47 | struct kvm *kvm; | 47 | /* Used for MSI fast-path */ |
48 | struct eventfd_ctx *eventfd; | 48 | struct kvm *kvm; |
49 | int gsi; | 49 | wait_queue_t wait; |
50 | struct list_head list; | 50 | /* Update side is protected by irqfds.lock */ |
51 | poll_table pt; | 51 | struct kvm_kernel_irq_routing_entry __rcu *irq_entry; |
52 | wait_queue_t wait; | 52 | /* Used for level IRQ fast-path */ |
53 | struct work_struct inject; | 53 | int gsi; |
54 | struct work_struct shutdown; | 54 | struct work_struct inject; |
55 | /* Used for setup/shutdown */ | ||
56 | struct eventfd_ctx *eventfd; | ||
57 | struct list_head list; | ||
58 | poll_table pt; | ||
59 | struct work_struct shutdown; | ||
55 | }; | 60 | }; |
56 | 61 | ||
57 | static struct workqueue_struct *irqfd_cleanup_wq; | 62 | static struct workqueue_struct *irqfd_cleanup_wq; |
@@ -85,7 +90,7 @@ irqfd_shutdown(struct work_struct *work) | |||
85 | * We know no new events will be scheduled at this point, so block | 90 | * We know no new events will be scheduled at this point, so block |
86 | * until all previously outstanding events have completed | 91 | * until all previously outstanding events have completed |
87 | */ | 92 | */ |
88 | flush_work(&irqfd->inject); | 93 | flush_work_sync(&irqfd->inject); |
89 | 94 | ||
90 | /* | 95 | /* |
91 | * It is now safe to release the object's resources | 96 | * It is now safe to release the object's resources |
@@ -125,14 +130,22 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | |||
125 | { | 130 | { |
126 | struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); | 131 | struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); |
127 | unsigned long flags = (unsigned long)key; | 132 | unsigned long flags = (unsigned long)key; |
133 | struct kvm_kernel_irq_routing_entry *irq; | ||
134 | struct kvm *kvm = irqfd->kvm; | ||
128 | 135 | ||
129 | if (flags & POLLIN) | 136 | if (flags & POLLIN) { |
137 | rcu_read_lock(); | ||
138 | irq = rcu_dereference(irqfd->irq_entry); | ||
130 | /* An event has been signaled, inject an interrupt */ | 139 | /* An event has been signaled, inject an interrupt */ |
131 | schedule_work(&irqfd->inject); | 140 | if (irq) |
141 | kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); | ||
142 | else | ||
143 | schedule_work(&irqfd->inject); | ||
144 | rcu_read_unlock(); | ||
145 | } | ||
132 | 146 | ||
133 | if (flags & POLLHUP) { | 147 | if (flags & POLLHUP) { |
134 | /* The eventfd is closing, detach from KVM */ | 148 | /* The eventfd is closing, detach from KVM */ |
135 | struct kvm *kvm = irqfd->kvm; | ||
136 | unsigned long flags; | 149 | unsigned long flags; |
137 | 150 | ||
138 | spin_lock_irqsave(&kvm->irqfds.lock, flags); | 151 | spin_lock_irqsave(&kvm->irqfds.lock, flags); |
@@ -163,9 +176,31 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | |||
163 | add_wait_queue(wqh, &irqfd->wait); | 176 | add_wait_queue(wqh, &irqfd->wait); |
164 | } | 177 | } |
165 | 178 | ||
179 | /* Must be called under irqfds.lock */ | ||
180 | static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, | ||
181 | struct kvm_irq_routing_table *irq_rt) | ||
182 | { | ||
183 | struct kvm_kernel_irq_routing_entry *e; | ||
184 | struct hlist_node *n; | ||
185 | |||
186 | if (irqfd->gsi >= irq_rt->nr_rt_entries) { | ||
187 | rcu_assign_pointer(irqfd->irq_entry, NULL); | ||
188 | return; | ||
189 | } | ||
190 | |||
191 | hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) { | ||
192 | /* Only fast-path MSI. */ | ||
193 | if (e->type == KVM_IRQ_ROUTING_MSI) | ||
194 | rcu_assign_pointer(irqfd->irq_entry, e); | ||
195 | else | ||
196 | rcu_assign_pointer(irqfd->irq_entry, NULL); | ||
197 | } | ||
198 | } | ||
199 | |||
166 | static int | 200 | static int |
167 | kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) | 201 | kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) |
168 | { | 202 | { |
203 | struct kvm_irq_routing_table *irq_rt; | ||
169 | struct _irqfd *irqfd, *tmp; | 204 | struct _irqfd *irqfd, *tmp; |
170 | struct file *file = NULL; | 205 | struct file *file = NULL; |
171 | struct eventfd_ctx *eventfd = NULL; | 206 | struct eventfd_ctx *eventfd = NULL; |
@@ -215,6 +250,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) | |||
215 | goto fail; | 250 | goto fail; |
216 | } | 251 | } |
217 | 252 | ||
253 | irq_rt = rcu_dereference_protected(kvm->irq_routing, | ||
254 | lockdep_is_held(&kvm->irqfds.lock)); | ||
255 | irqfd_update(kvm, irqfd, irq_rt); | ||
256 | |||
218 | events = file->f_op->poll(file, &irqfd->pt); | 257 | events = file->f_op->poll(file, &irqfd->pt); |
219 | 258 | ||
220 | list_add_tail(&irqfd->list, &kvm->irqfds.items); | 259 | list_add_tail(&irqfd->list, &kvm->irqfds.items); |
@@ -271,8 +310,18 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) | |||
271 | spin_lock_irq(&kvm->irqfds.lock); | 310 | spin_lock_irq(&kvm->irqfds.lock); |
272 | 311 | ||
273 | list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { | 312 | list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { |
274 | if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) | 313 | if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) { |
314 | /* | ||
315 | * This rcu_assign_pointer is needed for when | ||
316 | * another thread calls kvm_irq_routing_update before | ||
317 | * we flush workqueue below (we synchronize with | ||
318 | * kvm_irq_routing_update using irqfds.lock). | ||
319 | * It is paired with synchronize_rcu done by caller | ||
320 | * of that function. | ||
321 | */ | ||
322 | rcu_assign_pointer(irqfd->irq_entry, NULL); | ||
275 | irqfd_deactivate(irqfd); | 323 | irqfd_deactivate(irqfd); |
324 | } | ||
276 | } | 325 | } |
277 | 326 | ||
278 | spin_unlock_irq(&kvm->irqfds.lock); | 327 | spin_unlock_irq(&kvm->irqfds.lock); |
@@ -322,6 +371,25 @@ kvm_irqfd_release(struct kvm *kvm) | |||
322 | } | 371 | } |
323 | 372 | ||
324 | /* | 373 | /* |
374 | * Change irq_routing and irqfd. | ||
375 | * Caller must invoke synchronize_rcu afterwards. | ||
376 | */ | ||
377 | void kvm_irq_routing_update(struct kvm *kvm, | ||
378 | struct kvm_irq_routing_table *irq_rt) | ||
379 | { | ||
380 | struct _irqfd *irqfd; | ||
381 | |||
382 | spin_lock_irq(&kvm->irqfds.lock); | ||
383 | |||
384 | rcu_assign_pointer(kvm->irq_routing, irq_rt); | ||
385 | |||
386 | list_for_each_entry(irqfd, &kvm->irqfds.items, list) | ||
387 | irqfd_update(kvm, irqfd, irq_rt); | ||
388 | |||
389 | spin_unlock_irq(&kvm->irqfds.lock); | ||
390 | } | ||
391 | |||
392 | /* | ||
325 | * create a host-wide workqueue for issuing deferred shutdown requests | 393 | * create a host-wide workqueue for issuing deferred shutdown requests |
326 | * aggregated from all vm* instances. We need our own isolated single-thread | 394 | * aggregated from all vm* instances. We need our own isolated single-thread |
327 | * queue to prevent deadlock against flushing the normal work-queue. | 395 | * queue to prevent deadlock against flushing the normal work-queue. |
@@ -510,7 +578,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
510 | 578 | ||
511 | mutex_lock(&kvm->slots_lock); | 579 | mutex_lock(&kvm->slots_lock); |
512 | 580 | ||
513 | /* Verify that there isnt a match already */ | 581 | /* Verify that there isn't a match already */ |
514 | if (ioeventfd_check_collision(kvm, p)) { | 582 | if (ioeventfd_check_collision(kvm, p)) { |
515 | ret = -EEXIST; | 583 | ret = -EEXIST; |
516 | goto unlock_fail; | 584 | goto unlock_fail; |
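The eventfd.c changes add an MSI fast path: each irqfd caches its routing entry in irq_entry, the wakeup callback injects directly through kvm_set_msi() under rcu_read_lock(), and updates publish the new pointer with rcu_assign_pointer() while holding irqfds.lock, relying on the synchronize_rcu() performed after kvm_irq_routing_update() for the grace period. Stripped of the KVM specifics, the publish/read pattern looks roughly like this (the names here are illustrative):

/* Illustrative RCU publish/read pattern mirroring irqfd->irq_entry. */
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct route {
        int gsi;
};

static struct route __rcu *cur_route;
static DEFINE_SPINLOCK(update_lock);

/* Update side: swap in the new entry, then wait out existing readers. */
static void route_update(struct route *new_route)
{
        struct route *old;

        spin_lock(&update_lock);
        old = rcu_dereference_protected(cur_route,
                                        lockdep_is_held(&update_lock));
        rcu_assign_pointer(cur_route, new_route);
        spin_unlock(&update_lock);

        synchronize_rcu();      /* after this, no reader can still see 'old' */
        kfree(old);
}

/* Read side, e.g. a wait-queue callback: lockless fast path. */
static int route_read_gsi(void)
{
        struct route *r;
        int gsi = -1;

        rcu_read_lock();
        r = rcu_dereference(cur_route);
        if (r)
                gsi = r->gsi;
        rcu_read_unlock();

        return gsi;
}

In the patch itself the grace period is provided by kvm_set_irq_routing()'s synchronize_rcu() rather than by the irqfd code, which is why the deassign path only clears the pointer and flushes the inject work.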
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 0b9df8303dcf..8df1ca104a7f 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -167,7 +167,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 
         ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
                      "vector=%x trig_mode=%x\n",
-                     entry->fields.dest, entry->fields.dest_mode,
+                     entry->fields.dest_id, entry->fields.dest_mode,
                      entry->fields.delivery_mode, entry->fields.vector,
                      entry->fields.trig_mode);
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 369e38010ad5..9f614b4e365f 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -17,7 +17,7 @@
  * Authors:
  *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
  *
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  */
 
 #include <linux/kvm_host.h>
@@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
         return r;
 }
 
-static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-                       struct kvm *kvm, int irq_source_id, int level)
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+                struct kvm *kvm, int irq_source_id, int level)
 {
         struct kvm_lapic_irq irq;
 
@@ -409,8 +409,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
         mutex_lock(&kvm->irq_lock);
         old = kvm->irq_routing;
-        rcu_assign_pointer(kvm->irq_routing, new);
+        kvm_irq_routing_update(kvm, new);
         mutex_unlock(&kvm->irq_lock);
+
         synchronize_rcu();
 
         new = old;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5186e728c53e..96ebc0679415 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5,7 +5,7 @@ | |||
5 | * machines without emulation or binary translation. | 5 | * machines without emulation or binary translation. |
6 | * | 6 | * |
7 | * Copyright (C) 2006 Qumranet, Inc. | 7 | * Copyright (C) 2006 Qumranet, Inc. |
8 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | 8 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. |
9 | * | 9 | * |
10 | * Authors: | 10 | * Authors: |
11 | * Avi Kivity <avi@qumranet.com> | 11 | * Avi Kivity <avi@qumranet.com> |
@@ -30,7 +30,7 @@ | |||
30 | #include <linux/debugfs.h> | 30 | #include <linux/debugfs.h> |
31 | #include <linux/highmem.h> | 31 | #include <linux/highmem.h> |
32 | #include <linux/file.h> | 32 | #include <linux/file.h> |
33 | #include <linux/sysdev.h> | 33 | #include <linux/syscore_ops.h> |
34 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
35 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
36 | #include <linux/cpumask.h> | 36 | #include <linux/cpumask.h> |
@@ -52,9 +52,9 @@ | |||
52 | #include <asm/io.h> | 52 | #include <asm/io.h> |
53 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
54 | #include <asm/pgtable.h> | 54 | #include <asm/pgtable.h> |
55 | #include <asm-generic/bitops/le.h> | ||
56 | 55 | ||
57 | #include "coalesced_mmio.h" | 56 | #include "coalesced_mmio.h" |
57 | #include "async_pf.h" | ||
58 | 58 | ||
59 | #define CREATE_TRACE_POINTS | 59 | #define CREATE_TRACE_POINTS |
60 | #include <trace/events/kvm.h> | 60 | #include <trace/events/kvm.h> |
@@ -68,7 +68,7 @@ MODULE_LICENSE("GPL"); | |||
68 | * kvm->lock --> kvm->slots_lock --> kvm->irq_lock | 68 | * kvm->lock --> kvm->slots_lock --> kvm->irq_lock |
69 | */ | 69 | */ |
70 | 70 | ||
71 | DEFINE_SPINLOCK(kvm_lock); | 71 | DEFINE_RAW_SPINLOCK(kvm_lock); |
72 | LIST_HEAD(vm_list); | 72 | LIST_HEAD(vm_list); |
73 | 73 | ||
74 | static cpumask_var_t cpus_hardware_enabled; | 74 | static cpumask_var_t cpus_hardware_enabled; |
@@ -89,7 +89,8 @@ static void hardware_disable_all(void); | |||
89 | 89 | ||
90 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); | 90 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); |
91 | 91 | ||
92 | static bool kvm_rebooting; | 92 | bool kvm_rebooting; |
93 | EXPORT_SYMBOL_GPL(kvm_rebooting); | ||
93 | 94 | ||
94 | static bool largepages_enabled = true; | 95 | static bool largepages_enabled = true; |
95 | 96 | ||
@@ -102,8 +103,26 @@ static pfn_t fault_pfn; | |||
102 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 103 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
103 | { | 104 | { |
104 | if (pfn_valid(pfn)) { | 105 | if (pfn_valid(pfn)) { |
105 | struct page *page = compound_head(pfn_to_page(pfn)); | 106 | int reserved; |
106 | return PageReserved(page); | 107 | struct page *tail = pfn_to_page(pfn); |
108 | struct page *head = compound_trans_head(tail); | ||
109 | reserved = PageReserved(head); | ||
110 | if (head != tail) { | ||
111 | /* | ||
112 | * "head" is not a dangling pointer | ||
113 | * (compound_trans_head takes care of that) | ||
114 | * but the hugepage may have been splitted | ||
115 | * from under us (and we may not hold a | ||
116 | * reference count on the head page so it can | ||
117 | * be reused before we run PageReferenced), so | ||
118 | * we've to check PageTail before returning | ||
119 | * what we just read. | ||
120 | */ | ||
121 | smp_rmb(); | ||
122 | if (PageTail(tail)) | ||
123 | return reserved; | ||
124 | } | ||
125 | return PageReserved(tail); | ||
107 | } | 126 | } |
108 | 127 | ||
109 | return true; | 128 | return true; |
@@ -117,6 +136,14 @@ void vcpu_load(struct kvm_vcpu *vcpu) | |||
117 | int cpu; | 136 | int cpu; |
118 | 137 | ||
119 | mutex_lock(&vcpu->mutex); | 138 | mutex_lock(&vcpu->mutex); |
139 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { | ||
140 | /* The thread running this VCPU changed. */ | ||
141 | struct pid *oldpid = vcpu->pid; | ||
142 | struct pid *newpid = get_task_pid(current, PIDTYPE_PID); | ||
143 | rcu_assign_pointer(vcpu->pid, newpid); | ||
144 | synchronize_rcu(); | ||
145 | put_pid(oldpid); | ||
146 | } | ||
120 | cpu = get_cpu(); | 147 | cpu = get_cpu(); |
121 | preempt_notifier_register(&vcpu->preempt_notifier); | 148 | preempt_notifier_register(&vcpu->preempt_notifier); |
122 | kvm_arch_vcpu_load(vcpu, cpu); | 149 | kvm_arch_vcpu_load(vcpu, cpu); |
@@ -145,13 +172,16 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
145 | 172 | ||
146 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); | 173 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); |
147 | 174 | ||
148 | raw_spin_lock(&kvm->requests_lock); | 175 | me = get_cpu(); |
149 | me = smp_processor_id(); | ||
150 | kvm_for_each_vcpu(i, vcpu, kvm) { | 176 | kvm_for_each_vcpu(i, vcpu, kvm) { |
151 | if (kvm_make_check_request(req, vcpu)) | 177 | kvm_make_request(req, vcpu); |
152 | continue; | ||
153 | cpu = vcpu->cpu; | 178 | cpu = vcpu->cpu; |
154 | if (cpus != NULL && cpu != -1 && cpu != me) | 179 | |
180 | /* Set ->requests bit before we read ->mode */ | ||
181 | smp_mb(); | ||
182 | |||
183 | if (cpus != NULL && cpu != -1 && cpu != me && | ||
184 | kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE) | ||
155 | cpumask_set_cpu(cpu, cpus); | 185 | cpumask_set_cpu(cpu, cpus); |
156 | } | 186 | } |
157 | if (unlikely(cpus == NULL)) | 187 | if (unlikely(cpus == NULL)) |
@@ -160,15 +190,19 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
160 | smp_call_function_many(cpus, ack_flush, NULL, 1); | 190 | smp_call_function_many(cpus, ack_flush, NULL, 1); |
161 | else | 191 | else |
162 | called = false; | 192 | called = false; |
163 | raw_spin_unlock(&kvm->requests_lock); | 193 | put_cpu(); |
164 | free_cpumask_var(cpus); | 194 | free_cpumask_var(cpus); |
165 | return called; | 195 | return called; |
166 | } | 196 | } |
167 | 197 | ||
168 | void kvm_flush_remote_tlbs(struct kvm *kvm) | 198 | void kvm_flush_remote_tlbs(struct kvm *kvm) |
169 | { | 199 | { |
200 | int dirty_count = kvm->tlbs_dirty; | ||
201 | |||
202 | smp_mb(); | ||
170 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) | 203 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) |
171 | ++kvm->stat.remote_tlb_flush; | 204 | ++kvm->stat.remote_tlb_flush; |
205 | cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); | ||
172 | } | 206 | } |
173 | 207 | ||
174 | void kvm_reload_remote_mmus(struct kvm *kvm) | 208 | void kvm_reload_remote_mmus(struct kvm *kvm) |
@@ -185,7 +219,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
185 | vcpu->cpu = -1; | 219 | vcpu->cpu = -1; |
186 | vcpu->kvm = kvm; | 220 | vcpu->kvm = kvm; |
187 | vcpu->vcpu_id = id; | 221 | vcpu->vcpu_id = id; |
222 | vcpu->pid = NULL; | ||
188 | init_waitqueue_head(&vcpu->wq); | 223 | init_waitqueue_head(&vcpu->wq); |
224 | kvm_async_pf_vcpu_init(vcpu); | ||
189 | 225 | ||
190 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 226 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
191 | if (!page) { | 227 | if (!page) { |
@@ -208,6 +244,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init); | |||
208 | 244 | ||
209 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) | 245 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) |
210 | { | 246 | { |
247 | put_pid(vcpu->pid); | ||
211 | kvm_arch_vcpu_uninit(vcpu); | 248 | kvm_arch_vcpu_uninit(vcpu); |
212 | free_page((unsigned long)vcpu->run); | 249 | free_page((unsigned long)vcpu->run); |
213 | } | 250 | } |
@@ -247,7 +284,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
247 | idx = srcu_read_lock(&kvm->srcu); | 284 | idx = srcu_read_lock(&kvm->srcu); |
248 | spin_lock(&kvm->mmu_lock); | 285 | spin_lock(&kvm->mmu_lock); |
249 | kvm->mmu_notifier_seq++; | 286 | kvm->mmu_notifier_seq++; |
250 | need_tlb_flush = kvm_unmap_hva(kvm, address); | 287 | need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; |
251 | spin_unlock(&kvm->mmu_lock); | 288 | spin_unlock(&kvm->mmu_lock); |
252 | srcu_read_unlock(&kvm->srcu, idx); | 289 | srcu_read_unlock(&kvm->srcu, idx); |
253 | 290 | ||
@@ -291,6 +328,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
291 | kvm->mmu_notifier_count++; | 328 | kvm->mmu_notifier_count++; |
292 | for (; start < end; start += PAGE_SIZE) | 329 | for (; start < end; start += PAGE_SIZE) |
293 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | 330 | need_tlb_flush |= kvm_unmap_hva(kvm, start); |
331 | need_tlb_flush |= kvm->tlbs_dirty; | ||
294 | spin_unlock(&kvm->mmu_lock); | 332 | spin_unlock(&kvm->mmu_lock); |
295 | srcu_read_unlock(&kvm->srcu, idx); | 333 | srcu_read_unlock(&kvm->srcu, idx); |
296 | 334 | ||
@@ -344,6 +382,22 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | |||
344 | return young; | 382 | return young; |
345 | } | 383 | } |
346 | 384 | ||
385 | static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, | ||
386 | struct mm_struct *mm, | ||
387 | unsigned long address) | ||
388 | { | ||
389 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
390 | int young, idx; | ||
391 | |||
392 | idx = srcu_read_lock(&kvm->srcu); | ||
393 | spin_lock(&kvm->mmu_lock); | ||
394 | young = kvm_test_age_hva(kvm, address); | ||
395 | spin_unlock(&kvm->mmu_lock); | ||
396 | srcu_read_unlock(&kvm->srcu, idx); | ||
397 | |||
398 | return young; | ||
399 | } | ||
400 | |||
347 | static void kvm_mmu_notifier_release(struct mmu_notifier *mn, | 401 | static void kvm_mmu_notifier_release(struct mmu_notifier *mn, |
348 | struct mm_struct *mm) | 402 | struct mm_struct *mm) |
349 | { | 403 | { |
@@ -360,6 +414,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | |||
360 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, | 414 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, |
361 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, | 415 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, |
362 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, | 416 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, |
417 | .test_young = kvm_mmu_notifier_test_young, | ||
363 | .change_pte = kvm_mmu_notifier_change_pte, | 418 | .change_pte = kvm_mmu_notifier_change_pte, |
364 | .release = kvm_mmu_notifier_release, | 419 | .release = kvm_mmu_notifier_release, |
365 | }; | 420 | }; |
@@ -381,11 +436,15 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) | |||
381 | 436 | ||
382 | static struct kvm *kvm_create_vm(void) | 437 | static struct kvm *kvm_create_vm(void) |
383 | { | 438 | { |
384 | int r = 0, i; | 439 | int r, i; |
385 | struct kvm *kvm = kvm_arch_create_vm(); | 440 | struct kvm *kvm = kvm_arch_alloc_vm(); |
386 | 441 | ||
387 | if (IS_ERR(kvm)) | 442 | if (!kvm) |
388 | goto out; | 443 | return ERR_PTR(-ENOMEM); |
444 | |||
445 | r = kvm_arch_init_vm(kvm); | ||
446 | if (r) | ||
447 | goto out_err_nodisable; | ||
389 | 448 | ||
390 | r = hardware_enable_all(); | 449 | r = hardware_enable_all(); |
391 | if (r) | 450 | if (r) |
@@ -399,49 +458,61 @@ static struct kvm *kvm_create_vm(void) | |||
399 | r = -ENOMEM; | 458 | r = -ENOMEM; |
400 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 459 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
401 | if (!kvm->memslots) | 460 | if (!kvm->memslots) |
402 | goto out_err; | 461 | goto out_err_nosrcu; |
403 | if (init_srcu_struct(&kvm->srcu)) | 462 | if (init_srcu_struct(&kvm->srcu)) |
404 | goto out_err; | 463 | goto out_err_nosrcu; |
405 | for (i = 0; i < KVM_NR_BUSES; i++) { | 464 | for (i = 0; i < KVM_NR_BUSES; i++) { |
406 | kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), | 465 | kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), |
407 | GFP_KERNEL); | 466 | GFP_KERNEL); |
408 | if (!kvm->buses[i]) { | 467 | if (!kvm->buses[i]) |
409 | cleanup_srcu_struct(&kvm->srcu); | ||
410 | goto out_err; | 468 | goto out_err; |
411 | } | ||
412 | } | ||
413 | |||
414 | r = kvm_init_mmu_notifier(kvm); | ||
415 | if (r) { | ||
416 | cleanup_srcu_struct(&kvm->srcu); | ||
417 | goto out_err; | ||
418 | } | 469 | } |
419 | 470 | ||
471 | spin_lock_init(&kvm->mmu_lock); | ||
420 | kvm->mm = current->mm; | 472 | kvm->mm = current->mm; |
421 | atomic_inc(&kvm->mm->mm_count); | 473 | atomic_inc(&kvm->mm->mm_count); |
422 | spin_lock_init(&kvm->mmu_lock); | ||
423 | raw_spin_lock_init(&kvm->requests_lock); | ||
424 | kvm_eventfd_init(kvm); | 474 | kvm_eventfd_init(kvm); |
425 | mutex_init(&kvm->lock); | 475 | mutex_init(&kvm->lock); |
426 | mutex_init(&kvm->irq_lock); | 476 | mutex_init(&kvm->irq_lock); |
427 | mutex_init(&kvm->slots_lock); | 477 | mutex_init(&kvm->slots_lock); |
428 | atomic_set(&kvm->users_count, 1); | 478 | atomic_set(&kvm->users_count, 1); |
429 | spin_lock(&kvm_lock); | 479 | |
480 | r = kvm_init_mmu_notifier(kvm); | ||
481 | if (r) | ||
482 | goto out_err; | ||
483 | |||
484 | raw_spin_lock(&kvm_lock); | ||
430 | list_add(&kvm->vm_list, &vm_list); | 485 | list_add(&kvm->vm_list, &vm_list); |
431 | spin_unlock(&kvm_lock); | 486 | raw_spin_unlock(&kvm_lock); |
432 | out: | 487 | |
433 | return kvm; | 488 | return kvm; |
434 | 489 | ||
435 | out_err: | 490 | out_err: |
491 | cleanup_srcu_struct(&kvm->srcu); | ||
492 | out_err_nosrcu: | ||
436 | hardware_disable_all(); | 493 | hardware_disable_all(); |
437 | out_err_nodisable: | 494 | out_err_nodisable: |
438 | for (i = 0; i < KVM_NR_BUSES; i++) | 495 | for (i = 0; i < KVM_NR_BUSES; i++) |
439 | kfree(kvm->buses[i]); | 496 | kfree(kvm->buses[i]); |
440 | kfree(kvm->memslots); | 497 | kfree(kvm->memslots); |
441 | kfree(kvm); | 498 | kvm_arch_free_vm(kvm); |
442 | return ERR_PTR(r); | 499 | return ERR_PTR(r); |
443 | } | 500 | } |
444 | 501 | ||
502 | static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) | ||
503 | { | ||
504 | if (!memslot->dirty_bitmap) | ||
505 | return; | ||
506 | |||
507 | if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE) | ||
508 | vfree(memslot->dirty_bitmap_head); | ||
509 | else | ||
510 | kfree(memslot->dirty_bitmap_head); | ||
511 | |||
512 | memslot->dirty_bitmap = NULL; | ||
513 | memslot->dirty_bitmap_head = NULL; | ||
514 | } | ||
515 | |||
445 | /* | 516 | /* |
446 | * Free any memory in @free but not in @dont. | 517 | * Free any memory in @free but not in @dont. |
447 | */ | 518 | */ |
@@ -454,7 +525,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | |||
454 | vfree(free->rmap); | 525 | vfree(free->rmap); |
455 | 526 | ||
456 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 527 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
457 | vfree(free->dirty_bitmap); | 528 | kvm_destroy_dirty_bitmap(free); |
458 | 529 | ||
459 | 530 | ||
460 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 531 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
@@ -465,7 +536,6 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | |||
465 | } | 536 | } |
466 | 537 | ||
467 | free->npages = 0; | 538 | free->npages = 0; |
468 | free->dirty_bitmap = NULL; | ||
469 | free->rmap = NULL; | 539 | free->rmap = NULL; |
470 | } | 540 | } |
471 | 541 | ||
@@ -486,9 +556,9 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
486 | struct mm_struct *mm = kvm->mm; | 556 | struct mm_struct *mm = kvm->mm; |
487 | 557 | ||
488 | kvm_arch_sync_events(kvm); | 558 | kvm_arch_sync_events(kvm); |
489 | spin_lock(&kvm_lock); | 559 | raw_spin_lock(&kvm_lock); |
490 | list_del(&kvm->vm_list); | 560 | list_del(&kvm->vm_list); |
491 | spin_unlock(&kvm_lock); | 561 | raw_spin_unlock(&kvm_lock); |
492 | kvm_free_irq_routing(kvm); | 562 | kvm_free_irq_routing(kvm); |
493 | for (i = 0; i < KVM_NR_BUSES; i++) | 563 | for (i = 0; i < KVM_NR_BUSES; i++) |
494 | kvm_io_bus_destroy(kvm->buses[i]); | 564 | kvm_io_bus_destroy(kvm->buses[i]); |
@@ -499,6 +569,9 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
499 | kvm_arch_flush_shadow(kvm); | 569 | kvm_arch_flush_shadow(kvm); |
500 | #endif | 570 | #endif |
501 | kvm_arch_destroy_vm(kvm); | 571 | kvm_arch_destroy_vm(kvm); |
572 | kvm_free_physmem(kvm); | ||
573 | cleanup_srcu_struct(&kvm->srcu); | ||
574 | kvm_arch_free_vm(kvm); | ||
502 | hardware_disable_all(); | 575 | hardware_disable_all(); |
503 | mmdrop(mm); | 576 | mmdrop(mm); |
504 | } | 577 | } |
@@ -527,6 +600,29 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
527 | return 0; | 600 | return 0; |
528 | } | 601 | } |
529 | 602 | ||
603 | #ifndef CONFIG_S390 | ||
604 | /* | ||
605 | * Allocation size is twice as large as the actual dirty bitmap size. | ||
606 | * This makes it possible to do double buffering: see x86's | ||
607 | * kvm_vm_ioctl_get_dirty_log(). | ||
608 | */ | ||
609 | static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) | ||
610 | { | ||
611 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); | ||
612 | |||
613 | if (dirty_bytes > PAGE_SIZE) | ||
614 | memslot->dirty_bitmap = vzalloc(dirty_bytes); | ||
615 | else | ||
616 | memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL); | ||
617 | |||
618 | if (!memslot->dirty_bitmap) | ||
619 | return -ENOMEM; | ||
620 | |||
621 | memslot->dirty_bitmap_head = memslot->dirty_bitmap; | ||
622 | return 0; | ||
623 | } | ||
624 | #endif /* !CONFIG_S390 */ | ||
625 | |||
530 | /* | 626 | /* |
531 | * Allocate some memory and give it an address in the guest physical address | 627 | * Allocate some memory and give it an address in the guest physical address |
532 | * space. | 628 | * space. |
@@ -539,7 +635,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
539 | struct kvm_userspace_memory_region *mem, | 635 | struct kvm_userspace_memory_region *mem, |
540 | int user_alloc) | 636 | int user_alloc) |
541 | { | 637 | { |
542 | int r, flush_shadow = 0; | 638 | int r; |
543 | gfn_t base_gfn; | 639 | gfn_t base_gfn; |
544 | unsigned long npages; | 640 | unsigned long npages; |
545 | unsigned long i; | 641 | unsigned long i; |
@@ -553,7 +649,12 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
553 | goto out; | 649 | goto out; |
554 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) | 650 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) |
555 | goto out; | 651 | goto out; |
556 | if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) | 652 | /* We can read the guest memory with __xxx_user() later on. */ |
653 | if (user_alloc && | ||
654 | ((mem->userspace_addr & (PAGE_SIZE - 1)) || | ||
655 | !access_ok(VERIFY_WRITE, | ||
656 | (void __user *)(unsigned long)mem->userspace_addr, | ||
657 | mem->memory_size))) | ||
557 | goto out; | 658 | goto out; |
558 | if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | 659 | if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) |
559 | goto out; | 660 | goto out; |
@@ -604,13 +705,11 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
604 | /* Allocate if a slot is being created */ | 705 | /* Allocate if a slot is being created */ |
605 | #ifndef CONFIG_S390 | 706 | #ifndef CONFIG_S390 |
606 | if (npages && !new.rmap) { | 707 | if (npages && !new.rmap) { |
607 | new.rmap = vmalloc(npages * sizeof(*new.rmap)); | 708 | new.rmap = vzalloc(npages * sizeof(*new.rmap)); |
608 | 709 | ||
609 | if (!new.rmap) | 710 | if (!new.rmap) |
610 | goto out_free; | 711 | goto out_free; |
611 | 712 | ||
612 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); | ||
613 | |||
614 | new.user_alloc = user_alloc; | 713 | new.user_alloc = user_alloc; |
615 | new.userspace_addr = mem->userspace_addr; | 714 | new.userspace_addr = mem->userspace_addr; |
616 | } | 715 | } |
@@ -633,14 +732,11 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
633 | >> KVM_HPAGE_GFN_SHIFT(level)); | 732 | >> KVM_HPAGE_GFN_SHIFT(level)); |
634 | lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); | 733 | lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); |
635 | 734 | ||
636 | new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); | 735 | new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i])); |
637 | 736 | ||
638 | if (!new.lpage_info[i]) | 737 | if (!new.lpage_info[i]) |
639 | goto out_free; | 738 | goto out_free; |
640 | 739 | ||
641 | memset(new.lpage_info[i], 0, | ||
642 | lpages * sizeof(*new.lpage_info[i])); | ||
643 | |||
644 | if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) | 740 | if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) |
645 | new.lpage_info[i][0].write_count = 1; | 741 | new.lpage_info[i][0].write_count = 1; |
646 | if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) | 742 | if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) |
@@ -661,15 +757,9 @@ skip_lpage: | |||
661 | 757 | ||
662 | /* Allocate page dirty bitmap if needed */ | 758 | /* Allocate page dirty bitmap if needed */ |
663 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 759 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
664 | unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new); | 760 | if (kvm_create_dirty_bitmap(&new) < 0) |
665 | |||
666 | new.dirty_bitmap = vmalloc(dirty_bytes); | ||
667 | if (!new.dirty_bitmap) | ||
668 | goto out_free; | 761 | goto out_free; |
669 | memset(new.dirty_bitmap, 0, dirty_bytes); | ||
670 | /* destroy any largepage mappings for dirty tracking */ | 762 | /* destroy any largepage mappings for dirty tracking */ |
671 | if (old.npages) | ||
672 | flush_shadow = 1; | ||
673 | } | 763 | } |
674 | #else /* not defined CONFIG_S390 */ | 764 | #else /* not defined CONFIG_S390 */ |
675 | new.user_alloc = user_alloc; | 765 | new.user_alloc = user_alloc; |
@@ -685,6 +775,7 @@ skip_lpage: | |||
685 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 775 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
686 | if (mem->slot >= slots->nmemslots) | 776 | if (mem->slot >= slots->nmemslots) |
687 | slots->nmemslots = mem->slot + 1; | 777 | slots->nmemslots = mem->slot + 1; |
778 | slots->generation++; | ||
688 | slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; | 779 | slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; |
689 | 780 | ||
690 | old_memslots = kvm->memslots; | 781 | old_memslots = kvm->memslots; |
@@ -705,14 +796,12 @@ skip_lpage: | |||
705 | if (r) | 796 | if (r) |
706 | goto out_free; | 797 | goto out_free; |
707 | 798 | ||
708 | #ifdef CONFIG_DMAR | ||
709 | /* map the pages in iommu page table */ | 799 | /* map the pages in iommu page table */ |
710 | if (npages) { | 800 | if (npages) { |
711 | r = kvm_iommu_map_pages(kvm, &new); | 801 | r = kvm_iommu_map_pages(kvm, &new); |
712 | if (r) | 802 | if (r) |
713 | goto out_free; | 803 | goto out_free; |
714 | } | 804 | } |
715 | #endif | ||
716 | 805 | ||
717 | r = -ENOMEM; | 806 | r = -ENOMEM; |
718 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 807 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
@@ -721,6 +810,7 @@ skip_lpage: | |||
721 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 810 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
722 | if (mem->slot >= slots->nmemslots) | 811 | if (mem->slot >= slots->nmemslots) |
723 | slots->nmemslots = mem->slot + 1; | 812 | slots->nmemslots = mem->slot + 1; |
813 | slots->generation++; | ||
724 | 814 | ||
725 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | 815 | /* actual memory is freed via old in kvm_free_physmem_slot below */ |
726 | if (!npages) { | 816 | if (!npages) { |
@@ -740,9 +830,6 @@ skip_lpage: | |||
740 | kvm_free_physmem_slot(&old, &new); | 830 | kvm_free_physmem_slot(&old, &new); |
741 | kfree(old_memslots); | 831 | kfree(old_memslots); |
742 | 832 | ||
743 | if (flush_shadow) | ||
744 | kvm_arch_flush_shadow(kvm); | ||
745 | |||
746 | return 0; | 833 | return 0; |
747 | 834 | ||
748 | out_free: | 835 | out_free: |
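
Annotation: both install points now bump slots->generation while a fresh copy of the slot array is published, which is what later lets cached gpa-to-hva translations (kvm_write_guest_cached() below) notice that their snapshot is stale. A minimal user-space sketch of that copy-bump-publish pattern follows; publish_slot() and the struct names are invented for illustration, and the RCU/SRCU synchronization used by the kernel is reduced to a comment.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NSLOTS 4

struct slot { uint64_t base_gfn, npages; };

struct slots {
    uint64_t generation;              /* bumped on every published change */
    struct slot slot[NSLOTS];
};

/* Build a modified copy, bump the generation, and hand back the new array.
 * In the kernel the swap is rcu_assign_pointer() + synchronize_srcu() and the
 * old array is freed afterwards; that part is left out here. */
static struct slots *publish_slot(const struct slots *old, int idx, struct slot new_slot)
{
    struct slots *next = malloc(sizeof(*next));

    if (!next)
        return NULL;
    *next = *old;
    next->slot[idx] = new_slot;
    next->generation++;               /* readers compare against this */
    return next;
}

int main(void)
{
    struct slots initial = { .generation = 0 };
    uint64_t seen = initial.generation;
    struct slots *cur = publish_slot(&initial, 0,
                                     (struct slot){ .base_gfn = 0x100, .npages = 64 });

    printf("stale=%d\n", cur && seen != cur->generation);   /* stale=1 */
    free(cur);
    return 0;
}
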
@@ -851,10 +938,10 @@ int kvm_is_error_hva(unsigned long addr) | |||
851 | } | 938 | } |
852 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); | 939 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); |
853 | 940 | ||
854 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 941 | static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, |
942 | gfn_t gfn) | ||
855 | { | 943 | { |
856 | int i; | 944 | int i; |
857 | struct kvm_memslots *slots = kvm_memslots(kvm); | ||
858 | 945 | ||
859 | for (i = 0; i < slots->nmemslots; ++i) { | 946 | for (i = 0; i < slots->nmemslots; ++i) { |
860 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 947 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
@@ -865,6 +952,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | |||
865 | } | 952 | } |
866 | return NULL; | 953 | return NULL; |
867 | } | 954 | } |
955 | |||
956 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | ||
957 | { | ||
958 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); | ||
959 | } | ||
868 | EXPORT_SYMBOL_GPL(gfn_to_memslot); | 960 | EXPORT_SYMBOL_GPL(gfn_to_memslot); |
869 | 961 | ||
870 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 962 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
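
Annotation: gfn_to_memslot() becomes a thin wrapper around __gfn_to_memslot(), which takes an explicit kvm_memslots pointer so callers holding a particular snapshot (such as the hva cache added later in this patch) can search it directly. The lookup itself is still a linear scan over [base_gfn, base_gfn + npages); a compact, self-contained version of that scan is below, with find_memslot() as a made-up name.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

struct memslot { gfn_t base_gfn; uint64_t npages; };

/* Linear search mirroring __gfn_to_memslot(): first slot whose range contains gfn. */
static const struct memslot *find_memslot(const struct memslot *slots,
                                          size_t nslots, gfn_t gfn)
{
    for (size_t i = 0; i < nslots; i++) {
        const struct memslot *m = &slots[i];

        if (gfn >= m->base_gfn && gfn < m->base_gfn + m->npages)
            return m;
    }
    return NULL;
}

int main(void)
{
    const struct memslot slots[] = {
        { .base_gfn = 0x000, .npages = 256 },
        { .base_gfn = 0x400, .npages = 512 },
    };
    const struct memslot *hit = find_memslot(slots, 2, 0x410);
    const struct memslot *miss = find_memslot(slots, 2, 0x300);

    printf("hit slot %ld, miss %s\n",
           hit ? (long)(hit - slots) : -1L,
           miss ? "found" : "NULL (gap between slots)");
    return 0;
}
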
@@ -910,97 +1002,203 @@ out: | |||
910 | return size; | 1002 | return size; |
911 | } | 1003 | } |
912 | 1004 | ||
913 | int memslot_id(struct kvm *kvm, gfn_t gfn) | 1005 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, |
1006 | gfn_t *nr_pages) | ||
914 | { | 1007 | { |
915 | int i; | 1008 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
916 | struct kvm_memslots *slots = kvm_memslots(kvm); | 1009 | return bad_hva(); |
917 | struct kvm_memory_slot *memslot = NULL; | ||
918 | 1010 | ||
919 | for (i = 0; i < slots->nmemslots; ++i) { | 1011 | if (nr_pages) |
920 | memslot = &slots->memslots[i]; | 1012 | *nr_pages = slot->npages - (gfn - slot->base_gfn); |
921 | 1013 | ||
922 | if (gfn >= memslot->base_gfn | 1014 | return gfn_to_hva_memslot(slot, gfn); |
923 | && gfn < memslot->base_gfn + memslot->npages) | 1015 | } |
924 | break; | ||
925 | } | ||
926 | 1016 | ||
927 | return memslot - slots->memslots; | 1017 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
1018 | { | ||
1019 | return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); | ||
928 | } | 1020 | } |
1021 | EXPORT_SYMBOL_GPL(gfn_to_hva); | ||
929 | 1022 | ||
930 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | 1023 | static pfn_t get_fault_pfn(void) |
931 | { | 1024 | { |
932 | return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; | 1025 | get_page(fault_page); |
1026 | return fault_pfn; | ||
933 | } | 1027 | } |
934 | 1028 | ||
935 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 1029 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, |
1030 | unsigned long start, int write, struct page **page) | ||
936 | { | 1031 | { |
937 | struct kvm_memory_slot *slot; | 1032 | int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; |
938 | 1033 | ||
939 | slot = gfn_to_memslot(kvm, gfn); | 1034 | if (write) |
940 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | 1035 | flags |= FOLL_WRITE; |
941 | return bad_hva(); | 1036 | |
942 | return gfn_to_hva_memslot(slot, gfn); | 1037 | return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); |
943 | } | 1038 | } |
944 | EXPORT_SYMBOL_GPL(gfn_to_hva); | ||
945 | 1039 | ||
946 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) | 1040 | static inline int check_user_page_hwpoison(unsigned long addr) |
1041 | { | ||
1042 | int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; | ||
1043 | |||
1044 | rc = __get_user_pages(current, current->mm, addr, 1, | ||
1045 | flags, NULL, NULL, NULL); | ||
1046 | return rc == -EHWPOISON; | ||
1047 | } | ||
1048 | |||
1049 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, | ||
1050 | bool *async, bool write_fault, bool *writable) | ||
947 | { | 1051 | { |
948 | struct page *page[1]; | 1052 | struct page *page[1]; |
949 | int npages; | 1053 | int npages = 0; |
950 | pfn_t pfn; | 1054 | pfn_t pfn; |
951 | 1055 | ||
952 | might_sleep(); | 1056 | /* we can do it either atomically or asynchronously, not both */ |
1057 | BUG_ON(atomic && async); | ||
1058 | |||
1059 | BUG_ON(!write_fault && !writable); | ||
953 | 1060 | ||
954 | npages = get_user_pages_fast(addr, 1, 1, page); | 1061 | if (writable) |
1062 | *writable = true; | ||
1063 | |||
1064 | if (atomic || async) | ||
1065 | npages = __get_user_pages_fast(addr, 1, 1, page); | ||
1066 | |||
1067 | if (unlikely(npages != 1) && !atomic) { | ||
1068 | might_sleep(); | ||
1069 | |||
1070 | if (writable) | ||
1071 | *writable = write_fault; | ||
1072 | |||
1073 | if (async) { | ||
1074 | down_read(¤t->mm->mmap_sem); | ||
1075 | npages = get_user_page_nowait(current, current->mm, | ||
1076 | addr, write_fault, page); | ||
1077 | up_read(¤t->mm->mmap_sem); | ||
1078 | } else | ||
1079 | npages = get_user_pages_fast(addr, 1, write_fault, | ||
1080 | page); | ||
1081 | |||
1082 | /* map read fault as writable if possible */ | ||
1083 | if (unlikely(!write_fault) && npages == 1) { | ||
1084 | struct page *wpage[1]; | ||
1085 | |||
1086 | npages = __get_user_pages_fast(addr, 1, 1, wpage); | ||
1087 | if (npages == 1) { | ||
1088 | *writable = true; | ||
1089 | put_page(page[0]); | ||
1090 | page[0] = wpage[0]; | ||
1091 | } | ||
1092 | npages = 1; | ||
1093 | } | ||
1094 | } | ||
955 | 1095 | ||
956 | if (unlikely(npages != 1)) { | 1096 | if (unlikely(npages != 1)) { |
957 | struct vm_area_struct *vma; | 1097 | struct vm_area_struct *vma; |
958 | 1098 | ||
1099 | if (atomic) | ||
1100 | return get_fault_pfn(); | ||
1101 | |||
959 | down_read(¤t->mm->mmap_sem); | 1102 | down_read(¤t->mm->mmap_sem); |
960 | if (is_hwpoison_address(addr)) { | 1103 | if (npages == -EHWPOISON || |
1104 | (!async && check_user_page_hwpoison(addr))) { | ||
961 | up_read(¤t->mm->mmap_sem); | 1105 | up_read(¤t->mm->mmap_sem); |
962 | get_page(hwpoison_page); | 1106 | get_page(hwpoison_page); |
963 | return page_to_pfn(hwpoison_page); | 1107 | return page_to_pfn(hwpoison_page); |
964 | } | 1108 | } |
965 | 1109 | ||
966 | vma = find_vma(current->mm, addr); | 1110 | vma = find_vma_intersection(current->mm, addr, addr+1); |
967 | 1111 | ||
968 | if (vma == NULL || addr < vma->vm_start || | 1112 | if (vma == NULL) |
969 | !(vma->vm_flags & VM_PFNMAP)) { | 1113 | pfn = get_fault_pfn(); |
970 | up_read(¤t->mm->mmap_sem); | 1114 | else if ((vma->vm_flags & VM_PFNMAP)) { |
971 | get_page(fault_page); | 1115 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + |
972 | return page_to_pfn(fault_page); | 1116 | vma->vm_pgoff; |
1117 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | ||
1118 | } else { | ||
1119 | if (async && (vma->vm_flags & VM_WRITE)) | ||
1120 | *async = true; | ||
1121 | pfn = get_fault_pfn(); | ||
973 | } | 1122 | } |
974 | |||
975 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
976 | up_read(¤t->mm->mmap_sem); | 1123 | up_read(¤t->mm->mmap_sem); |
977 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | ||
978 | } else | 1124 | } else |
979 | pfn = page_to_pfn(page[0]); | 1125 | pfn = page_to_pfn(page[0]); |
980 | 1126 | ||
981 | return pfn; | 1127 | return pfn; |
982 | } | 1128 | } |
983 | 1129 | ||
984 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | 1130 | pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) |
1131 | { | ||
1132 | return hva_to_pfn(kvm, addr, true, NULL, true, NULL); | ||
1133 | } | ||
1134 | EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); | ||
1135 | |||
1136 | static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, | ||
1137 | bool write_fault, bool *writable) | ||
985 | { | 1138 | { |
986 | unsigned long addr; | 1139 | unsigned long addr; |
987 | 1140 | ||
1141 | if (async) | ||
1142 | *async = false; | ||
1143 | |||
988 | addr = gfn_to_hva(kvm, gfn); | 1144 | addr = gfn_to_hva(kvm, gfn); |
989 | if (kvm_is_error_hva(addr)) { | 1145 | if (kvm_is_error_hva(addr)) { |
990 | get_page(bad_page); | 1146 | get_page(bad_page); |
991 | return page_to_pfn(bad_page); | 1147 | return page_to_pfn(bad_page); |
992 | } | 1148 | } |
993 | 1149 | ||
994 | return hva_to_pfn(kvm, addr); | 1150 | return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); |
1151 | } | ||
1152 | |||
1153 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) | ||
1154 | { | ||
1155 | return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL); | ||
1156 | } | ||
1157 | EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); | ||
1158 | |||
1159 | pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, | ||
1160 | bool write_fault, bool *writable) | ||
1161 | { | ||
1162 | return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable); | ||
1163 | } | ||
1164 | EXPORT_SYMBOL_GPL(gfn_to_pfn_async); | ||
1165 | |||
1166 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | ||
1167 | { | ||
1168 | return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL); | ||
995 | } | 1169 | } |
996 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | 1170 | EXPORT_SYMBOL_GPL(gfn_to_pfn); |
997 | 1171 | ||
1172 | pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | ||
1173 | bool *writable) | ||
1174 | { | ||
1175 | return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable); | ||
1176 | } | ||
1177 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); | ||
1178 | |||
998 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 1179 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, |
999 | struct kvm_memory_slot *slot, gfn_t gfn) | 1180 | struct kvm_memory_slot *slot, gfn_t gfn) |
1000 | { | 1181 | { |
1001 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); | 1182 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); |
1002 | return hva_to_pfn(kvm, addr); | 1183 | return hva_to_pfn(kvm, addr, false, NULL, true, NULL); |
1184 | } | ||
1185 | |||
1186 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | ||
1187 | int nr_pages) | ||
1188 | { | ||
1189 | unsigned long addr; | ||
1190 | gfn_t entry; | ||
1191 | |||
1192 | addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry); | ||
1193 | if (kvm_is_error_hva(addr)) | ||
1194 | return -1; | ||
1195 | |||
1196 | if (entry < nr_pages) | ||
1197 | return 0; | ||
1198 | |||
1199 | return __get_user_pages_fast(addr, nr_pages, 1, pages); | ||
1003 | } | 1200 | } |
1201 | EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); | ||
1004 | 1202 | ||
1005 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 1203 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) |
1006 | { | 1204 | { |
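
Annotation: the reworked hva_to_pfn() tries a non-sleeping page lookup first (for the atomic and async callers), falls back to a sleeping or non-waiting lookup, and then opportunistically retries a read fault with write access so the mapping can be made writable ("map read fault as writable if possible"). The sketch below is a heavily simplified user-space rendering of just that ordering: lookup_fast(), lookup_slow() and resolve() are stubs invented for illustration, the async case is folded into the atomic one, and the pfn/error plumbing is dropped.

#include <stdbool.h>
#include <stdio.h>

/* Stub lookups standing in for __get_user_pages_fast() and the sleeping
 * get_user_pages path; they return true when a page was "pinned" and are
 * entirely hypothetical. */
static bool lookup_fast(unsigned long addr, bool write)
{
    (void)addr; (void)write;
    return false;                     /* pretend the fast path misses */
}

static bool lookup_slow(unsigned long addr, bool write)
{
    (void)addr; (void)write;
    return true;                      /* pretend the sleeping path succeeds */
}

/* Mirrors the ordering in the patch: try the non-sleeping path, fall back to
 * the sleeping one, then try to upgrade a read-only lookup to a writable one. */
static bool resolve(unsigned long addr, bool atomic, bool write_fault, bool *writable)
{
    bool found = false;

    *writable = true;                 /* optimistic, as in the patch */

    if (atomic)
        found = lookup_fast(addr, true);

    if (!found && !atomic) {
        *writable = write_fault;      /* the slow path honours the fault type */
        found = lookup_slow(addr, write_fault);

        /* map read fault as writable if a writable lookup now succeeds */
        if (found && !write_fault && lookup_fast(addr, true))
            *writable = true;
    }
    return found;
}

int main(void)
{
    bool writable;
    bool found = resolve(0x1000, false, false, &writable);

    printf("found=%d writable=%d\n", found, writable);   /* found=1 writable=0 */
    return 0;
}
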
@@ -1091,7 +1289,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
1091 | addr = gfn_to_hva(kvm, gfn); | 1289 | addr = gfn_to_hva(kvm, gfn); |
1092 | if (kvm_is_error_hva(addr)) | 1290 | if (kvm_is_error_hva(addr)) |
1093 | return -EFAULT; | 1291 | return -EFAULT; |
1094 | r = copy_from_user(data, (void __user *)addr + offset, len); | 1292 | r = __copy_from_user(data, (void __user *)addr + offset, len); |
1095 | if (r) | 1293 | if (r) |
1096 | return -EFAULT; | 1294 | return -EFAULT; |
1097 | return 0; | 1295 | return 0; |
@@ -1175,9 +1373,51 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, | |||
1175 | return 0; | 1373 | return 0; |
1176 | } | 1374 | } |
1177 | 1375 | ||
1376 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | ||
1377 | gpa_t gpa) | ||
1378 | { | ||
1379 | struct kvm_memslots *slots = kvm_memslots(kvm); | ||
1380 | int offset = offset_in_page(gpa); | ||
1381 | gfn_t gfn = gpa >> PAGE_SHIFT; | ||
1382 | |||
1383 | ghc->gpa = gpa; | ||
1384 | ghc->generation = slots->generation; | ||
1385 | ghc->memslot = __gfn_to_memslot(slots, gfn); | ||
1386 | ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); | ||
1387 | if (!kvm_is_error_hva(ghc->hva)) | ||
1388 | ghc->hva += offset; | ||
1389 | else | ||
1390 | return -EFAULT; | ||
1391 | |||
1392 | return 0; | ||
1393 | } | ||
1394 | EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); | ||
1395 | |||
1396 | int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | ||
1397 | void *data, unsigned long len) | ||
1398 | { | ||
1399 | struct kvm_memslots *slots = kvm_memslots(kvm); | ||
1400 | int r; | ||
1401 | |||
1402 | if (slots->generation != ghc->generation) | ||
1403 | kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa); | ||
1404 | |||
1405 | if (kvm_is_error_hva(ghc->hva)) | ||
1406 | return -EFAULT; | ||
1407 | |||
1408 | r = copy_to_user((void __user *)ghc->hva, data, len); | ||
1409 | if (r) | ||
1410 | return -EFAULT; | ||
1411 | mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT); | ||
1412 | |||
1413 | return 0; | ||
1414 | } | ||
1415 | EXPORT_SYMBOL_GPL(kvm_write_guest_cached); | ||
1416 | |||
1178 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) | 1417 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) |
1179 | { | 1418 | { |
1180 | return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len); | 1419 | return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page, |
1420 | offset, len); | ||
1181 | } | 1421 | } |
1182 | EXPORT_SYMBOL_GPL(kvm_clear_guest_page); | 1422 | EXPORT_SYMBOL_GPL(kvm_clear_guest_page); |
1183 | 1423 | ||
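
Annotation: kvm_gfn_to_hva_cache_init() records the gpa-to-hva translation together with the slots generation bumped earlier in this patch, and kvm_write_guest_cached() redoes the lookup only when that generation has moved. The same pattern, reduced to a self-contained user-space sketch; translate() is a fake translation, not the kernel lookup.

#include <stdint.h>
#include <stdio.h>

struct hva_cache {
    uint64_t gpa;
    uint64_t hva;
    uint64_t generation;      /* slots generation the translation was made under */
};

/* Placeholder translation: pretend guest physical == host virtual + fixed offset. */
static uint64_t translate(uint64_t gpa)
{
    return gpa + 0x7f0000000000ULL;
}

static void cache_init(struct hva_cache *c, uint64_t gpa, uint64_t cur_generation)
{
    c->gpa = gpa;
    c->hva = translate(gpa);
    c->generation = cur_generation;
}

/* Use the cached hva, re-translating only if the memslot layout changed. */
static uint64_t cached_hva(struct hva_cache *c, uint64_t cur_generation)
{
    if (c->generation != cur_generation)
        cache_init(c, c->gpa, cur_generation);    /* stale: redo the lookup */
    return c->hva;
}

int main(void)
{
    struct hva_cache c;
    uint64_t generation = 0;

    cache_init(&c, 0x1000, generation);
    printf("hva=%#llx\n", (unsigned long long)cached_hva(&c, generation));

    generation++;                                 /* a slot was added or removed */
    printf("hva=%#llx (revalidated)\n", (unsigned long long)cached_hva(&c, generation));
    return 0;
}
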
@@ -1200,18 +1440,24 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) | |||
1200 | } | 1440 | } |
1201 | EXPORT_SYMBOL_GPL(kvm_clear_guest); | 1441 | EXPORT_SYMBOL_GPL(kvm_clear_guest); |
1202 | 1442 | ||
1203 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | 1443 | void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, |
1444 | gfn_t gfn) | ||
1204 | { | 1445 | { |
1205 | struct kvm_memory_slot *memslot; | ||
1206 | |||
1207 | memslot = gfn_to_memslot(kvm, gfn); | ||
1208 | if (memslot && memslot->dirty_bitmap) { | 1446 | if (memslot && memslot->dirty_bitmap) { |
1209 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1447 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
1210 | 1448 | ||
1211 | generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); | 1449 | __set_bit_le(rel_gfn, memslot->dirty_bitmap); |
1212 | } | 1450 | } |
1213 | } | 1451 | } |
1214 | 1452 | ||
1453 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | ||
1454 | { | ||
1455 | struct kvm_memory_slot *memslot; | ||
1456 | |||
1457 | memslot = gfn_to_memslot(kvm, gfn); | ||
1458 | mark_page_dirty_in_slot(kvm, memslot, gfn); | ||
1459 | } | ||
1460 | |||
1215 | /* | 1461 | /* |
1216 | * The vCPU has executed a HLT instruction with in-kernel mode enabled. | 1462 | * The vCPU has executed a HLT instruction with in-kernel mode enabled. |
1217 | */ | 1463 | */ |
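
Annotation: mark_page_dirty() is split so callers that already hold the memslot, such as kvm_write_guest_cached(), can skip the gfn-to-slot lookup; the bookkeeping itself is one bit per page at offset gfn - base_gfn. A tiny sketch of that bit arithmetic on a plain unsigned-long bitmap (the little-endian handling of __set_bit_le() is omitted here):

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

typedef uint64_t gfn_t;

/* Set the dirty bit for gfn relative to the slot's first page, like
 * __set_bit_le(rel_gfn, memslot->dirty_bitmap) in the patch. */
static void mark_dirty(unsigned long *bitmap, gfn_t base_gfn, gfn_t gfn)
{
    gfn_t rel_gfn = gfn - base_gfn;

    bitmap[rel_gfn / BITS_PER_LONG] |= 1UL << (rel_gfn % BITS_PER_LONG);
}

int main(void)
{
    unsigned long bitmap[2] = { 0, 0 };   /* enough for 128 pages on 64-bit */

    mark_dirty(bitmap, 0x1000, 0x1005);
    printf("%#lx %#lx\n", bitmap[0], bitmap[1]);   /* 0x20 0x0 */
    return 0;
}
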
@@ -1245,18 +1491,55 @@ void kvm_resched(struct kvm_vcpu *vcpu) | |||
1245 | } | 1491 | } |
1246 | EXPORT_SYMBOL_GPL(kvm_resched); | 1492 | EXPORT_SYMBOL_GPL(kvm_resched); |
1247 | 1493 | ||
1248 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) | 1494 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) |
1249 | { | 1495 | { |
1250 | ktime_t expires; | 1496 | struct kvm *kvm = me->kvm; |
1251 | DEFINE_WAIT(wait); | 1497 | struct kvm_vcpu *vcpu; |
1252 | 1498 | int last_boosted_vcpu = me->kvm->last_boosted_vcpu; | |
1253 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1499 | int yielded = 0; |
1254 | 1500 | int pass; | |
1255 | /* Sleep for 100 us, and hope lock-holder got scheduled */ | 1501 | int i; |
1256 | expires = ktime_add_ns(ktime_get(), 100000UL); | ||
1257 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1258 | 1502 | ||
1259 | finish_wait(&vcpu->wq, &wait); | 1503 | /* |
1504 | * We boost the priority of a VCPU that is runnable but not | ||
1505 | * currently running, because it got preempted by something | ||
1506 | * else and called schedule in __vcpu_run. Hopefully that | ||
1507 | * VCPU is holding the lock that we need and will release it. | ||
1508 | * We approximate round-robin by starting at the last boosted VCPU. | ||
1509 | */ | ||
1510 | for (pass = 0; pass < 2 && !yielded; pass++) { | ||
1511 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
1512 | struct task_struct *task = NULL; | ||
1513 | struct pid *pid; | ||
1514 | if (!pass && i < last_boosted_vcpu) { | ||
1515 | i = last_boosted_vcpu; | ||
1516 | continue; | ||
1517 | } else if (pass && i > last_boosted_vcpu) | ||
1518 | break; | ||
1519 | if (vcpu == me) | ||
1520 | continue; | ||
1521 | if (waitqueue_active(&vcpu->wq)) | ||
1522 | continue; | ||
1523 | rcu_read_lock(); | ||
1524 | pid = rcu_dereference(vcpu->pid); | ||
1525 | if (pid) | ||
1526 | task = get_pid_task(vcpu->pid, PIDTYPE_PID); | ||
1527 | rcu_read_unlock(); | ||
1528 | if (!task) | ||
1529 | continue; | ||
1530 | if (task->flags & PF_VCPU) { | ||
1531 | put_task_struct(task); | ||
1532 | continue; | ||
1533 | } | ||
1534 | if (yield_to(task, 1)) { | ||
1535 | put_task_struct(task); | ||
1536 | kvm->last_boosted_vcpu = i; | ||
1537 | yielded = 1; | ||
1538 | break; | ||
1539 | } | ||
1540 | put_task_struct(task); | ||
1541 | } | ||
1542 | } | ||
1260 | } | 1543 | } |
1261 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | 1544 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); |
1262 | 1545 | ||
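
Annotation: kvm_vcpu_on_spin() no longer sleeps for a blind 100 us; it now picks a runnable-but-preempted VCPU and yields to it, scanning round-robin from just past the last boosted VCPU and skipping itself, halted VCPUs, and VCPUs currently running guest code. The candidate selection, stripped of the pid/task handling and locking, could be sketched as follows; struct fake_vcpu and its two flags are invented stand-ins.

#include <stdbool.h>
#include <stdio.h>

struct fake_vcpu {
    bool halted;      /* stands in for waitqueue_active(&vcpu->wq) */
    bool in_guest;    /* stands in for task->flags & PF_VCPU */
};

/* Two passes approximate a round-robin scan that resumes just past the VCPU
 * boosted last time; returns the index to yield to, or -1. */
static int pick_boost_target(const struct fake_vcpu *v, int n, int me, int last_boosted)
{
    for (int pass = 0; pass < 2; pass++) {
        for (int i = 0; i < n; i++) {
            if (!pass && i < last_boosted) {
                i = last_boosted;     /* jump ahead; the loop increment moves past it */
                continue;
            } else if (pass && i > last_boosted)
                break;                /* second pass stops where the first started */
            if (i == me || v[i].halted || v[i].in_guest)
                continue;
            return i;                 /* kernel: yield_to() this VCPU's task */
        }
    }
    return -1;                        /* nobody worth boosting */
}

int main(void)
{
    const struct fake_vcpu v[4] = {
        { .halted = false, .in_guest = true  },
        { .halted = true,  .in_guest = false },
        { .halted = false, .in_guest = false },  /* the preempted lock holder */
        { .halted = false, .in_guest = true  },
    };

    printf("boost vcpu %d\n", pick_boost_target(v, 4, 3, 1));  /* boost vcpu 2 */
    return 0;
}
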
@@ -1305,6 +1588,7 @@ static struct file_operations kvm_vcpu_fops = { | |||
1305 | .unlocked_ioctl = kvm_vcpu_ioctl, | 1588 | .unlocked_ioctl = kvm_vcpu_ioctl, |
1306 | .compat_ioctl = kvm_vcpu_ioctl, | 1589 | .compat_ioctl = kvm_vcpu_ioctl, |
1307 | .mmap = kvm_vcpu_mmap, | 1590 | .mmap = kvm_vcpu_mmap, |
1591 | .llseek = noop_llseek, | ||
1308 | }; | 1592 | }; |
1309 | 1593 | ||
1310 | /* | 1594 | /* |
@@ -1412,6 +1696,7 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
1412 | if (arg) | 1696 | if (arg) |
1413 | goto out; | 1697 | goto out; |
1414 | r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); | 1698 | r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); |
1699 | trace_kvm_userspace_exit(vcpu->run->exit_reason, r); | ||
1415 | break; | 1700 | break; |
1416 | case KVM_GET_REGS: { | 1701 | case KVM_GET_REGS: { |
1417 | struct kvm_regs *kvm_regs; | 1702 | struct kvm_regs *kvm_regs; |
@@ -1774,11 +2059,12 @@ static struct file_operations kvm_vm_fops = { | |||
1774 | .compat_ioctl = kvm_vm_compat_ioctl, | 2059 | .compat_ioctl = kvm_vm_compat_ioctl, |
1775 | #endif | 2060 | #endif |
1776 | .mmap = kvm_vm_mmap, | 2061 | .mmap = kvm_vm_mmap, |
2062 | .llseek = noop_llseek, | ||
1777 | }; | 2063 | }; |
1778 | 2064 | ||
1779 | static int kvm_dev_ioctl_create_vm(void) | 2065 | static int kvm_dev_ioctl_create_vm(void) |
1780 | { | 2066 | { |
1781 | int fd, r; | 2067 | int r; |
1782 | struct kvm *kvm; | 2068 | struct kvm *kvm; |
1783 | 2069 | ||
1784 | kvm = kvm_create_vm(); | 2070 | kvm = kvm_create_vm(); |
@@ -1791,11 +2077,11 @@ static int kvm_dev_ioctl_create_vm(void) | |||
1791 | return r; | 2077 | return r; |
1792 | } | 2078 | } |
1793 | #endif | 2079 | #endif |
1794 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); | 2080 | r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); |
1795 | if (fd < 0) | 2081 | if (r < 0) |
1796 | kvm_put_kvm(kvm); | 2082 | kvm_put_kvm(kvm); |
1797 | 2083 | ||
1798 | return fd; | 2084 | return r; |
1799 | } | 2085 | } |
1800 | 2086 | ||
1801 | static long kvm_dev_ioctl_check_extension_generic(long arg) | 2087 | static long kvm_dev_ioctl_check_extension_generic(long arg) |
@@ -1867,6 +2153,7 @@ out: | |||
1867 | static struct file_operations kvm_chardev_ops = { | 2153 | static struct file_operations kvm_chardev_ops = { |
1868 | .unlocked_ioctl = kvm_dev_ioctl, | 2154 | .unlocked_ioctl = kvm_dev_ioctl, |
1869 | .compat_ioctl = kvm_dev_ioctl, | 2155 | .compat_ioctl = kvm_dev_ioctl, |
2156 | .llseek = noop_llseek, | ||
1870 | }; | 2157 | }; |
1871 | 2158 | ||
1872 | static struct miscdevice kvm_dev = { | 2159 | static struct miscdevice kvm_dev = { |
@@ -1875,7 +2162,7 @@ static struct miscdevice kvm_dev = { | |||
1875 | &kvm_chardev_ops, | 2162 | &kvm_chardev_ops, |
1876 | }; | 2163 | }; |
1877 | 2164 | ||
1878 | static void hardware_enable(void *junk) | 2165 | static void hardware_enable_nolock(void *junk) |
1879 | { | 2166 | { |
1880 | int cpu = raw_smp_processor_id(); | 2167 | int cpu = raw_smp_processor_id(); |
1881 | int r; | 2168 | int r; |
@@ -1895,7 +2182,14 @@ static void hardware_enable(void *junk) | |||
1895 | } | 2182 | } |
1896 | } | 2183 | } |
1897 | 2184 | ||
1898 | static void hardware_disable(void *junk) | 2185 | static void hardware_enable(void *junk) |
2186 | { | ||
2187 | raw_spin_lock(&kvm_lock); | ||
2188 | hardware_enable_nolock(junk); | ||
2189 | raw_spin_unlock(&kvm_lock); | ||
2190 | } | ||
2191 | |||
2192 | static void hardware_disable_nolock(void *junk) | ||
1899 | { | 2193 | { |
1900 | int cpu = raw_smp_processor_id(); | 2194 | int cpu = raw_smp_processor_id(); |
1901 | 2195 | ||
@@ -1905,32 +2199,39 @@ static void hardware_disable(void *junk) | |||
1905 | kvm_arch_hardware_disable(NULL); | 2199 | kvm_arch_hardware_disable(NULL); |
1906 | } | 2200 | } |
1907 | 2201 | ||
2202 | static void hardware_disable(void *junk) | ||
2203 | { | ||
2204 | raw_spin_lock(&kvm_lock); | ||
2205 | hardware_disable_nolock(junk); | ||
2206 | raw_spin_unlock(&kvm_lock); | ||
2207 | } | ||
2208 | |||
1908 | static void hardware_disable_all_nolock(void) | 2209 | static void hardware_disable_all_nolock(void) |
1909 | { | 2210 | { |
1910 | BUG_ON(!kvm_usage_count); | 2211 | BUG_ON(!kvm_usage_count); |
1911 | 2212 | ||
1912 | kvm_usage_count--; | 2213 | kvm_usage_count--; |
1913 | if (!kvm_usage_count) | 2214 | if (!kvm_usage_count) |
1914 | on_each_cpu(hardware_disable, NULL, 1); | 2215 | on_each_cpu(hardware_disable_nolock, NULL, 1); |
1915 | } | 2216 | } |
1916 | 2217 | ||
1917 | static void hardware_disable_all(void) | 2218 | static void hardware_disable_all(void) |
1918 | { | 2219 | { |
1919 | spin_lock(&kvm_lock); | 2220 | raw_spin_lock(&kvm_lock); |
1920 | hardware_disable_all_nolock(); | 2221 | hardware_disable_all_nolock(); |
1921 | spin_unlock(&kvm_lock); | 2222 | raw_spin_unlock(&kvm_lock); |
1922 | } | 2223 | } |
1923 | 2224 | ||
1924 | static int hardware_enable_all(void) | 2225 | static int hardware_enable_all(void) |
1925 | { | 2226 | { |
1926 | int r = 0; | 2227 | int r = 0; |
1927 | 2228 | ||
1928 | spin_lock(&kvm_lock); | 2229 | raw_spin_lock(&kvm_lock); |
1929 | 2230 | ||
1930 | kvm_usage_count++; | 2231 | kvm_usage_count++; |
1931 | if (kvm_usage_count == 1) { | 2232 | if (kvm_usage_count == 1) { |
1932 | atomic_set(&hardware_enable_failed, 0); | 2233 | atomic_set(&hardware_enable_failed, 0); |
1933 | on_each_cpu(hardware_enable, NULL, 1); | 2234 | on_each_cpu(hardware_enable_nolock, NULL, 1); |
1934 | 2235 | ||
1935 | if (atomic_read(&hardware_enable_failed)) { | 2236 | if (atomic_read(&hardware_enable_failed)) { |
1936 | hardware_disable_all_nolock(); | 2237 | hardware_disable_all_nolock(); |
@@ -1938,7 +2239,7 @@ static int hardware_enable_all(void) | |||
1938 | } | 2239 | } |
1939 | } | 2240 | } |
1940 | 2241 | ||
1941 | spin_unlock(&kvm_lock); | 2242 | raw_spin_unlock(&kvm_lock); |
1942 | 2243 | ||
1943 | return r; | 2244 | return r; |
1944 | } | 2245 | } |
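
Annotation: hardware_enable_all()/hardware_disable_all() keep the first-user/last-user refcount but now take the raw kvm_lock, with the per-CPU work moved into *_nolock helpers so the syscore suspend/resume paths added further down can call them without the lock. The refcount pattern itself, reduced to a user-space sketch with an ordinary mutex and placeholder arch_enable()/arch_disable() hooks:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int usage_count;

/* Placeholders for the per-CPU arch enable/disable work. */
static void arch_enable(void)  { puts("virtualization enabled");  }
static void arch_disable(void) { puts("virtualization disabled"); }

/* The first user switches the hardware on; the count only tracks VM creation. */
static void enable_all(void)
{
    pthread_mutex_lock(&lock);
    if (usage_count++ == 0)
        arch_enable();
    pthread_mutex_unlock(&lock);
}

/* The last user switches it back off. */
static void disable_all(void)
{
    pthread_mutex_lock(&lock);
    if (--usage_count == 0)
        arch_disable();
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    enable_all();    /* first VM: enables */
    enable_all();    /* second VM: no-op */
    disable_all();   /* second VM gone: still enabled */
    disable_all();   /* last VM gone: disables */
    return 0;
}
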
@@ -1968,18 +2269,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
1968 | } | 2269 | } |
1969 | 2270 | ||
1970 | 2271 | ||
1971 | asmlinkage void kvm_handle_fault_on_reboot(void) | 2272 | asmlinkage void kvm_spurious_fault(void) |
1972 | { | 2273 | { |
1973 | if (kvm_rebooting) { | ||
1974 | /* spin while reset goes on */ | ||
1975 | local_irq_enable(); | ||
1976 | while (true) | ||
1977 | ; | ||
1978 | } | ||
1979 | /* Fault while not rebooting. We want the trace. */ | 2274 | /* Fault while not rebooting. We want the trace. */ |
1980 | BUG(); | 2275 | BUG(); |
1981 | } | 2276 | } |
1982 | EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); | 2277 | EXPORT_SYMBOL_GPL(kvm_spurious_fault); |
1983 | 2278 | ||
1984 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | 2279 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, |
1985 | void *v) | 2280 | void *v) |
@@ -1992,7 +2287,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | |||
1992 | */ | 2287 | */ |
1993 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | 2288 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); |
1994 | kvm_rebooting = true; | 2289 | kvm_rebooting = true; |
1995 | on_each_cpu(hardware_disable, NULL, 1); | 2290 | on_each_cpu(hardware_disable_nolock, NULL, 1); |
1996 | return NOTIFY_OK; | 2291 | return NOTIFY_OK; |
1997 | } | 2292 | } |
1998 | 2293 | ||
@@ -2106,10 +2401,10 @@ static int vm_stat_get(void *_offset, u64 *val) | |||
2106 | struct kvm *kvm; | 2401 | struct kvm *kvm; |
2107 | 2402 | ||
2108 | *val = 0; | 2403 | *val = 0; |
2109 | spin_lock(&kvm_lock); | 2404 | raw_spin_lock(&kvm_lock); |
2110 | list_for_each_entry(kvm, &vm_list, vm_list) | 2405 | list_for_each_entry(kvm, &vm_list, vm_list) |
2111 | *val += *(u32 *)((void *)kvm + offset); | 2406 | *val += *(u32 *)((void *)kvm + offset); |
2112 | spin_unlock(&kvm_lock); | 2407 | raw_spin_unlock(&kvm_lock); |
2113 | return 0; | 2408 | return 0; |
2114 | } | 2409 | } |
2115 | 2410 | ||
@@ -2123,12 +2418,12 @@ static int vcpu_stat_get(void *_offset, u64 *val) | |||
2123 | int i; | 2418 | int i; |
2124 | 2419 | ||
2125 | *val = 0; | 2420 | *val = 0; |
2126 | spin_lock(&kvm_lock); | 2421 | raw_spin_lock(&kvm_lock); |
2127 | list_for_each_entry(kvm, &vm_list, vm_list) | 2422 | list_for_each_entry(kvm, &vm_list, vm_list) |
2128 | kvm_for_each_vcpu(i, vcpu, kvm) | 2423 | kvm_for_each_vcpu(i, vcpu, kvm) |
2129 | *val += *(u32 *)((void *)vcpu + offset); | 2424 | *val += *(u32 *)((void *)vcpu + offset); |
2130 | 2425 | ||
2131 | spin_unlock(&kvm_lock); | 2426 | raw_spin_unlock(&kvm_lock); |
2132 | return 0; | 2427 | return 0; |
2133 | } | 2428 | } |
2134 | 2429 | ||
@@ -2159,31 +2454,26 @@ static void kvm_exit_debug(void) | |||
2159 | debugfs_remove(kvm_debugfs_dir); | 2454 | debugfs_remove(kvm_debugfs_dir); |
2160 | } | 2455 | } |
2161 | 2456 | ||
2162 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 2457 | static int kvm_suspend(void) |
2163 | { | 2458 | { |
2164 | if (kvm_usage_count) | 2459 | if (kvm_usage_count) |
2165 | hardware_disable(NULL); | 2460 | hardware_disable_nolock(NULL); |
2166 | return 0; | 2461 | return 0; |
2167 | } | 2462 | } |
2168 | 2463 | ||
2169 | static int kvm_resume(struct sys_device *dev) | 2464 | static void kvm_resume(void) |
2170 | { | 2465 | { |
2171 | if (kvm_usage_count) | 2466 | if (kvm_usage_count) { |
2172 | hardware_enable(NULL); | 2467 | WARN_ON(raw_spin_is_locked(&kvm_lock)); |
2173 | return 0; | 2468 | hardware_enable_nolock(NULL); |
2469 | } | ||
2174 | } | 2470 | } |
2175 | 2471 | ||
2176 | static struct sysdev_class kvm_sysdev_class = { | 2472 | static struct syscore_ops kvm_syscore_ops = { |
2177 | .name = "kvm", | ||
2178 | .suspend = kvm_suspend, | 2473 | .suspend = kvm_suspend, |
2179 | .resume = kvm_resume, | 2474 | .resume = kvm_resume, |
2180 | }; | 2475 | }; |
2181 | 2476 | ||
2182 | static struct sys_device kvm_sysdev = { | ||
2183 | .id = 0, | ||
2184 | .cls = &kvm_sysdev_class, | ||
2185 | }; | ||
2186 | |||
2187 | struct page *bad_page; | 2477 | struct page *bad_page; |
2188 | pfn_t bad_pfn; | 2478 | pfn_t bad_pfn; |
2189 | 2479 | ||
@@ -2267,14 +2557,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
2267 | goto out_free_2; | 2557 | goto out_free_2; |
2268 | register_reboot_notifier(&kvm_reboot_notifier); | 2558 | register_reboot_notifier(&kvm_reboot_notifier); |
2269 | 2559 | ||
2270 | r = sysdev_class_register(&kvm_sysdev_class); | ||
2271 | if (r) | ||
2272 | goto out_free_3; | ||
2273 | |||
2274 | r = sysdev_register(&kvm_sysdev); | ||
2275 | if (r) | ||
2276 | goto out_free_4; | ||
2277 | |||
2278 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ | 2560 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ |
2279 | if (!vcpu_align) | 2561 | if (!vcpu_align) |
2280 | vcpu_align = __alignof__(struct kvm_vcpu); | 2562 | vcpu_align = __alignof__(struct kvm_vcpu); |
@@ -2282,9 +2564,13 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
2282 | 0, NULL); | 2564 | 0, NULL); |
2283 | if (!kvm_vcpu_cache) { | 2565 | if (!kvm_vcpu_cache) { |
2284 | r = -ENOMEM; | 2566 | r = -ENOMEM; |
2285 | goto out_free_5; | 2567 | goto out_free_3; |
2286 | } | 2568 | } |
2287 | 2569 | ||
2570 | r = kvm_async_pf_init(); | ||
2571 | if (r) | ||
2572 | goto out_free; | ||
2573 | |||
2288 | kvm_chardev_ops.owner = module; | 2574 | kvm_chardev_ops.owner = module; |
2289 | kvm_vm_fops.owner = module; | 2575 | kvm_vm_fops.owner = module; |
2290 | kvm_vcpu_fops.owner = module; | 2576 | kvm_vcpu_fops.owner = module; |
@@ -2292,9 +2578,11 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
2292 | r = misc_register(&kvm_dev); | 2578 | r = misc_register(&kvm_dev); |
2293 | if (r) { | 2579 | if (r) { |
2294 | printk(KERN_ERR "kvm: misc device register failed\n"); | 2580 | printk(KERN_ERR "kvm: misc device register failed\n"); |
2295 | goto out_free; | 2581 | goto out_unreg; |
2296 | } | 2582 | } |
2297 | 2583 | ||
2584 | register_syscore_ops(&kvm_syscore_ops); | ||
2585 | |||
2298 | kvm_preempt_ops.sched_in = kvm_sched_in; | 2586 | kvm_preempt_ops.sched_in = kvm_sched_in; |
2299 | kvm_preempt_ops.sched_out = kvm_sched_out; | 2587 | kvm_preempt_ops.sched_out = kvm_sched_out; |
2300 | 2588 | ||
@@ -2302,12 +2590,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
2302 | 2590 | ||
2303 | return 0; | 2591 | return 0; |
2304 | 2592 | ||
2593 | out_unreg: | ||
2594 | kvm_async_pf_deinit(); | ||
2305 | out_free: | 2595 | out_free: |
2306 | kmem_cache_destroy(kvm_vcpu_cache); | 2596 | kmem_cache_destroy(kvm_vcpu_cache); |
2307 | out_free_5: | ||
2308 | sysdev_unregister(&kvm_sysdev); | ||
2309 | out_free_4: | ||
2310 | sysdev_class_unregister(&kvm_sysdev_class); | ||
2311 | out_free_3: | 2597 | out_free_3: |
2312 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2598 | unregister_reboot_notifier(&kvm_reboot_notifier); |
2313 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2599 | unregister_cpu_notifier(&kvm_cpu_notifier); |
@@ -2334,11 +2620,11 @@ void kvm_exit(void) | |||
2334 | kvm_exit_debug(); | 2620 | kvm_exit_debug(); |
2335 | misc_deregister(&kvm_dev); | 2621 | misc_deregister(&kvm_dev); |
2336 | kmem_cache_destroy(kvm_vcpu_cache); | 2622 | kmem_cache_destroy(kvm_vcpu_cache); |
2337 | sysdev_unregister(&kvm_sysdev); | 2623 | kvm_async_pf_deinit(); |
2338 | sysdev_class_unregister(&kvm_sysdev_class); | 2624 | unregister_syscore_ops(&kvm_syscore_ops); |
2339 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2625 | unregister_reboot_notifier(&kvm_reboot_notifier); |
2340 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2626 | unregister_cpu_notifier(&kvm_cpu_notifier); |
2341 | on_each_cpu(hardware_disable, NULL, 1); | 2627 | on_each_cpu(hardware_disable_nolock, NULL, 1); |
2342 | kvm_arch_hardware_unsetup(); | 2628 | kvm_arch_hardware_unsetup(); |
2343 | kvm_arch_exit(); | 2629 | kvm_arch_exit(); |
2344 | free_cpumask_var(cpus_hardware_enabled); | 2630 | free_cpumask_var(cpus_hardware_enabled); |