Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--  virt/kvm/kvm_main.c | 153
 1 file changed, 130 insertions(+), 23 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d4eae6af0738..904d7b7bd780 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#include "coalesced_mmio.h"
+#endif
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -65,6 +69,8 @@ struct dentry *kvm_debugfs_dir;
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                            unsigned long arg);
 
+bool kvm_rebooting;
+
 static inline int valid_vcpu(int n)
 {
         return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -99,10 +105,11 @@ static void ack_flush(void *_completed)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-        int i, cpu;
+        int i, cpu, me;
         cpumask_t cpus;
         struct kvm_vcpu *vcpu;
 
+        me = get_cpu();
         cpus_clear(cpus);
         for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                 vcpu = kvm->vcpus[i];
@@ -111,21 +118,24 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
                 if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
                         continue;
                 cpu = vcpu->cpu;
-                if (cpu != -1 && cpu != raw_smp_processor_id())
+                if (cpu != -1 && cpu != me)
                         cpu_set(cpu, cpus);
         }
         if (cpus_empty(cpus))
-                return;
+                goto out;
         ++kvm->stat.remote_tlb_flush;
         smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+        put_cpu();
 }
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-        int i, cpu;
+        int i, cpu, me;
         cpumask_t cpus;
         struct kvm_vcpu *vcpu;
 
+        me = get_cpu();
         cpus_clear(cpus);
         for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                 vcpu = kvm->vcpus[i];
@@ -134,12 +144,14 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
                 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
                         continue;
                 cpu = vcpu->cpu;
-                if (cpu != -1 && cpu != raw_smp_processor_id())
+                if (cpu != -1 && cpu != me)
                         cpu_set(cpu, cpus);
         }
         if (cpus_empty(cpus))
-                goto out;
+                goto out;
         smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+        put_cpu();
 }
 
 
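The two hunks above replace raw_smp_processor_id() with a get_cpu()/put_cpu() pair: with preemption enabled, the caller could migrate to another CPU between reading its CPU id and sending the IPIs, leaving its own (now stale) CPU in the mask. A minimal sketch of the pattern, using the cpumask helpers of this era (the function and mask names are illustrative):

        #include <linux/smp.h>

        static void ipi_all_but_self(cpumask_t *cpus, void (*fn)(void *))
        {
                int cpu, me = get_cpu();  /* disables preemption, returns our CPU */

                cpus_clear(*cpus);
                for_each_online_cpu(cpu)
                        if (cpu != me)    /* 'me' cannot change until put_cpu() */
                                cpu_set(cpu, *cpus);
                if (!cpus_empty(*cpus))
                        smp_call_function_mask(*cpus, fn, NULL, 1);
                put_cpu();                /* re-enables preemption */
        }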
@@ -183,10 +195,23 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 static struct kvm *kvm_create_vm(void)
 {
         struct kvm *kvm = kvm_arch_create_vm();
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+        struct page *page;
+#endif
 
         if (IS_ERR(kvm))
                 goto out;
 
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+        if (!page) {
+                kfree(kvm);
+                return ERR_PTR(-ENOMEM);
+        }
+        kvm->coalesced_mmio_ring =
+                        (struct kvm_coalesced_mmio_ring *)page_address(page);
+#endif
+
         kvm->mm = current->mm;
         atomic_inc(&kvm->mm->mm_count);
         spin_lock_init(&kvm->mmu_lock);
@@ -198,6 +223,9 @@ static struct kvm *kvm_create_vm(void)
         spin_lock(&kvm_lock);
         list_add(&kvm->vm_list, &vm_list);
         spin_unlock(&kvm_lock);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+        kvm_coalesced_mmio_init(kvm);
+#endif
 out:
         return kvm;
 }
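The ring that kvm_create_vm() places in this zeroed page is a producer/consumer structure shared with userspace. For reference, a sketch of its layout as declared in the kvm headers of this period (field details are illustrative of the uapi, not authoritative):

        /* one coalesced MMIO event, appended by the kernel on a guest write */
        struct kvm_coalesced_mmio {
                __u64 phys_addr;        /* guest-physical address written */
                __u32 len;              /* width of the access, in bytes */
                __u32 pad;
                __u8  data[8];          /* the written bytes */
        };

        /* page-sized ring: 'last' is advanced by the kernel as producer,
         * 'first' by userspace as consumer */
        struct kvm_coalesced_mmio_ring {
                __u32 first, last;
                struct kvm_coalesced_mmio coalesced_mmio[0];
        };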
@@ -240,6 +268,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
         spin_unlock(&kvm_lock);
         kvm_io_bus_destroy(&kvm->pio_bus);
         kvm_io_bus_destroy(&kvm->mmio_bus);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+        if (kvm->coalesced_mmio_ring != NULL)
+                free_page((unsigned long)kvm->coalesced_mmio_ring);
+#endif
         kvm_arch_destroy_vm(kvm);
         mmdrop(mm);
 }
@@ -333,6 +365,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
         r = -ENOMEM;
 
         /* Allocate if a slot is being created */
+#ifndef CONFIG_S390
         if (npages && !new.rmap) {
                 new.rmap = vmalloc(npages * sizeof(struct page *));
 
@@ -373,10 +406,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
                         goto out_free;
                 memset(new.dirty_bitmap, 0, dirty_bytes);
         }
+#endif /* not defined CONFIG_S390 */
 
         if (mem->slot >= kvm->nmemslots)
                 kvm->nmemslots = mem->slot + 1;
 
+        if (!npages)
+                kvm_arch_flush_shadow(kvm);
+
         *memslot = new;
 
         r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
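The new kvm_arch_flush_shadow() call runs only when npages == 0, i.e. when the slot is being deleted, so shadow page-table entries cannot outlive the memory they translate. From userspace, deletion is simply a region set with memory_size zero; a sketch with illustrative values:

        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        static int delete_slot(int vm_fd)
        {
                struct kvm_userspace_memory_region region = {
                        .slot            = 3,           /* illustrative slot number */
                        .guest_phys_addr = 0xc0000000,  /* illustrative base */
                        .memory_size     = 0,           /* npages == 0 -> flush shadow */
                };
                return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
        }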
@@ -532,6 +569,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
         struct page *page[1];
         unsigned long addr;
         int npages;
+        pfn_t pfn;
 
         might_sleep();
 
@@ -544,19 +582,38 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
         npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
                                 NULL);
 
-        if (npages != 1) {
-                get_page(bad_page);
-                return page_to_pfn(bad_page);
-        }
+        if (unlikely(npages != 1)) {
+                struct vm_area_struct *vma;
 
-        return page_to_pfn(page[0]);
+                vma = find_vma(current->mm, addr);
+                if (vma == NULL || addr < vma->vm_start ||
+                    !(vma->vm_flags & VM_PFNMAP)) {
+                        get_page(bad_page);
+                        return page_to_pfn(bad_page);
+                }
+
+                pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+                BUG_ON(pfn_valid(pfn));
+        } else
+                pfn = page_to_pfn(page[0]);
+
+        return pfn;
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
-        return pfn_to_page(gfn_to_pfn(kvm, gfn));
+        pfn_t pfn;
+
+        pfn = gfn_to_pfn(kvm, gfn);
+        if (pfn_valid(pfn))
+                return pfn_to_page(pfn);
+
+        WARN_ON(!pfn_valid(pfn));
+
+        get_page(bad_page);
+        return bad_page;
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
@@ -569,7 +626,8 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-        put_page(pfn_to_page(pfn));
+        if (pfn_valid(pfn))
+                put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
 
@@ -594,21 +652,25 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
 
 void kvm_set_pfn_dirty(pfn_t pfn)
 {
-        struct page *page = pfn_to_page(pfn);
-        if (!PageReserved(page))
-                SetPageDirty(page);
+        if (pfn_valid(pfn)) {
+                struct page *page = pfn_to_page(pfn);
+                if (!PageReserved(page))
+                        SetPageDirty(page);
+        }
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
 
 void kvm_set_pfn_accessed(pfn_t pfn)
 {
-        mark_page_accessed(pfn_to_page(pfn));
+        if (pfn_valid(pfn))
+                mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
 void kvm_get_pfn(pfn_t pfn)
 {
-        get_page(pfn_to_page(pfn));
+        if (pfn_valid(pfn))
+                get_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_get_pfn);
 
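The common thread in the pfn helpers above: gfn_to_pfn() can now return a PFN from a VM_PFNMAP mapping (device memory with no struct page behind it), and pfn_to_page() on such a PFN would index past the end of mem_map, so every struct-page operation is gated on pfn_valid(). The PFNMAP arithmetic itself is linear offsetting; a worked example with illustrative numbers:

        #include <asm/page.h>   /* PAGE_SHIFT */

        static unsigned long pfnmap_example(void)
        {
                /* A VMA set up by a driver with remap_pfn_range():
                 * vm_start = 0x7f0000000000, vm_pgoff = 0xf0000 (phys 0xf0000000) */
                unsigned long addr     = 0x7f0000003000UL;  /* address inside the VMA */
                unsigned long vm_start = 0x7f0000000000UL;
                unsigned long vm_pgoff = 0xf0000;

                /* page index within the VMA plus the VMA's physical page offset:
                 * 0x3 + 0xf0000 = 0xf0003, the fourth page of the device region */
                return ((addr - vm_start) >> PAGE_SHIFT) + vm_pgoff;
        }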
@@ -799,6 +861,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
                 page = virt_to_page(vcpu->arch.pio_data);
 #endif
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+        else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
+                page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
+#endif
         else
                 return VM_FAULT_SIGBUS;
         get_page(page);
@@ -1121,6 +1187,32 @@ static long kvm_vm_ioctl(struct file *filp,
                         goto out;
                 break;
         }
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+        case KVM_REGISTER_COALESCED_MMIO: {
+                struct kvm_coalesced_mmio_zone zone;
+                r = -EFAULT;
+                if (copy_from_user(&zone, argp, sizeof zone))
+                        goto out;
+                r = -ENXIO;
+                r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
+                if (r)
+                        goto out;
+                r = 0;
+                break;
+        }
+        case KVM_UNREGISTER_COALESCED_MMIO: {
+                struct kvm_coalesced_mmio_zone zone;
+                r = -EFAULT;
+                if (copy_from_user(&zone, argp, sizeof zone))
+                        goto out;
+                r = -ENXIO;
+                r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
+                if (r)
+                        goto out;
+                r = 0;
+                break;
+        }
+#endif
         default:
                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
         }
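A minimal sketch of driving the new ioctl from userspace, assuming a vm_fd obtained via KVM_CREATE_VM (the zone values are illustrative):

        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Ask KVM to coalesce guest writes to a 4 KiB MMIO window instead of
         * exiting to userspace on every write. */
        static int register_zone(int vm_fd)
        {
                struct kvm_coalesced_mmio_zone zone = {
                        .addr = 0xe0000000,     /* illustrative guest-physical base */
                        .size = 0x1000,
                };
                return ioctl(vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
        }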
@@ -1179,7 +1271,6 @@ static int kvm_dev_ioctl_create_vm(void)
 static long kvm_dev_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
 {
-        void __user *argp = (void __user *)arg;
         long r = -EINVAL;
 
         switch (ioctl) {
@@ -1196,7 +1287,7 @@ static long kvm_dev_ioctl(struct file *filp,
                 r = kvm_dev_ioctl_create_vm();
                 break;
         case KVM_CHECK_EXTENSION:
-                r = kvm_dev_ioctl_check_extension((long)argp);
+                r = kvm_dev_ioctl_check_extension(arg);
                 break;
         case KVM_GET_VCPU_MMAP_SIZE:
                 r = -EINVAL;
@@ -1206,6 +1297,9 @@ static long kvm_dev_ioctl(struct file *filp,
 #ifdef CONFIG_X86
                 r += PAGE_SIZE;    /* pio data page */
 #endif
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+                r += PAGE_SIZE;    /* coalesced mmio ring page */
+#endif
                 break;
         case KVM_TRACE_ENABLE:
         case KVM_TRACE_PAUSE:
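With the extra page accounted for in KVM_GET_VCPU_MMAP_SIZE, userspace finds the ring KVM_COALESCED_MMIO_PAGE_OFFSET pages into the vcpu mapping and drains it between exits. A sketch assuming the uapi macros above; handle_write() is a hypothetical consumer and error handling is elided:

        #include <sys/ioctl.h>
        #include <sys/mman.h>
        #include <unistd.h>
        #include <linux/kvm.h>

        static void drain_ring(int kvm_fd, int vcpu_fd,
                               void (*handle_write)(__u64, __u8 *, __u32))
        {
                long psz = sysconf(_SC_PAGESIZE);
                int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
                char *base = mmap(NULL, size, PROT_READ | PROT_WRITE,
                                  MAP_SHARED, vcpu_fd, 0);
                struct kvm_coalesced_mmio_ring *ring =
                        (void *)(base + KVM_COALESCED_MMIO_PAGE_OFFSET * psz);
                /* how many entries fit in the page after the ring header */
                __u32 max = (psz - sizeof(*ring)) / sizeof(ring->coalesced_mmio[0]);

                while (ring->first != ring->last) {
                        struct kvm_coalesced_mmio *e =
                                &ring->coalesced_mmio[ring->first];

                        handle_write(e->phys_addr, e->data, e->len);
                        ring->first = (ring->first + 1) % max;
                }
        }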
@@ -1247,7 +1341,6 @@ static void hardware_disable(void *junk)
         if (!cpu_isset(cpu, cpus_hardware_enabled))
                 return;
         cpu_clear(cpu, cpus_hardware_enabled);
-        decache_vcpus_on_cpu(cpu);
         kvm_arch_hardware_disable(NULL);
 }
 
@@ -1277,6 +1370,18 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
         return NOTIFY_OK;
 }
 
+
+asmlinkage void kvm_handle_fault_on_reboot(void)
+{
+        if (kvm_rebooting)
+                /* spin while reset goes on */
+                while (true)
+                        ;
+        /* Fault while not rebooting.  We want the trace. */
+        BUG();
+}
+EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
+
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                       void *v)
 {
@@ -1286,6 +1391,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                  * in vmx root mode.
                  */
                 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+                kvm_rebooting = true;
                 on_each_cpu(hardware_disable, NULL, 1);
         }
         return NOTIFY_OK;
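kvm_rebooting and the asmlinkage helper close a reboot race: after hardware_disable() runs, a VMX/SVM instruction still in flight on another CPU faults. The arch modules wrap those instructions in an exception-table fixup that lands in kvm_handle_fault_on_reboot(), parking the CPU instead of oopsing. A sketch of the mechanism (not the exact kernel macro; 64-bit pointers assumed):

        /* Run 'insn'; if it faults, the __ex_table entry diverts the CPU to the
         * .fixup stub, which jumps to kvm_handle_fault_on_reboot() -- hence the
         * asmlinkage declaration above. */
        #define handle_fault_on_reboot(insn)                    \
                "666: " insn "\n\t"                             \
                ".pushsection .fixup, \"ax\"\n"                 \
                "667: jmp kvm_handle_fault_on_reboot\n\t"       \
                ".popsection\n\t"                               \
                ".pushsection __ex_table, \"a\"\n\t"            \
                " .quad 666b, 667b\n\t"                         \
                ".popsection"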
@@ -1312,14 +1418,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
         }
 }
 
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
+                                          gpa_t addr, int len, int is_write)
 {
         int i;
 
         for (i = 0; i < bus->dev_count; i++) {
                 struct kvm_io_device *pos = bus->devs[i];
 
-                if (pos->in_range(pos, addr))
+                if (pos->in_range(pos, addr, len, is_write))
                         return pos;
         }
 
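Passing len and is_write through kvm_io_bus_find_dev() lets each device decline accesses it cannot service; the coalesced MMIO device, for one, only buffers writes, so reads must still exit to userspace. A sketch of an in_range() callback under the new signature (the device window macros are hypothetical):

        #define MY_DEV_BASE 0xe0000000UL        /* hypothetical MMIO window */
        #define MY_DEV_SIZE 0x1000UL

        static int my_dev_in_range(struct kvm_io_device *this, gpa_t addr,
                                   int len, int is_write)
        {
                if (!is_write)
                        return 0;  /* write-only device: reads exit to userspace */
                return addr >= MY_DEV_BASE &&
                       addr + len <= MY_DEV_BASE + MY_DEV_SIZE;
        }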