Diffstat (limited to 'virt/kvm')
 virt/kvm/kvm_main.c  | 230
 virt/kvm/kvm_trace.c | 276
 2 files changed, 473 insertions(+), 33 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b2e12893e3f4..c82cf15730a1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -40,6 +40,7 @@
 #include <linux/kvm_para.h>
 #include <linux/pagemap.h>
 #include <linux/mman.h>
+#include <linux/swap.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -59,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
 
 static __read_mostly struct preempt_ops kvm_preempt_ops;
 
-static struct dentry *debugfs_dir;
+struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
@@ -119,6 +120,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	smp_call_function_mask(cpus, ack_flush, NULL, 1);
 }
 
+void kvm_reload_remote_mmus(struct kvm *kvm)
+{
+	int i, cpu;
+	cpumask_t cpus;
+	struct kvm_vcpu *vcpu;
+
+	cpus_clear(cpus);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
+		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+			continue;
+		cpu = vcpu->cpu;
+		if (cpu != -1 && cpu != raw_smp_processor_id())
+			cpu_set(cpu, cpus);
+	}
+	if (cpus_empty(cpus))
+		return;
+	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+}
+
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
@@ -170,6 +194,7 @@ static struct kvm *kvm_create_vm(void)
 	mutex_init(&kvm->lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
+	atomic_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
@@ -189,9 +214,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		vfree(free->dirty_bitmap);
 
+	if (!dont || free->lpage_info != dont->lpage_info)
+		vfree(free->lpage_info);
+
 	free->npages = 0;
 	free->dirty_bitmap = NULL;
 	free->rmap = NULL;
+	free->lpage_info = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -215,11 +244,25 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	mmdrop(mm);
 }
 
+void kvm_get_kvm(struct kvm *kvm)
+{
+	atomic_inc(&kvm->users_count);
+}
+EXPORT_SYMBOL_GPL(kvm_get_kvm);
+
+void kvm_put_kvm(struct kvm *kvm)
+{
+	if (atomic_dec_and_test(&kvm->users_count))
+		kvm_destroy_vm(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_put_kvm);
+
+
 static int kvm_vm_release(struct inode *inode, struct file *filp)
 {
 	struct kvm *kvm = filp->private_data;
 
-	kvm_destroy_vm(kvm);
+	kvm_put_kvm(kvm);
 	return 0;
 }
 
@@ -301,6 +344,25 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		new.user_alloc = user_alloc;
 		new.userspace_addr = mem->userspace_addr;
 	}
+	if (npages && !new.lpage_info) {
+		int largepages = npages / KVM_PAGES_PER_HPAGE;
+		if (npages % KVM_PAGES_PER_HPAGE)
+			largepages++;
+		if (base_gfn % KVM_PAGES_PER_HPAGE)
+			largepages++;
+
+		new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
+
+		if (!new.lpage_info)
+			goto out_free;
+
+		memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
+
+		if (base_gfn % KVM_PAGES_PER_HPAGE)
+			new.lpage_info[0].write_count = 1;
+		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
+			new.lpage_info[largepages-1].write_count = 1;
+	}
 
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
@@ -397,6 +459,12 @@ int is_error_page(struct page *page)
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
+int is_error_pfn(pfn_t pfn)
+{
+	return pfn == bad_pfn;
+}
+EXPORT_SYMBOL_GPL(is_error_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -444,7 +512,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
-static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
@@ -458,7 +526,7 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 /*
  * Requires current->mm->mmap_sem to be held
  */
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
 	struct page *page[1];
 	unsigned long addr;
@@ -469,7 +537,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr)) {
 		get_page(bad_page);
-		return bad_page;
+		return page_to_pfn(bad_page);
 	}
 
 	npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
@@ -477,27 +545,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
 	if (npages != 1) {
 		get_page(bad_page);
-		return bad_page;
+		return page_to_pfn(bad_page);
 	}
 
-	return page[0];
+	return page_to_pfn(page[0]);
+}
+
+EXPORT_SYMBOL_GPL(gfn_to_pfn);
+
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+	return pfn_to_page(gfn_to_pfn(kvm, gfn));
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
 {
-	put_page(page);
+	kvm_release_pfn_clean(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
+void kvm_release_pfn_clean(pfn_t pfn)
+{
+	put_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
+
 void kvm_release_page_dirty(struct page *page)
 {
+	kvm_release_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+
+void kvm_release_pfn_dirty(pfn_t pfn)
+{
+	kvm_set_pfn_dirty(pfn);
+	kvm_release_pfn_clean(pfn);
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
+
+void kvm_set_page_dirty(struct page *page)
+{
+	kvm_set_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
+
+void kvm_set_pfn_dirty(pfn_t pfn)
+{
+	struct page *page = pfn_to_page(pfn);
 	if (!PageReserved(page))
 		SetPageDirty(page);
-	put_page(page);
 }
-EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
+
+void kvm_set_pfn_accessed(pfn_t pfn)
+{
+	mark_page_accessed(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
+
+void kvm_get_pfn(pfn_t pfn)
+{
+	get_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_get_pfn);
 
 static int next_segment(unsigned long len, int offset)
 {
@@ -554,7 +666,9 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
+	pagefault_disable();
 	r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
+	pagefault_enable();
 	if (r)
 		return -EFAULT;
 	return 0;
@@ -651,6 +765,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	 * We will block until either an interrupt or a signal wakes us up
 	 */
 	while (!kvm_cpu_has_interrupt(vcpu)
+	       && !kvm_cpu_has_pending_timer(vcpu)
 	       && !signal_pending(current)
 	       && !kvm_arch_vcpu_runnable(vcpu)) {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -678,8 +793,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	if (vmf->pgoff == 0)
 		page = virt_to_page(vcpu->run);
+#ifdef CONFIG_X86
 	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->arch.pio_data);
+#endif
 	else
 		return VM_FAULT_SIGBUS;
 	get_page(page);
@@ -701,11 +818,11 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 {
 	struct kvm_vcpu *vcpu = filp->private_data;
 
-	fput(vcpu->kvm->filp);
+	kvm_put_kvm(vcpu->kvm);
 	return 0;
 }
 
-static struct file_operations kvm_vcpu_fops = {
+static const struct file_operations kvm_vcpu_fops = {
 	.release = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
 	.compat_ioctl = kvm_vcpu_ioctl,
@@ -723,9 +840,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 
 	r = anon_inode_getfd(&fd, &inode, &file,
 			     "kvm-vcpu", &kvm_vcpu_fops, vcpu);
-	if (r)
+	if (r) {
+		kvm_put_kvm(vcpu->kvm);
 		return r;
-	atomic_inc(&vcpu->kvm->filp->f_count);
+	}
 	return fd;
 }
 
@@ -760,6 +878,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	mutex_unlock(&kvm->lock);
 
 	/* Now it's all set up, let userspace reach it */
+	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
 	if (r < 0)
 		goto unlink;
@@ -802,28 +921,39 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
 		break;
 	case KVM_GET_REGS: {
-		struct kvm_regs kvm_regs;
+		struct kvm_regs *kvm_regs;
 
-		memset(&kvm_regs, 0, sizeof kvm_regs);
-		r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
-		if (r)
+		r = -ENOMEM;
+		kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+		if (!kvm_regs)
 			goto out;
+		r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
+		if (r)
+			goto out_free1;
 		r = -EFAULT;
-		if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
-			goto out;
+		if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
+			goto out_free1;
 		r = 0;
+out_free1:
+		kfree(kvm_regs);
 		break;
 	}
 	case KVM_SET_REGS: {
-		struct kvm_regs kvm_regs;
+		struct kvm_regs *kvm_regs;
 
-		r = -EFAULT;
-		if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
+		r = -ENOMEM;
+		kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+		if (!kvm_regs)
 			goto out;
-		r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
+		r = -EFAULT;
+		if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
+			goto out_free2;
+		r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
 		if (r)
-			goto out;
+			goto out_free2;
 		r = 0;
+out_free2:
+		kfree(kvm_regs);
 		break;
 	}
 	case KVM_GET_SREGS: {
@@ -851,6 +981,30 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_GET_MP_STATE: {
+		struct kvm_mp_state mp_state;
+
+		r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &mp_state, sizeof mp_state))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_MP_STATE: {
+		struct kvm_mp_state mp_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&mp_state, argp, sizeof mp_state))
+			goto out;
+		r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	case KVM_TRANSLATE: {
 		struct kvm_translation tr;
 
@@ -1005,7 +1159,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static struct file_operations kvm_vm_fops = {
+static const struct file_operations kvm_vm_fops = {
 	.release = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 	.compat_ioctl = kvm_vm_ioctl,
@@ -1024,12 +1178,10 @@ static int kvm_dev_ioctl_create_vm(void)
 		return PTR_ERR(kvm);
 	r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
 	if (r) {
-		kvm_destroy_vm(kvm);
+		kvm_put_kvm(kvm);
 		return r;
 	}
 
-	kvm->filp = file;
-
 	return fd;
 }
 
@@ -1059,7 +1211,15 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = -EINVAL;
 		if (arg)
 			goto out;
-		r = 2 * PAGE_SIZE;
+		r = PAGE_SIZE;     /* struct kvm_run */
+#ifdef CONFIG_X86
+		r += PAGE_SIZE;    /* pio data page */
+#endif
+		break;
+	case KVM_TRACE_ENABLE:
+	case KVM_TRACE_PAUSE:
+	case KVM_TRACE_DISABLE:
+		r = kvm_trace_ioctl(ioctl, arg);
 		break;
 	default:
 		return kvm_arch_dev_ioctl(filp, ioctl, arg);
@@ -1232,9 +1392,9 @@ static void kvm_init_debug(void)
 {
 	struct kvm_stats_debugfs_item *p;
 
-	debugfs_dir = debugfs_create_dir("kvm", NULL);
+	kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
 	for (p = debugfs_entries; p->name; ++p)
-		p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
+		p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
 						(void *)(long)p->offset,
 						stat_fops[p->kind]);
 }
@@ -1245,7 +1405,7 @@ static void kvm_exit_debug(void)
 
 	for (p = debugfs_entries; p->name; ++p)
 		debugfs_remove(p->dentry);
-	debugfs_remove(debugfs_dir);
+	debugfs_remove(kvm_debugfs_dir);
 }
 
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
@@ -1272,6 +1432,7 @@ static struct sys_device kvm_sysdev = {
 };
 
 struct page *bad_page;
+pfn_t bad_pfn;
 
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
@@ -1313,6 +1474,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 		goto out;
 	}
 
+	bad_pfn = page_to_pfn(bad_page);
+
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
 		goto out_free_0;
@@ -1386,6 +1549,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
+	kvm_trace_cleanup();
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	sysdev_unregister(&kvm_sysdev);
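
The KVM_TRACE_ENABLE/PAUSE/DISABLE cases added to kvm_dev_ioctl() above are driven from userspace through /dev/kvm. A minimal, hypothetical userspace sketch follows; it assumes the <linux/kvm.h> header of a matching kernel defines the KVM_TRACE_* ioctl numbers and a struct kvm_user_trace_setup whose buf_size/buf_nr fields are the ones validated by do_kvm_trace_enable() in kvm_trace.c below.

/*
 * Hypothetical userspace sketch: enable, pause and disable kvm tracing
 * via the new /dev/kvm ioctls added by this patch.  The field names
 * (buf_size, buf_nr) mirror the kernel-side checks in do_kvm_trace_enable();
 * the exact struct kvm_user_trace_setup layout comes from <linux/kvm.h>.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_user_trace_setup kuts = {
		.buf_size = 4096,	/* size of one relay sub-buffer */
		.buf_nr   = 8,		/* sub-buffers per cpu */
	};
	int fd = open("/dev/kvm", O_RDWR);

	if (fd < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* Needs CAP_SYS_ADMIN; kvm_trace_ioctl() returns -EPERM otherwise. */
	if (ioctl(fd, KVM_TRACE_ENABLE, &kuts) < 0)
		perror("KVM_TRACE_ENABLE");

	/* ... run guests, read the relay files under debugfs ... */
	sleep(10);

	ioctl(fd, KVM_TRACE_PAUSE, 0);		/* flush and stop writing records */
	ioctl(fd, KVM_TRACE_DISABLE, 0);	/* tear the relay channel down */

	close(fd);
	return 0;
}

Since kvm_trace_ioctl() checks CAP_SYS_ADMIN, such a tool has to run with root privileges.
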
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
new file mode 100644
index 000000000000..0e495470788d
--- /dev/null
+++ b/virt/kvm/kvm_trace.c
@@ -0,0 +1,276 @@
+/*
+ * kvm trace
+ *
+ * It is designed to allow debugging traces of kvm to be generated
+ * on UP / SMP machines.  Each trace entry can be timestamped so that
+ * it's possible to reconstruct a chronological record of trace events.
+ * The implementation refers to blktrace kernel support.
+ *
+ * Copyright (c) 2008 Intel Corporation
+ * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
+ *
+ * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
+ *
+ * Date:    Feb 2008
+ */
+
+#include <linux/module.h>
+#include <linux/relay.h>
+#include <linux/debugfs.h>
+
+#include <linux/kvm_host.h>
+
+#define KVM_TRACE_STATE_RUNNING	(1 << 0)
+#define KVM_TRACE_STATE_PAUSE	(1 << 1)
+#define KVM_TRACE_STATE_CLEARUP	(1 << 2)
+
+struct kvm_trace {
+	int trace_state;
+	struct rchan *rchan;
+	struct dentry *lost_file;
+	atomic_t lost_records;
+};
+static struct kvm_trace *kvm_trace;
+
+struct kvm_trace_probe {
+	const char *name;
+	const char *format;
+	u32 cycle_in;
+	marker_probe_func *probe_func;
+};
+
+static inline int calc_rec_size(int cycle, int extra)
+{
+	int rec_size = KVM_TRC_HEAD_SIZE;
+
+	rec_size += extra;
+	return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
+}
+
+static void kvm_add_trace(void *probe_private, void *call_data,
+			  const char *format, va_list *args)
+{
+	struct kvm_trace_probe *p = probe_private;
+	struct kvm_trace *kt = kvm_trace;
+	struct kvm_trace_rec rec;
+	struct kvm_vcpu *vcpu;
+	int i, extra, size;
+
+	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
+		return;
+
+	rec.event = va_arg(*args, u32);
+	vcpu = va_arg(*args, struct kvm_vcpu *);
+	rec.pid = current->tgid;
+	rec.vcpu_id = vcpu->vcpu_id;
+
+	extra = va_arg(*args, u32);
+	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
+	extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
+	rec.extra_u32 = extra;
+
+	rec.cycle_in = p->cycle_in;
+
+	if (rec.cycle_in) {
+		u64 cycle = 0;
+
+		cycle = get_cycles();
+		rec.u.cycle.cycle_lo = (u32)cycle;
+		rec.u.cycle.cycle_hi = (u32)(cycle >> 32);
+
+		for (i = 0; i < rec.extra_u32; i++)
+			rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
+	} else {
+		for (i = 0; i < rec.extra_u32; i++)
+			rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
+	}
+
+	size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
+	relay_write(kt->rchan, &rec, size);
+}
+
+static struct kvm_trace_probe kvm_trace_probes[] = {
+	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
+	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
+};
+
+static int lost_records_get(void *data, u64 *val)
+{
+	struct kvm_trace *kt = data;
+
+	*val = atomic_read(&kt->lost_records);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
+
+/*
+ *  The relay channel is used in "no-overwrite" mode, it keeps trace of how
+ *  many times we encountered a full subbuffer, to tell user space app the
+ *  lost records there were.
+ */
+static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
+				     void *prev_subbuf, size_t prev_padding)
+{
+	struct kvm_trace *kt;
+
+	if (!relay_buf_full(buf))
+		return 1;
+
+	kt = buf->chan->private_data;
+	atomic_inc(&kt->lost_records);
+
+	return 0;
+}
+
+static struct dentry *kvm_create_buf_file_callack(const char *filename,
+						  struct dentry *parent,
+						  int mode,
+						  struct rchan_buf *buf,
+						  int *is_global)
+{
+	return debugfs_create_file(filename, mode, parent, buf,
+				   &relay_file_operations);
+}
+
+static int kvm_remove_buf_file_callback(struct dentry *dentry)
+{
+	debugfs_remove(dentry);
+	return 0;
+}
+
+static struct rchan_callbacks kvm_relay_callbacks = {
+	.subbuf_start = kvm_subbuf_start_callback,
+	.create_buf_file = kvm_create_buf_file_callack,
+	.remove_buf_file = kvm_remove_buf_file_callback,
+};
+
+static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
+{
+	struct kvm_trace *kt;
+	int i, r = -ENOMEM;
+
+	if (!kuts->buf_size || !kuts->buf_nr)
+		return -EINVAL;
+
+	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
+	if (!kt)
+		goto err;
+
+	r = -EIO;
+	atomic_set(&kt->lost_records, 0);
+	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
+					    kt, &kvm_trace_lost_ops);
+	if (!kt->lost_file)
+		goto err;
+
+	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
+			       kuts->buf_nr, &kvm_relay_callbacks, kt);
+	if (!kt->rchan)
+		goto err;
+
+	kvm_trace = kt;
+
+	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
+		struct kvm_trace_probe *p = &kvm_trace_probes[i];
+
+		r = marker_probe_register(p->name, p->format, p->probe_func, p);
+		if (r)
+			printk(KERN_INFO "Unable to register probe %s\n",
+			       p->name);
+	}
+
+	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
+
+	return 0;
+err:
+	if (kt) {
+		if (kt->lost_file)
+			debugfs_remove(kt->lost_file);
+		if (kt->rchan)
+			relay_close(kt->rchan);
+		kfree(kt);
+	}
+	return r;
+}
+
+static int kvm_trace_enable(char __user *arg)
+{
+	struct kvm_user_trace_setup kuts;
+	int ret;
+
+	ret = copy_from_user(&kuts, arg, sizeof(kuts));
+	if (ret)
+		return -EFAULT;
+
+	ret = do_kvm_trace_enable(&kuts);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int kvm_trace_pause(void)
+{
+	struct kvm_trace *kt = kvm_trace;
+	int r = -EINVAL;
+
+	if (kt == NULL)
+		return r;
+
+	if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
+		kt->trace_state = KVM_TRACE_STATE_PAUSE;
+		relay_flush(kt->rchan);
+		r = 0;
+	}
+
+	return r;
+}
+
+void kvm_trace_cleanup(void)
+{
+	struct kvm_trace *kt = kvm_trace;
+	int i;
+
+	if (kt == NULL)
+		return;
+
+	if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
+	    kt->trace_state == KVM_TRACE_STATE_PAUSE) {
+
+		kt->trace_state = KVM_TRACE_STATE_CLEARUP;
+
+		for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
+			struct kvm_trace_probe *p = &kvm_trace_probes[i];
+			marker_probe_unregister(p->name, p->probe_func, p);
+		}
+
+		relay_close(kt->rchan);
+		debugfs_remove(kt->lost_file);
+		kfree(kt);
+	}
+}
+
+int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	long r = -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (ioctl) {
+	case KVM_TRACE_ENABLE:
+		r = kvm_trace_enable(argp);
+		break;
+	case KVM_TRACE_PAUSE:
+		r = kvm_trace_pause();
+		break;
+	case KVM_TRACE_DISABLE:
+		r = 0;
+		kvm_trace_cleanup();
+		break;
+	}
+
+	return r;
+}
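
relay_open("trace", kvm_debugfs_dir, ...) above creates one relay buffer file per cpu next to the lost_records counter, so the records written by kvm_add_trace() can be collected from debugfs. A rough collector sketch follows; the paths are assumptions: debugfs mounted at /sys/kernel/debug and relay naming the per-cpu files trace0, trace1, ... inside the kvm directory created by kvm_init_debug().

/*
 * Rough userspace sketch: drain one per-cpu relay buffer produced by
 * kvm_trace into a regular file for offline decoding.  The debugfs mount
 * point and the "trace<cpu>" file names are assumptions, not taken from
 * this patch.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[65536];
	ssize_t n;
	int in  = open("/sys/kernel/debug/kvm/trace0", O_RDONLY);
	int out = open("kvm-trace-cpu0.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (in < 0 || out < 0) {
		perror("open");
		return 1;
	}

	/*
	 * A read returns whatever complete data is available; a real
	 * collector would poll() and keep looping until tracing is disabled.
	 */
	while ((n = read(in, buf, sizeof(buf))) > 0)
		write(out, buf, n);

	close(in);
	close(out);
	return 0;
}

A full collector would drain every traceN file and compare against the lost_records counter to detect sub-buffers dropped while the channel was full.
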
