diff options
Diffstat (limited to 'virt/kvm')
-rw-r--r-- | virt/kvm/kvm_main.c | 230 | ||||
-rw-r--r-- | virt/kvm/kvm_trace.c | 276 |
2 files changed, 473 insertions, 33 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b2e12893e3f4..c82cf15730a1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/kvm_para.h> | 40 | #include <linux/kvm_para.h> |
41 | #include <linux/pagemap.h> | 41 | #include <linux/pagemap.h> |
42 | #include <linux/mman.h> | 42 | #include <linux/mman.h> |
43 | #include <linux/swap.h> | ||
43 | 44 | ||
44 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
45 | #include <asm/io.h> | 46 | #include <asm/io.h> |
@@ -59,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | |||
59 | 60 | ||
60 | static __read_mostly struct preempt_ops kvm_preempt_ops; | 61 | static __read_mostly struct preempt_ops kvm_preempt_ops; |
61 | 62 | ||
62 | static struct dentry *debugfs_dir; | 63 | struct dentry *kvm_debugfs_dir; |
63 | 64 | ||
64 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 65 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
65 | unsigned long arg); | 66 | unsigned long arg); |
@@ -119,6 +120,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
119 | smp_call_function_mask(cpus, ack_flush, NULL, 1); | 120 | smp_call_function_mask(cpus, ack_flush, NULL, 1); |
120 | } | 121 | } |
121 | 122 | ||
123 | void kvm_reload_remote_mmus(struct kvm *kvm) | ||
124 | { | ||
125 | int i, cpu; | ||
126 | cpumask_t cpus; | ||
127 | struct kvm_vcpu *vcpu; | ||
128 | |||
129 | cpus_clear(cpus); | ||
130 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
131 | vcpu = kvm->vcpus[i]; | ||
132 | if (!vcpu) | ||
133 | continue; | ||
134 | if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | ||
135 | continue; | ||
136 | cpu = vcpu->cpu; | ||
137 | if (cpu != -1 && cpu != raw_smp_processor_id()) | ||
138 | cpu_set(cpu, cpus); | ||
139 | } | ||
140 | if (cpus_empty(cpus)) | ||
141 | return; | ||
142 | smp_call_function_mask(cpus, ack_flush, NULL, 1); | ||
143 | } | ||
144 | |||
145 | |||
122 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 146 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
123 | { | 147 | { |
124 | struct page *page; | 148 | struct page *page; |
@@ -170,6 +194,7 @@ static struct kvm *kvm_create_vm(void) | |||
170 | mutex_init(&kvm->lock); | 194 | mutex_init(&kvm->lock); |
171 | kvm_io_bus_init(&kvm->mmio_bus); | 195 | kvm_io_bus_init(&kvm->mmio_bus); |
172 | init_rwsem(&kvm->slots_lock); | 196 | init_rwsem(&kvm->slots_lock); |
197 | atomic_set(&kvm->users_count, 1); | ||
173 | spin_lock(&kvm_lock); | 198 | spin_lock(&kvm_lock); |
174 | list_add(&kvm->vm_list, &vm_list); | 199 | list_add(&kvm->vm_list, &vm_list); |
175 | spin_unlock(&kvm_lock); | 200 | spin_unlock(&kvm_lock); |
@@ -189,9 +214,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | |||
189 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 214 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
190 | vfree(free->dirty_bitmap); | 215 | vfree(free->dirty_bitmap); |
191 | 216 | ||
217 | if (!dont || free->lpage_info != dont->lpage_info) | ||
218 | vfree(free->lpage_info); | ||
219 | |||
192 | free->npages = 0; | 220 | free->npages = 0; |
193 | free->dirty_bitmap = NULL; | 221 | free->dirty_bitmap = NULL; |
194 | free->rmap = NULL; | 222 | free->rmap = NULL; |
223 | free->lpage_info = NULL; | ||
195 | } | 224 | } |
196 | 225 | ||
197 | void kvm_free_physmem(struct kvm *kvm) | 226 | void kvm_free_physmem(struct kvm *kvm) |
@@ -215,11 +244,25 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
215 | mmdrop(mm); | 244 | mmdrop(mm); |
216 | } | 245 | } |
217 | 246 | ||
247 | void kvm_get_kvm(struct kvm *kvm) | ||
248 | { | ||
249 | atomic_inc(&kvm->users_count); | ||
250 | } | ||
251 | EXPORT_SYMBOL_GPL(kvm_get_kvm); | ||
252 | |||
253 | void kvm_put_kvm(struct kvm *kvm) | ||
254 | { | ||
255 | if (atomic_dec_and_test(&kvm->users_count)) | ||
256 | kvm_destroy_vm(kvm); | ||
257 | } | ||
258 | EXPORT_SYMBOL_GPL(kvm_put_kvm); | ||
259 | |||
260 | |||
218 | static int kvm_vm_release(struct inode *inode, struct file *filp) | 261 | static int kvm_vm_release(struct inode *inode, struct file *filp) |
219 | { | 262 | { |
220 | struct kvm *kvm = filp->private_data; | 263 | struct kvm *kvm = filp->private_data; |
221 | 264 | ||
222 | kvm_destroy_vm(kvm); | 265 | kvm_put_kvm(kvm); |
223 | return 0; | 266 | return 0; |
224 | } | 267 | } |
225 | 268 | ||
@@ -301,6 +344,25 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
301 | new.user_alloc = user_alloc; | 344 | new.user_alloc = user_alloc; |
302 | new.userspace_addr = mem->userspace_addr; | 345 | new.userspace_addr = mem->userspace_addr; |
303 | } | 346 | } |
347 | if (npages && !new.lpage_info) { | ||
348 | int largepages = npages / KVM_PAGES_PER_HPAGE; | ||
349 | if (npages % KVM_PAGES_PER_HPAGE) | ||
350 | largepages++; | ||
351 | if (base_gfn % KVM_PAGES_PER_HPAGE) | ||
352 | largepages++; | ||
353 | |||
354 | new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info)); | ||
355 | |||
356 | if (!new.lpage_info) | ||
357 | goto out_free; | ||
358 | |||
359 | memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info)); | ||
360 | |||
361 | if (base_gfn % KVM_PAGES_PER_HPAGE) | ||
362 | new.lpage_info[0].write_count = 1; | ||
363 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) | ||
364 | new.lpage_info[largepages-1].write_count = 1; | ||
365 | } | ||
304 | 366 | ||
305 | /* Allocate page dirty bitmap if needed */ | 367 | /* Allocate page dirty bitmap if needed */ |
306 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 368 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
@@ -397,6 +459,12 @@ int is_error_page(struct page *page) | |||
397 | } | 459 | } |
398 | EXPORT_SYMBOL_GPL(is_error_page); | 460 | EXPORT_SYMBOL_GPL(is_error_page); |
399 | 461 | ||
462 | int is_error_pfn(pfn_t pfn) | ||
463 | { | ||
464 | return pfn == bad_pfn; | ||
465 | } | ||
466 | EXPORT_SYMBOL_GPL(is_error_pfn); | ||
467 | |||
400 | static inline unsigned long bad_hva(void) | 468 | static inline unsigned long bad_hva(void) |
401 | { | 469 | { |
402 | return PAGE_OFFSET; | 470 | return PAGE_OFFSET; |
@@ -444,7 +512,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | |||
444 | } | 512 | } |
445 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); | 513 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); |
446 | 514 | ||
447 | static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 515 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
448 | { | 516 | { |
449 | struct kvm_memory_slot *slot; | 517 | struct kvm_memory_slot *slot; |
450 | 518 | ||
@@ -458,7 +526,7 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
458 | /* | 526 | /* |
459 | * Requires current->mm->mmap_sem to be held | 527 | * Requires current->mm->mmap_sem to be held |
460 | */ | 528 | */ |
461 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 529 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) |
462 | { | 530 | { |
463 | struct page *page[1]; | 531 | struct page *page[1]; |
464 | unsigned long addr; | 532 | unsigned long addr; |
@@ -469,7 +537,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | |||
469 | addr = gfn_to_hva(kvm, gfn); | 537 | addr = gfn_to_hva(kvm, gfn); |
470 | if (kvm_is_error_hva(addr)) { | 538 | if (kvm_is_error_hva(addr)) { |
471 | get_page(bad_page); | 539 | get_page(bad_page); |
472 | return bad_page; | 540 | return page_to_pfn(bad_page); |
473 | } | 541 | } |
474 | 542 | ||
475 | npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, | 543 | npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, |
@@ -477,27 +545,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | |||
477 | 545 | ||
478 | if (npages != 1) { | 546 | if (npages != 1) { |
479 | get_page(bad_page); | 547 | get_page(bad_page); |
480 | return bad_page; | 548 | return page_to_pfn(bad_page); |
481 | } | 549 | } |
482 | 550 | ||
483 | return page[0]; | 551 | return page_to_pfn(page[0]); |
552 | } | ||
553 | |||
554 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | ||
555 | |||
556 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | ||
557 | { | ||
558 | return pfn_to_page(gfn_to_pfn(kvm, gfn)); | ||
484 | } | 559 | } |
485 | 560 | ||
486 | EXPORT_SYMBOL_GPL(gfn_to_page); | 561 | EXPORT_SYMBOL_GPL(gfn_to_page); |
487 | 562 | ||
488 | void kvm_release_page_clean(struct page *page) | 563 | void kvm_release_page_clean(struct page *page) |
489 | { | 564 | { |
490 | put_page(page); | 565 | kvm_release_pfn_clean(page_to_pfn(page)); |
491 | } | 566 | } |
492 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); | 567 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); |
493 | 568 | ||
569 | void kvm_release_pfn_clean(pfn_t pfn) | ||
570 | { | ||
571 | put_page(pfn_to_page(pfn)); | ||
572 | } | ||
573 | EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); | ||
574 | |||
494 | void kvm_release_page_dirty(struct page *page) | 575 | void kvm_release_page_dirty(struct page *page) |
495 | { | 576 | { |
577 | kvm_release_pfn_dirty(page_to_pfn(page)); | ||
578 | } | ||
579 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); | ||
580 | |||
581 | void kvm_release_pfn_dirty(pfn_t pfn) | ||
582 | { | ||
583 | kvm_set_pfn_dirty(pfn); | ||
584 | kvm_release_pfn_clean(pfn); | ||
585 | } | ||
586 | EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); | ||
587 | |||
588 | void kvm_set_page_dirty(struct page *page) | ||
589 | { | ||
590 | kvm_set_pfn_dirty(page_to_pfn(page)); | ||
591 | } | ||
592 | EXPORT_SYMBOL_GPL(kvm_set_page_dirty); | ||
593 | |||
594 | void kvm_set_pfn_dirty(pfn_t pfn) | ||
595 | { | ||
596 | struct page *page = pfn_to_page(pfn); | ||
496 | if (!PageReserved(page)) | 597 | if (!PageReserved(page)) |
497 | SetPageDirty(page); | 598 | SetPageDirty(page); |
498 | put_page(page); | ||
499 | } | 599 | } |
500 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); | 600 | EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); |
601 | |||
602 | void kvm_set_pfn_accessed(pfn_t pfn) | ||
603 | { | ||
604 | mark_page_accessed(pfn_to_page(pfn)); | ||
605 | } | ||
606 | EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); | ||
607 | |||
608 | void kvm_get_pfn(pfn_t pfn) | ||
609 | { | ||
610 | get_page(pfn_to_page(pfn)); | ||
611 | } | ||
612 | EXPORT_SYMBOL_GPL(kvm_get_pfn); | ||
501 | 613 | ||
502 | static int next_segment(unsigned long len, int offset) | 614 | static int next_segment(unsigned long len, int offset) |
503 | { | 615 | { |
@@ -554,7 +666,9 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | |||
554 | addr = gfn_to_hva(kvm, gfn); | 666 | addr = gfn_to_hva(kvm, gfn); |
555 | if (kvm_is_error_hva(addr)) | 667 | if (kvm_is_error_hva(addr)) |
556 | return -EFAULT; | 668 | return -EFAULT; |
669 | pagefault_disable(); | ||
557 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); | 670 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); |
671 | pagefault_enable(); | ||
558 | if (r) | 672 | if (r) |
559 | return -EFAULT; | 673 | return -EFAULT; |
560 | return 0; | 674 | return 0; |
@@ -651,6 +765,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
651 | * We will block until either an interrupt or a signal wakes us up | 765 | * We will block until either an interrupt or a signal wakes us up |
652 | */ | 766 | */ |
653 | while (!kvm_cpu_has_interrupt(vcpu) | 767 | while (!kvm_cpu_has_interrupt(vcpu) |
768 | && !kvm_cpu_has_pending_timer(vcpu) | ||
654 | && !signal_pending(current) | 769 | && !signal_pending(current) |
655 | && !kvm_arch_vcpu_runnable(vcpu)) { | 770 | && !kvm_arch_vcpu_runnable(vcpu)) { |
656 | set_current_state(TASK_INTERRUPTIBLE); | 771 | set_current_state(TASK_INTERRUPTIBLE); |
@@ -678,8 +793,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
678 | 793 | ||
679 | if (vmf->pgoff == 0) | 794 | if (vmf->pgoff == 0) |
680 | page = virt_to_page(vcpu->run); | 795 | page = virt_to_page(vcpu->run); |
796 | #ifdef CONFIG_X86 | ||
681 | else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) | 797 | else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) |
682 | page = virt_to_page(vcpu->arch.pio_data); | 798 | page = virt_to_page(vcpu->arch.pio_data); |
799 | #endif | ||
683 | else | 800 | else |
684 | return VM_FAULT_SIGBUS; | 801 | return VM_FAULT_SIGBUS; |
685 | get_page(page); | 802 | get_page(page); |
@@ -701,11 +818,11 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) | |||
701 | { | 818 | { |
702 | struct kvm_vcpu *vcpu = filp->private_data; | 819 | struct kvm_vcpu *vcpu = filp->private_data; |
703 | 820 | ||
704 | fput(vcpu->kvm->filp); | 821 | kvm_put_kvm(vcpu->kvm); |
705 | return 0; | 822 | return 0; |
706 | } | 823 | } |
707 | 824 | ||
708 | static struct file_operations kvm_vcpu_fops = { | 825 | static const struct file_operations kvm_vcpu_fops = { |
709 | .release = kvm_vcpu_release, | 826 | .release = kvm_vcpu_release, |
710 | .unlocked_ioctl = kvm_vcpu_ioctl, | 827 | .unlocked_ioctl = kvm_vcpu_ioctl, |
711 | .compat_ioctl = kvm_vcpu_ioctl, | 828 | .compat_ioctl = kvm_vcpu_ioctl, |
@@ -723,9 +840,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) | |||
723 | 840 | ||
724 | r = anon_inode_getfd(&fd, &inode, &file, | 841 | r = anon_inode_getfd(&fd, &inode, &file, |
725 | "kvm-vcpu", &kvm_vcpu_fops, vcpu); | 842 | "kvm-vcpu", &kvm_vcpu_fops, vcpu); |
726 | if (r) | 843 | if (r) { |
844 | kvm_put_kvm(vcpu->kvm); | ||
727 | return r; | 845 | return r; |
728 | atomic_inc(&vcpu->kvm->filp->f_count); | 846 | } |
729 | return fd; | 847 | return fd; |
730 | } | 848 | } |
731 | 849 | ||
@@ -760,6 +878,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
760 | mutex_unlock(&kvm->lock); | 878 | mutex_unlock(&kvm->lock); |
761 | 879 | ||
762 | /* Now it's all set up, let userspace reach it */ | 880 | /* Now it's all set up, let userspace reach it */ |
881 | kvm_get_kvm(kvm); | ||
763 | r = create_vcpu_fd(vcpu); | 882 | r = create_vcpu_fd(vcpu); |
764 | if (r < 0) | 883 | if (r < 0) |
765 | goto unlink; | 884 | goto unlink; |
@@ -802,28 +921,39 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
802 | r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); | 921 | r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); |
803 | break; | 922 | break; |
804 | case KVM_GET_REGS: { | 923 | case KVM_GET_REGS: { |
805 | struct kvm_regs kvm_regs; | 924 | struct kvm_regs *kvm_regs; |
806 | 925 | ||
807 | memset(&kvm_regs, 0, sizeof kvm_regs); | 926 | r = -ENOMEM; |
808 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs); | 927 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); |
809 | if (r) | 928 | if (!kvm_regs) |
810 | goto out; | 929 | goto out; |
930 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); | ||
931 | if (r) | ||
932 | goto out_free1; | ||
811 | r = -EFAULT; | 933 | r = -EFAULT; |
812 | if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs)) | 934 | if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) |
813 | goto out; | 935 | goto out_free1; |
814 | r = 0; | 936 | r = 0; |
937 | out_free1: | ||
938 | kfree(kvm_regs); | ||
815 | break; | 939 | break; |
816 | } | 940 | } |
817 | case KVM_SET_REGS: { | 941 | case KVM_SET_REGS: { |
818 | struct kvm_regs kvm_regs; | 942 | struct kvm_regs *kvm_regs; |
819 | 943 | ||
820 | r = -EFAULT; | 944 | r = -ENOMEM; |
821 | if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) | 945 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); |
946 | if (!kvm_regs) | ||
822 | goto out; | 947 | goto out; |
823 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs); | 948 | r = -EFAULT; |
949 | if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) | ||
950 | goto out_free2; | ||
951 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); | ||
824 | if (r) | 952 | if (r) |
825 | goto out; | 953 | goto out_free2; |
826 | r = 0; | 954 | r = 0; |
955 | out_free2: | ||
956 | kfree(kvm_regs); | ||
827 | break; | 957 | break; |
828 | } | 958 | } |
829 | case KVM_GET_SREGS: { | 959 | case KVM_GET_SREGS: { |
@@ -851,6 +981,30 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
851 | r = 0; | 981 | r = 0; |
852 | break; | 982 | break; |
853 | } | 983 | } |
984 | case KVM_GET_MP_STATE: { | ||
985 | struct kvm_mp_state mp_state; | ||
986 | |||
987 | r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); | ||
988 | if (r) | ||
989 | goto out; | ||
990 | r = -EFAULT; | ||
991 | if (copy_to_user(argp, &mp_state, sizeof mp_state)) | ||
992 | goto out; | ||
993 | r = 0; | ||
994 | break; | ||
995 | } | ||
996 | case KVM_SET_MP_STATE: { | ||
997 | struct kvm_mp_state mp_state; | ||
998 | |||
999 | r = -EFAULT; | ||
1000 | if (copy_from_user(&mp_state, argp, sizeof mp_state)) | ||
1001 | goto out; | ||
1002 | r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); | ||
1003 | if (r) | ||
1004 | goto out; | ||
1005 | r = 0; | ||
1006 | break; | ||
1007 | } | ||
854 | case KVM_TRANSLATE: { | 1008 | case KVM_TRANSLATE: { |
855 | struct kvm_translation tr; | 1009 | struct kvm_translation tr; |
856 | 1010 | ||
@@ -1005,7 +1159,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | |||
1005 | return 0; | 1159 | return 0; |
1006 | } | 1160 | } |
1007 | 1161 | ||
1008 | static struct file_operations kvm_vm_fops = { | 1162 | static const struct file_operations kvm_vm_fops = { |
1009 | .release = kvm_vm_release, | 1163 | .release = kvm_vm_release, |
1010 | .unlocked_ioctl = kvm_vm_ioctl, | 1164 | .unlocked_ioctl = kvm_vm_ioctl, |
1011 | .compat_ioctl = kvm_vm_ioctl, | 1165 | .compat_ioctl = kvm_vm_ioctl, |
@@ -1024,12 +1178,10 @@ static int kvm_dev_ioctl_create_vm(void) | |||
1024 | return PTR_ERR(kvm); | 1178 | return PTR_ERR(kvm); |
1025 | r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); | 1179 | r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); |
1026 | if (r) { | 1180 | if (r) { |
1027 | kvm_destroy_vm(kvm); | 1181 | kvm_put_kvm(kvm); |
1028 | return r; | 1182 | return r; |
1029 | } | 1183 | } |
1030 | 1184 | ||
1031 | kvm->filp = file; | ||
1032 | |||
1033 | return fd; | 1185 | return fd; |
1034 | } | 1186 | } |
1035 | 1187 | ||
@@ -1059,7 +1211,15 @@ static long kvm_dev_ioctl(struct file *filp, | |||
1059 | r = -EINVAL; | 1211 | r = -EINVAL; |
1060 | if (arg) | 1212 | if (arg) |
1061 | goto out; | 1213 | goto out; |
1062 | r = 2 * PAGE_SIZE; | 1214 | r = PAGE_SIZE; /* struct kvm_run */ |
1215 | #ifdef CONFIG_X86 | ||
1216 | r += PAGE_SIZE; /* pio data page */ | ||
1217 | #endif | ||
1218 | break; | ||
1219 | case KVM_TRACE_ENABLE: | ||
1220 | case KVM_TRACE_PAUSE: | ||
1221 | case KVM_TRACE_DISABLE: | ||
1222 | r = kvm_trace_ioctl(ioctl, arg); | ||
1063 | break; | 1223 | break; |
1064 | default: | 1224 | default: |
1065 | return kvm_arch_dev_ioctl(filp, ioctl, arg); | 1225 | return kvm_arch_dev_ioctl(filp, ioctl, arg); |
@@ -1232,9 +1392,9 @@ static void kvm_init_debug(void) | |||
1232 | { | 1392 | { |
1233 | struct kvm_stats_debugfs_item *p; | 1393 | struct kvm_stats_debugfs_item *p; |
1234 | 1394 | ||
1235 | debugfs_dir = debugfs_create_dir("kvm", NULL); | 1395 | kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); |
1236 | for (p = debugfs_entries; p->name; ++p) | 1396 | for (p = debugfs_entries; p->name; ++p) |
1237 | p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, | 1397 | p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, |
1238 | (void *)(long)p->offset, | 1398 | (void *)(long)p->offset, |
1239 | stat_fops[p->kind]); | 1399 | stat_fops[p->kind]); |
1240 | } | 1400 | } |
@@ -1245,7 +1405,7 @@ static void kvm_exit_debug(void) | |||
1245 | 1405 | ||
1246 | for (p = debugfs_entries; p->name; ++p) | 1406 | for (p = debugfs_entries; p->name; ++p) |
1247 | debugfs_remove(p->dentry); | 1407 | debugfs_remove(p->dentry); |
1248 | debugfs_remove(debugfs_dir); | 1408 | debugfs_remove(kvm_debugfs_dir); |
1249 | } | 1409 | } |
1250 | 1410 | ||
1251 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 1411 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) |
@@ -1272,6 +1432,7 @@ static struct sys_device kvm_sysdev = { | |||
1272 | }; | 1432 | }; |
1273 | 1433 | ||
1274 | struct page *bad_page; | 1434 | struct page *bad_page; |
1435 | pfn_t bad_pfn; | ||
1275 | 1436 | ||
1276 | static inline | 1437 | static inline |
1277 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | 1438 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) |
@@ -1313,6 +1474,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
1313 | goto out; | 1474 | goto out; |
1314 | } | 1475 | } |
1315 | 1476 | ||
1477 | bad_pfn = page_to_pfn(bad_page); | ||
1478 | |||
1316 | r = kvm_arch_hardware_setup(); | 1479 | r = kvm_arch_hardware_setup(); |
1317 | if (r < 0) | 1480 | if (r < 0) |
1318 | goto out_free_0; | 1481 | goto out_free_0; |
@@ -1386,6 +1549,7 @@ EXPORT_SYMBOL_GPL(kvm_init); | |||
1386 | 1549 | ||
1387 | void kvm_exit(void) | 1550 | void kvm_exit(void) |
1388 | { | 1551 | { |
1552 | kvm_trace_cleanup(); | ||
1389 | misc_deregister(&kvm_dev); | 1553 | misc_deregister(&kvm_dev); |
1390 | kmem_cache_destroy(kvm_vcpu_cache); | 1554 | kmem_cache_destroy(kvm_vcpu_cache); |
1391 | sysdev_unregister(&kvm_sysdev); | 1555 | sysdev_unregister(&kvm_sysdev); |
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c new file mode 100644 index 000000000000..0e495470788d --- /dev/null +++ b/virt/kvm/kvm_trace.c | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * kvm trace | ||
3 | * | ||
4 | * It is designed to allow debugging traces of kvm to be generated | ||
5 | * on UP / SMP machines. Each trace entry can be timestamped so that | ||
6 | * it's possible to reconstruct a chronological record of trace events. | ||
7 | * The implementation is modeled on the blktrace kernel support. | |
8 | * | ||
9 | * Copyright (c) 2008 Intel Corporation | ||
10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | ||
11 | * | ||
12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com | ||
13 | * | ||
14 | * Date: Feb 2008 | ||
15 | */ | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/relay.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | |||
21 | #include <linux/kvm_host.h> | ||
22 | |||
23 | #define KVM_TRACE_STATE_RUNNING (1 << 0) | ||
24 | #define KVM_TRACE_STATE_PAUSE (1 << 1) | ||
25 | #define KVM_TRACE_STATE_CLEARUP (1 << 2) | ||
26 | |||
27 | struct kvm_trace { | ||
28 | int trace_state; | ||
29 | struct rchan *rchan; | ||
30 | struct dentry *lost_file; | ||
31 | atomic_t lost_records; | ||
32 | }; | ||
33 | static struct kvm_trace *kvm_trace; | ||
34 | |||
35 | struct kvm_trace_probe { | ||
36 | const char *name; | ||
37 | const char *format; | ||
38 | u32 cycle_in; | ||
39 | marker_probe_func *probe_func; | ||
40 | }; | ||
41 | |||
42 | static inline int calc_rec_size(int cycle, int extra) | ||
43 | { | ||
44 | int rec_size = KVM_TRC_HEAD_SIZE; | ||
45 | |||
46 | rec_size += extra; | ||
47 | return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; | ||
48 | } | ||
49 | |||
50 | static void kvm_add_trace(void *probe_private, void *call_data, | ||
51 | const char *format, va_list *args) | ||
52 | { | ||
53 | struct kvm_trace_probe *p = probe_private; | ||
54 | struct kvm_trace *kt = kvm_trace; | ||
55 | struct kvm_trace_rec rec; | ||
56 | struct kvm_vcpu *vcpu; | ||
57 | int i, extra, size; | ||
58 | |||
59 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) | ||
60 | return; | ||
61 | |||
62 | rec.event = va_arg(*args, u32); | ||
63 | vcpu = va_arg(*args, struct kvm_vcpu *); | ||
64 | rec.pid = current->tgid; | ||
65 | rec.vcpu_id = vcpu->vcpu_id; | ||
66 | |||
67 | extra = va_arg(*args, u32); | ||
68 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); | ||
69 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); | ||
70 | rec.extra_u32 = extra; | ||
71 | |||
72 | rec.cycle_in = p->cycle_in; | ||
73 | |||
74 | if (rec.cycle_in) { | ||
75 | u64 cycle = 0; | ||
76 | |||
77 | cycle = get_cycles(); | ||
78 | rec.u.cycle.cycle_lo = (u32)cycle; | ||
79 | rec.u.cycle.cycle_hi = (u32)(cycle >> 32); | ||
80 | |||
81 | for (i = 0; i < rec.extra_u32; i++) | ||
82 | rec.u.cycle.extra_u32[i] = va_arg(*args, u32); | ||
83 | } else { | ||
84 | for (i = 0; i < rec.extra_u32; i++) | ||
85 | rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); | ||
86 | } | ||
87 | |||
88 | size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); | ||
89 | relay_write(kt->rchan, &rec, size); | ||
90 | } | ||
91 | |||
92 | static struct kvm_trace_probe kvm_trace_probes[] = { | ||
93 | { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, | ||
94 | { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, | ||
95 | }; | ||
96 | |||
97 | static int lost_records_get(void *data, u64 *val) | ||
98 | { | ||
99 | struct kvm_trace *kt = data; | ||
100 | |||
101 | *val = atomic_read(&kt->lost_records); | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); | ||
106 | |||
107 | /* | ||
108 | * The relay channel is used in "no-overwrite" mode; it keeps track of how | |
109 | * many times we encountered a full subbuffer, so that the user-space app | |
110 | * can be told how many records were lost. | |
111 | */ | ||
112 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | ||
113 | void *prev_subbuf, size_t prev_padding) | ||
114 | { | ||
115 | struct kvm_trace *kt; | ||
116 | |||
117 | if (!relay_buf_full(buf)) | ||
118 | return 1; | ||
119 | |||
120 | kt = buf->chan->private_data; | ||
121 | atomic_inc(&kt->lost_records); | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | static struct dentry *kvm_create_buf_file_callack(const char *filename, | ||
127 | struct dentry *parent, | ||
128 | int mode, | ||
129 | struct rchan_buf *buf, | ||
130 | int *is_global) | ||
131 | { | ||
132 | return debugfs_create_file(filename, mode, parent, buf, | ||
133 | &relay_file_operations); | ||
134 | } | ||
135 | |||
136 | static int kvm_remove_buf_file_callback(struct dentry *dentry) | ||
137 | { | ||
138 | debugfs_remove(dentry); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static struct rchan_callbacks kvm_relay_callbacks = { | ||
143 | .subbuf_start = kvm_subbuf_start_callback, | ||
144 | .create_buf_file = kvm_create_buf_file_callack, | ||
145 | .remove_buf_file = kvm_remove_buf_file_callback, | ||
146 | }; | ||
147 | |||
148 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) | ||
149 | { | ||
150 | struct kvm_trace *kt; | ||
151 | int i, r = -ENOMEM; | ||
152 | |||
153 | if (!kuts->buf_size || !kuts->buf_nr) | ||
154 | return -EINVAL; | ||
155 | |||
156 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); | ||
157 | if (!kt) | ||
158 | goto err; | ||
159 | |||
160 | r = -EIO; | ||
161 | atomic_set(&kt->lost_records, 0); | ||
162 | kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, | ||
163 | kt, &kvm_trace_lost_ops); | ||
164 | if (!kt->lost_file) | ||
165 | goto err; | ||
166 | |||
167 | kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, | ||
168 | kuts->buf_nr, &kvm_relay_callbacks, kt); | ||
169 | if (!kt->rchan) | ||
170 | goto err; | ||
171 | |||
172 | kvm_trace = kt; | ||
173 | |||
174 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
175 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
176 | |||
177 | r = marker_probe_register(p->name, p->format, p->probe_func, p); | ||
178 | if (r) | ||
179 | printk(KERN_INFO "Unable to register probe %s\n", | ||
180 | p->name); | ||
181 | } | ||
182 | |||
183 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; | ||
184 | |||
185 | return 0; | ||
186 | err: | ||
187 | if (kt) { | ||
188 | if (kt->lost_file) | ||
189 | debugfs_remove(kt->lost_file); | ||
190 | if (kt->rchan) | ||
191 | relay_close(kt->rchan); | ||
192 | kfree(kt); | ||
193 | } | ||
194 | return r; | ||
195 | } | ||
196 | |||
197 | static int kvm_trace_enable(char __user *arg) | ||
198 | { | ||
199 | struct kvm_user_trace_setup kuts; | ||
200 | int ret; | ||
201 | |||
202 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); | ||
203 | if (ret) | ||
204 | return -EFAULT; | ||
205 | |||
206 | ret = do_kvm_trace_enable(&kuts); | ||
207 | if (ret) | ||
208 | return ret; | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | static int kvm_trace_pause(void) | ||
214 | { | ||
215 | struct kvm_trace *kt = kvm_trace; | ||
216 | int r = -EINVAL; | ||
217 | |||
218 | if (kt == NULL) | ||
219 | return r; | ||
220 | |||
221 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { | ||
222 | kt->trace_state = KVM_TRACE_STATE_PAUSE; | ||
223 | relay_flush(kt->rchan); | ||
224 | r = 0; | ||
225 | } | ||
226 | |||
227 | return r; | ||
228 | } | ||
229 | |||
230 | void kvm_trace_cleanup(void) | ||
231 | { | ||
232 | struct kvm_trace *kt = kvm_trace; | ||
233 | int i; | ||
234 | |||
235 | if (kt == NULL) | ||
236 | return; | ||
237 | |||
238 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || | ||
239 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { | ||
240 | |||
241 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; | ||
242 | |||
243 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
244 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
245 | marker_probe_unregister(p->name, p->probe_func, p); | ||
246 | } | ||
247 | |||
248 | relay_close(kt->rchan); | ||
249 | debugfs_remove(kt->lost_file); | ||
250 | kfree(kt); | ||
251 | } | ||
252 | } | ||
253 | |||
254 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | ||
255 | { | ||
256 | void __user *argp = (void __user *)arg; | ||
257 | long r = -EINVAL; | ||
258 | |||
259 | if (!capable(CAP_SYS_ADMIN)) | ||
260 | return -EPERM; | ||
261 | |||
262 | switch (ioctl) { | ||
263 | case KVM_TRACE_ENABLE: | ||
264 | r = kvm_trace_enable(argp); | ||
265 | break; | ||
266 | case KVM_TRACE_PAUSE: | ||
267 | r = kvm_trace_pause(); | ||
268 | break; | ||
269 | case KVM_TRACE_DISABLE: | ||
270 | r = 0; | ||
271 | kvm_trace_cleanup(); | ||
272 | break; | ||
273 | } | ||
274 | |||
275 | return r; | ||
276 | } | ||