aboutsummaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/kvm_main.c230
-rw-r--r--virt/kvm/kvm_trace.c276
2 files changed, 473 insertions, 33 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b2e12893e3f4..c82cf15730a1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -40,6 +40,7 @@
40#include <linux/kvm_para.h> 40#include <linux/kvm_para.h>
41#include <linux/pagemap.h> 41#include <linux/pagemap.h>
42#include <linux/mman.h> 42#include <linux/mman.h>
43#include <linux/swap.h>
43 44
44#include <asm/processor.h> 45#include <asm/processor.h>
45#include <asm/io.h> 46#include <asm/io.h>
@@ -59,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
59 60
60static __read_mostly struct preempt_ops kvm_preempt_ops; 61static __read_mostly struct preempt_ops kvm_preempt_ops;
61 62
62static struct dentry *debugfs_dir; 63struct dentry *kvm_debugfs_dir;
63 64
64static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 65static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
65 unsigned long arg); 66 unsigned long arg);
@@ -119,6 +120,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
119 smp_call_function_mask(cpus, ack_flush, NULL, 1); 120 smp_call_function_mask(cpus, ack_flush, NULL, 1);
120} 121}
121 122
123void kvm_reload_remote_mmus(struct kvm *kvm)
124{
125 int i, cpu;
126 cpumask_t cpus;
127 struct kvm_vcpu *vcpu;
128
129 cpus_clear(cpus);
130 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
131 vcpu = kvm->vcpus[i];
132 if (!vcpu)
133 continue;
134 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
135 continue;
136 cpu = vcpu->cpu;
137 if (cpu != -1 && cpu != raw_smp_processor_id())
138 cpu_set(cpu, cpus);
139 }
140 if (cpus_empty(cpus))
141 return;
142 smp_call_function_mask(cpus, ack_flush, NULL, 1);
143}
144
145
122int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 146int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
123{ 147{
124 struct page *page; 148 struct page *page;
@@ -170,6 +194,7 @@ static struct kvm *kvm_create_vm(void)
170 mutex_init(&kvm->lock); 194 mutex_init(&kvm->lock);
171 kvm_io_bus_init(&kvm->mmio_bus); 195 kvm_io_bus_init(&kvm->mmio_bus);
172 init_rwsem(&kvm->slots_lock); 196 init_rwsem(&kvm->slots_lock);
197 atomic_set(&kvm->users_count, 1);
173 spin_lock(&kvm_lock); 198 spin_lock(&kvm_lock);
174 list_add(&kvm->vm_list, &vm_list); 199 list_add(&kvm->vm_list, &vm_list);
175 spin_unlock(&kvm_lock); 200 spin_unlock(&kvm_lock);
@@ -189,9 +214,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
189 if (!dont || free->dirty_bitmap != dont->dirty_bitmap) 214 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
190 vfree(free->dirty_bitmap); 215 vfree(free->dirty_bitmap);
191 216
217 if (!dont || free->lpage_info != dont->lpage_info)
218 vfree(free->lpage_info);
219
192 free->npages = 0; 220 free->npages = 0;
193 free->dirty_bitmap = NULL; 221 free->dirty_bitmap = NULL;
194 free->rmap = NULL; 222 free->rmap = NULL;
223 free->lpage_info = NULL;
195} 224}
196 225
197void kvm_free_physmem(struct kvm *kvm) 226void kvm_free_physmem(struct kvm *kvm)
@@ -215,11 +244,25 @@ static void kvm_destroy_vm(struct kvm *kvm)
215 mmdrop(mm); 244 mmdrop(mm);
216} 245}
217 246
247void kvm_get_kvm(struct kvm *kvm)
248{
249 atomic_inc(&kvm->users_count);
250}
251EXPORT_SYMBOL_GPL(kvm_get_kvm);
252
253void kvm_put_kvm(struct kvm *kvm)
254{
255 if (atomic_dec_and_test(&kvm->users_count))
256 kvm_destroy_vm(kvm);
257}
258EXPORT_SYMBOL_GPL(kvm_put_kvm);
259
260
218static int kvm_vm_release(struct inode *inode, struct file *filp) 261static int kvm_vm_release(struct inode *inode, struct file *filp)
219{ 262{
220 struct kvm *kvm = filp->private_data; 263 struct kvm *kvm = filp->private_data;
221 264
222 kvm_destroy_vm(kvm); 265 kvm_put_kvm(kvm);
223 return 0; 266 return 0;
224} 267}
225 268
@@ -301,6 +344,25 @@ int __kvm_set_memory_region(struct kvm *kvm,
301 new.user_alloc = user_alloc; 344 new.user_alloc = user_alloc;
302 new.userspace_addr = mem->userspace_addr; 345 new.userspace_addr = mem->userspace_addr;
303 } 346 }
347 if (npages && !new.lpage_info) {
348 int largepages = npages / KVM_PAGES_PER_HPAGE;
349 if (npages % KVM_PAGES_PER_HPAGE)
350 largepages++;
351 if (base_gfn % KVM_PAGES_PER_HPAGE)
352 largepages++;
353
354 new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
355
356 if (!new.lpage_info)
357 goto out_free;
358
359 memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
360
361 if (base_gfn % KVM_PAGES_PER_HPAGE)
362 new.lpage_info[0].write_count = 1;
363 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
364 new.lpage_info[largepages-1].write_count = 1;
365 }
304 366
305 /* Allocate page dirty bitmap if needed */ 367 /* Allocate page dirty bitmap if needed */
306 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { 368 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
@@ -397,6 +459,12 @@ int is_error_page(struct page *page)
397} 459}
398EXPORT_SYMBOL_GPL(is_error_page); 460EXPORT_SYMBOL_GPL(is_error_page);
399 461
462int is_error_pfn(pfn_t pfn)
463{
464 return pfn == bad_pfn;
465}
466EXPORT_SYMBOL_GPL(is_error_pfn);
467
400static inline unsigned long bad_hva(void) 468static inline unsigned long bad_hva(void)
401{ 469{
402 return PAGE_OFFSET; 470 return PAGE_OFFSET;
@@ -444,7 +512,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
444} 512}
445EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); 513EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
446 514
447static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 515unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
448{ 516{
449 struct kvm_memory_slot *slot; 517 struct kvm_memory_slot *slot;
450 518
@@ -458,7 +526,7 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
458/* 526/*
459 * Requires current->mm->mmap_sem to be held 527 * Requires current->mm->mmap_sem to be held
460 */ 528 */
461struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) 529pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
462{ 530{
463 struct page *page[1]; 531 struct page *page[1];
464 unsigned long addr; 532 unsigned long addr;
@@ -469,7 +537,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
469 addr = gfn_to_hva(kvm, gfn); 537 addr = gfn_to_hva(kvm, gfn);
470 if (kvm_is_error_hva(addr)) { 538 if (kvm_is_error_hva(addr)) {
471 get_page(bad_page); 539 get_page(bad_page);
472 return bad_page; 540 return page_to_pfn(bad_page);
473 } 541 }
474 542
475 npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, 543 npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
@@ -477,27 +545,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
477 545
478 if (npages != 1) { 546 if (npages != 1) {
479 get_page(bad_page); 547 get_page(bad_page);
480 return bad_page; 548 return page_to_pfn(bad_page);
481 } 549 }
482 550
483 return page[0]; 551 return page_to_pfn(page[0]);
552}
553
554EXPORT_SYMBOL_GPL(gfn_to_pfn);
555
556struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
557{
558 return pfn_to_page(gfn_to_pfn(kvm, gfn));
484} 559}
485 560
486EXPORT_SYMBOL_GPL(gfn_to_page); 561EXPORT_SYMBOL_GPL(gfn_to_page);
487 562
488void kvm_release_page_clean(struct page *page) 563void kvm_release_page_clean(struct page *page)
489{ 564{
490 put_page(page); 565 kvm_release_pfn_clean(page_to_pfn(page));
491} 566}
492EXPORT_SYMBOL_GPL(kvm_release_page_clean); 567EXPORT_SYMBOL_GPL(kvm_release_page_clean);
493 568
569void kvm_release_pfn_clean(pfn_t pfn)
570{
571 put_page(pfn_to_page(pfn));
572}
573EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
574
494void kvm_release_page_dirty(struct page *page) 575void kvm_release_page_dirty(struct page *page)
495{ 576{
577 kvm_release_pfn_dirty(page_to_pfn(page));
578}
579EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
580
581void kvm_release_pfn_dirty(pfn_t pfn)
582{
583 kvm_set_pfn_dirty(pfn);
584 kvm_release_pfn_clean(pfn);
585}
586EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
587
588void kvm_set_page_dirty(struct page *page)
589{
590 kvm_set_pfn_dirty(page_to_pfn(page));
591}
592EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
593
594void kvm_set_pfn_dirty(pfn_t pfn)
595{
596 struct page *page = pfn_to_page(pfn);
496 if (!PageReserved(page)) 597 if (!PageReserved(page))
497 SetPageDirty(page); 598 SetPageDirty(page);
498 put_page(page);
499} 599}
500EXPORT_SYMBOL_GPL(kvm_release_page_dirty); 600EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
601
602void kvm_set_pfn_accessed(pfn_t pfn)
603{
604 mark_page_accessed(pfn_to_page(pfn));
605}
606EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
607
608void kvm_get_pfn(pfn_t pfn)
609{
610 get_page(pfn_to_page(pfn));
611}
612EXPORT_SYMBOL_GPL(kvm_get_pfn);
501 613
502static int next_segment(unsigned long len, int offset) 614static int next_segment(unsigned long len, int offset)
503{ 615{
@@ -554,7 +666,9 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
554 addr = gfn_to_hva(kvm, gfn); 666 addr = gfn_to_hva(kvm, gfn);
555 if (kvm_is_error_hva(addr)) 667 if (kvm_is_error_hva(addr))
556 return -EFAULT; 668 return -EFAULT;
669 pagefault_disable();
557 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); 670 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
671 pagefault_enable();
558 if (r) 672 if (r)
559 return -EFAULT; 673 return -EFAULT;
560 return 0; 674 return 0;
@@ -651,6 +765,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
651 * We will block until either an interrupt or a signal wakes us up 765 * We will block until either an interrupt or a signal wakes us up
652 */ 766 */
653 while (!kvm_cpu_has_interrupt(vcpu) 767 while (!kvm_cpu_has_interrupt(vcpu)
768 && !kvm_cpu_has_pending_timer(vcpu)
654 && !signal_pending(current) 769 && !signal_pending(current)
655 && !kvm_arch_vcpu_runnable(vcpu)) { 770 && !kvm_arch_vcpu_runnable(vcpu)) {
656 set_current_state(TASK_INTERRUPTIBLE); 771 set_current_state(TASK_INTERRUPTIBLE);
@@ -678,8 +793,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
678 793
679 if (vmf->pgoff == 0) 794 if (vmf->pgoff == 0)
680 page = virt_to_page(vcpu->run); 795 page = virt_to_page(vcpu->run);
796#ifdef CONFIG_X86
681 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) 797 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
682 page = virt_to_page(vcpu->arch.pio_data); 798 page = virt_to_page(vcpu->arch.pio_data);
799#endif
683 else 800 else
684 return VM_FAULT_SIGBUS; 801 return VM_FAULT_SIGBUS;
685 get_page(page); 802 get_page(page);
@@ -701,11 +818,11 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
701{ 818{
702 struct kvm_vcpu *vcpu = filp->private_data; 819 struct kvm_vcpu *vcpu = filp->private_data;
703 820
704 fput(vcpu->kvm->filp); 821 kvm_put_kvm(vcpu->kvm);
705 return 0; 822 return 0;
706} 823}
707 824
708static struct file_operations kvm_vcpu_fops = { 825static const struct file_operations kvm_vcpu_fops = {
709 .release = kvm_vcpu_release, 826 .release = kvm_vcpu_release,
710 .unlocked_ioctl = kvm_vcpu_ioctl, 827 .unlocked_ioctl = kvm_vcpu_ioctl,
711 .compat_ioctl = kvm_vcpu_ioctl, 828 .compat_ioctl = kvm_vcpu_ioctl,
@@ -723,9 +840,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
723 840
724 r = anon_inode_getfd(&fd, &inode, &file, 841 r = anon_inode_getfd(&fd, &inode, &file,
725 "kvm-vcpu", &kvm_vcpu_fops, vcpu); 842 "kvm-vcpu", &kvm_vcpu_fops, vcpu);
726 if (r) 843 if (r) {
844 kvm_put_kvm(vcpu->kvm);
727 return r; 845 return r;
728 atomic_inc(&vcpu->kvm->filp->f_count); 846 }
729 return fd; 847 return fd;
730} 848}
731 849
@@ -760,6 +878,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
760 mutex_unlock(&kvm->lock); 878 mutex_unlock(&kvm->lock);
761 879
762 /* Now it's all set up, let userspace reach it */ 880 /* Now it's all set up, let userspace reach it */
881 kvm_get_kvm(kvm);
763 r = create_vcpu_fd(vcpu); 882 r = create_vcpu_fd(vcpu);
764 if (r < 0) 883 if (r < 0)
765 goto unlink; 884 goto unlink;
@@ -802,28 +921,39 @@ static long kvm_vcpu_ioctl(struct file *filp,
802 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 921 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
803 break; 922 break;
804 case KVM_GET_REGS: { 923 case KVM_GET_REGS: {
805 struct kvm_regs kvm_regs; 924 struct kvm_regs *kvm_regs;
806 925
807 memset(&kvm_regs, 0, sizeof kvm_regs); 926 r = -ENOMEM;
808 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs); 927 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
809 if (r) 928 if (!kvm_regs)
810 goto out; 929 goto out;
930 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
931 if (r)
932 goto out_free1;
811 r = -EFAULT; 933 r = -EFAULT;
812 if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs)) 934 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
813 goto out; 935 goto out_free1;
814 r = 0; 936 r = 0;
937out_free1:
938 kfree(kvm_regs);
815 break; 939 break;
816 } 940 }
817 case KVM_SET_REGS: { 941 case KVM_SET_REGS: {
818 struct kvm_regs kvm_regs; 942 struct kvm_regs *kvm_regs;
819 943
820 r = -EFAULT; 944 r = -ENOMEM;
821 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) 945 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
946 if (!kvm_regs)
822 goto out; 947 goto out;
823 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs); 948 r = -EFAULT;
949 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
950 goto out_free2;
951 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
824 if (r) 952 if (r)
825 goto out; 953 goto out_free2;
826 r = 0; 954 r = 0;
955out_free2:
956 kfree(kvm_regs);
827 break; 957 break;
828 } 958 }
829 case KVM_GET_SREGS: { 959 case KVM_GET_SREGS: {
@@ -851,6 +981,30 @@ static long kvm_vcpu_ioctl(struct file *filp,
851 r = 0; 981 r = 0;
852 break; 982 break;
853 } 983 }
984 case KVM_GET_MP_STATE: {
985 struct kvm_mp_state mp_state;
986
987 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
988 if (r)
989 goto out;
990 r = -EFAULT;
991 if (copy_to_user(argp, &mp_state, sizeof mp_state))
992 goto out;
993 r = 0;
994 break;
995 }
996 case KVM_SET_MP_STATE: {
997 struct kvm_mp_state mp_state;
998
999 r = -EFAULT;
1000 if (copy_from_user(&mp_state, argp, sizeof mp_state))
1001 goto out;
1002 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
1003 if (r)
1004 goto out;
1005 r = 0;
1006 break;
1007 }
854 case KVM_TRANSLATE: { 1008 case KVM_TRANSLATE: {
855 struct kvm_translation tr; 1009 struct kvm_translation tr;
856 1010
@@ -1005,7 +1159,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
1005 return 0; 1159 return 0;
1006} 1160}
1007 1161
1008static struct file_operations kvm_vm_fops = { 1162static const struct file_operations kvm_vm_fops = {
1009 .release = kvm_vm_release, 1163 .release = kvm_vm_release,
1010 .unlocked_ioctl = kvm_vm_ioctl, 1164 .unlocked_ioctl = kvm_vm_ioctl,
1011 .compat_ioctl = kvm_vm_ioctl, 1165 .compat_ioctl = kvm_vm_ioctl,
@@ -1024,12 +1178,10 @@ static int kvm_dev_ioctl_create_vm(void)
1024 return PTR_ERR(kvm); 1178 return PTR_ERR(kvm);
1025 r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); 1179 r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
1026 if (r) { 1180 if (r) {
1027 kvm_destroy_vm(kvm); 1181 kvm_put_kvm(kvm);
1028 return r; 1182 return r;
1029 } 1183 }
1030 1184
1031 kvm->filp = file;
1032
1033 return fd; 1185 return fd;
1034} 1186}
1035 1187
@@ -1059,7 +1211,15 @@ static long kvm_dev_ioctl(struct file *filp,
1059 r = -EINVAL; 1211 r = -EINVAL;
1060 if (arg) 1212 if (arg)
1061 goto out; 1213 goto out;
1062 r = 2 * PAGE_SIZE; 1214 r = PAGE_SIZE; /* struct kvm_run */
1215#ifdef CONFIG_X86
1216 r += PAGE_SIZE; /* pio data page */
1217#endif
1218 break;
1219 case KVM_TRACE_ENABLE:
1220 case KVM_TRACE_PAUSE:
1221 case KVM_TRACE_DISABLE:
1222 r = kvm_trace_ioctl(ioctl, arg);
1063 break; 1223 break;
1064 default: 1224 default:
1065 return kvm_arch_dev_ioctl(filp, ioctl, arg); 1225 return kvm_arch_dev_ioctl(filp, ioctl, arg);
@@ -1232,9 +1392,9 @@ static void kvm_init_debug(void)
1232{ 1392{
1233 struct kvm_stats_debugfs_item *p; 1393 struct kvm_stats_debugfs_item *p;
1234 1394
1235 debugfs_dir = debugfs_create_dir("kvm", NULL); 1395 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
1236 for (p = debugfs_entries; p->name; ++p) 1396 for (p = debugfs_entries; p->name; ++p)
1237 p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, 1397 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
1238 (void *)(long)p->offset, 1398 (void *)(long)p->offset,
1239 stat_fops[p->kind]); 1399 stat_fops[p->kind]);
1240} 1400}
@@ -1245,7 +1405,7 @@ static void kvm_exit_debug(void)
1245 1405
1246 for (p = debugfs_entries; p->name; ++p) 1406 for (p = debugfs_entries; p->name; ++p)
1247 debugfs_remove(p->dentry); 1407 debugfs_remove(p->dentry);
1248 debugfs_remove(debugfs_dir); 1408 debugfs_remove(kvm_debugfs_dir);
1249} 1409}
1250 1410
1251static int kvm_suspend(struct sys_device *dev, pm_message_t state) 1411static int kvm_suspend(struct sys_device *dev, pm_message_t state)
@@ -1272,6 +1432,7 @@ static struct sys_device kvm_sysdev = {
1272}; 1432};
1273 1433
1274struct page *bad_page; 1434struct page *bad_page;
1435pfn_t bad_pfn;
1275 1436
1276static inline 1437static inline
1277struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) 1438struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
@@ -1313,6 +1474,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
1313 goto out; 1474 goto out;
1314 } 1475 }
1315 1476
1477 bad_pfn = page_to_pfn(bad_page);
1478
1316 r = kvm_arch_hardware_setup(); 1479 r = kvm_arch_hardware_setup();
1317 if (r < 0) 1480 if (r < 0)
1318 goto out_free_0; 1481 goto out_free_0;
@@ -1386,6 +1549,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
1386 1549
1387void kvm_exit(void) 1550void kvm_exit(void)
1388{ 1551{
1552 kvm_trace_cleanup();
1389 misc_deregister(&kvm_dev); 1553 misc_deregister(&kvm_dev);
1390 kmem_cache_destroy(kvm_vcpu_cache); 1554 kmem_cache_destroy(kvm_vcpu_cache);
1391 sysdev_unregister(&kvm_sysdev); 1555 sysdev_unregister(&kvm_sysdev);
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
new file mode 100644
index 000000000000..0e495470788d
--- /dev/null
+++ b/virt/kvm/kvm_trace.c
@@ -0,0 +1,276 @@
1/*
2 * kvm trace
3 *
4 * It is designed to allow debugging traces of kvm to be generated
5 * on UP / SMP machines. Each trace entry can be timestamped so that
6 * it's possible to reconstruct a chronological record of trace events.
7 * The implementation refers to blktrace kernel support.
8 *
9 * Copyright (c) 2008 Intel Corporation
10 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
11 *
12 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
13 *
14 * Date: Feb 2008
15 */
16
17#include <linux/module.h>
18#include <linux/relay.h>
19#include <linux/debugfs.h>
20
21#include <linux/kvm_host.h>
22
23#define KVM_TRACE_STATE_RUNNING (1 << 0)
24#define KVM_TRACE_STATE_PAUSE (1 << 1)
25#define KVM_TRACE_STATE_CLEARUP (1 << 2)
26
27struct kvm_trace {
28 int trace_state;
29 struct rchan *rchan;
30 struct dentry *lost_file;
31 atomic_t lost_records;
32};
33static struct kvm_trace *kvm_trace;
34
35struct kvm_trace_probe {
36 const char *name;
37 const char *format;
38 u32 cycle_in;
39 marker_probe_func *probe_func;
40};
41
42static inline int calc_rec_size(int cycle, int extra)
43{
44 int rec_size = KVM_TRC_HEAD_SIZE;
45
46 rec_size += extra;
47 return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
48}
49
50static void kvm_add_trace(void *probe_private, void *call_data,
51 const char *format, va_list *args)
52{
53 struct kvm_trace_probe *p = probe_private;
54 struct kvm_trace *kt = kvm_trace;
55 struct kvm_trace_rec rec;
56 struct kvm_vcpu *vcpu;
57 int i, extra, size;
58
59 if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
60 return;
61
62 rec.event = va_arg(*args, u32);
63 vcpu = va_arg(*args, struct kvm_vcpu *);
64 rec.pid = current->tgid;
65 rec.vcpu_id = vcpu->vcpu_id;
66
67 extra = va_arg(*args, u32);
68 WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
69 extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
70 rec.extra_u32 = extra;
71
72 rec.cycle_in = p->cycle_in;
73
74 if (rec.cycle_in) {
75 u64 cycle = 0;
76
77 cycle = get_cycles();
78 rec.u.cycle.cycle_lo = (u32)cycle;
79 rec.u.cycle.cycle_hi = (u32)(cycle >> 32);
80
81 for (i = 0; i < rec.extra_u32; i++)
82 rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
83 } else {
84 for (i = 0; i < rec.extra_u32; i++)
85 rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
86 }
87
88 size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
89 relay_write(kt->rchan, &rec, size);
90}
91
92static struct kvm_trace_probe kvm_trace_probes[] = {
93 { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
94 { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
95};
96
97static int lost_records_get(void *data, u64 *val)
98{
99 struct kvm_trace *kt = data;
100
101 *val = atomic_read(&kt->lost_records);
102 return 0;
103}
104
105DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
106
107/*
108 * The relay channel is used in "no-overwrite" mode, it keeps trace of how
109 * many times we encountered a full subbuffer, to tell user space app the
110 * lost records there were.
111 */
112static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
113 void *prev_subbuf, size_t prev_padding)
114{
115 struct kvm_trace *kt;
116
117 if (!relay_buf_full(buf))
118 return 1;
119
120 kt = buf->chan->private_data;
121 atomic_inc(&kt->lost_records);
122
123 return 0;
124}
125
126static struct dentry *kvm_create_buf_file_callack(const char *filename,
127 struct dentry *parent,
128 int mode,
129 struct rchan_buf *buf,
130 int *is_global)
131{
132 return debugfs_create_file(filename, mode, parent, buf,
133 &relay_file_operations);
134}
135
136static int kvm_remove_buf_file_callback(struct dentry *dentry)
137{
138 debugfs_remove(dentry);
139 return 0;
140}
141
142static struct rchan_callbacks kvm_relay_callbacks = {
143 .subbuf_start = kvm_subbuf_start_callback,
144 .create_buf_file = kvm_create_buf_file_callack,
145 .remove_buf_file = kvm_remove_buf_file_callback,
146};
147
148static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
149{
150 struct kvm_trace *kt;
151 int i, r = -ENOMEM;
152
153 if (!kuts->buf_size || !kuts->buf_nr)
154 return -EINVAL;
155
156 kt = kzalloc(sizeof(*kt), GFP_KERNEL);
157 if (!kt)
158 goto err;
159
160 r = -EIO;
161 atomic_set(&kt->lost_records, 0);
162 kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
163 kt, &kvm_trace_lost_ops);
164 if (!kt->lost_file)
165 goto err;
166
167 kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
168 kuts->buf_nr, &kvm_relay_callbacks, kt);
169 if (!kt->rchan)
170 goto err;
171
172 kvm_trace = kt;
173
174 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
175 struct kvm_trace_probe *p = &kvm_trace_probes[i];
176
177 r = marker_probe_register(p->name, p->format, p->probe_func, p);
178 if (r)
179 printk(KERN_INFO "Unable to register probe %s\n",
180 p->name);
181 }
182
183 kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
184
185 return 0;
186err:
187 if (kt) {
188 if (kt->lost_file)
189 debugfs_remove(kt->lost_file);
190 if (kt->rchan)
191 relay_close(kt->rchan);
192 kfree(kt);
193 }
194 return r;
195}
196
197static int kvm_trace_enable(char __user *arg)
198{
199 struct kvm_user_trace_setup kuts;
200 int ret;
201
202 ret = copy_from_user(&kuts, arg, sizeof(kuts));
203 if (ret)
204 return -EFAULT;
205
206 ret = do_kvm_trace_enable(&kuts);
207 if (ret)
208 return ret;
209
210 return 0;
211}
212
213static int kvm_trace_pause(void)
214{
215 struct kvm_trace *kt = kvm_trace;
216 int r = -EINVAL;
217
218 if (kt == NULL)
219 return r;
220
221 if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
222 kt->trace_state = KVM_TRACE_STATE_PAUSE;
223 relay_flush(kt->rchan);
224 r = 0;
225 }
226
227 return r;
228}
229
230void kvm_trace_cleanup(void)
231{
232 struct kvm_trace *kt = kvm_trace;
233 int i;
234
235 if (kt == NULL)
236 return;
237
238 if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
239 kt->trace_state == KVM_TRACE_STATE_PAUSE) {
240
241 kt->trace_state = KVM_TRACE_STATE_CLEARUP;
242
243 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
244 struct kvm_trace_probe *p = &kvm_trace_probes[i];
245 marker_probe_unregister(p->name, p->probe_func, p);
246 }
247
248 relay_close(kt->rchan);
249 debugfs_remove(kt->lost_file);
250 kfree(kt);
251 }
252}
253
254int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
255{
256 void __user *argp = (void __user *)arg;
257 long r = -EINVAL;
258
259 if (!capable(CAP_SYS_ADMIN))
260 return -EPERM;
261
262 switch (ioctl) {
263 case KVM_TRACE_ENABLE:
264 r = kvm_trace_enable(argp);
265 break;
266 case KVM_TRACE_PAUSE:
267 r = kvm_trace_pause();
268 break;
269 case KVM_TRACE_DISABLE:
270 r = 0;
271 kvm_trace_cleanup();
272 break;
273 }
274
275 return r;
276}