diff options
Diffstat (limited to 'drivers/kvm/kvm_main.c')
-rw-r--r-- | drivers/kvm/kvm_main.c | 792 |
1 files changed, 675 insertions, 117 deletions
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index dc7a8c78cbf9..c8b8cfa332bb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c | |||
@@ -51,27 +51,27 @@ static DEFINE_SPINLOCK(kvm_lock); | |||
51 | static LIST_HEAD(vm_list); | 51 | static LIST_HEAD(vm_list); |
52 | 52 | ||
53 | struct kvm_arch_ops *kvm_arch_ops; | 53 | struct kvm_arch_ops *kvm_arch_ops; |
54 | struct kvm_stat kvm_stat; | 54 | |
55 | EXPORT_SYMBOL_GPL(kvm_stat); | 55 | #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) |
56 | 56 | ||
57 | static struct kvm_stats_debugfs_item { | 57 | static struct kvm_stats_debugfs_item { |
58 | const char *name; | 58 | const char *name; |
59 | u32 *data; | 59 | int offset; |
60 | struct dentry *dentry; | 60 | struct dentry *dentry; |
61 | } debugfs_entries[] = { | 61 | } debugfs_entries[] = { |
62 | { "pf_fixed", &kvm_stat.pf_fixed }, | 62 | { "pf_fixed", STAT_OFFSET(pf_fixed) }, |
63 | { "pf_guest", &kvm_stat.pf_guest }, | 63 | { "pf_guest", STAT_OFFSET(pf_guest) }, |
64 | { "tlb_flush", &kvm_stat.tlb_flush }, | 64 | { "tlb_flush", STAT_OFFSET(tlb_flush) }, |
65 | { "invlpg", &kvm_stat.invlpg }, | 65 | { "invlpg", STAT_OFFSET(invlpg) }, |
66 | { "exits", &kvm_stat.exits }, | 66 | { "exits", STAT_OFFSET(exits) }, |
67 | { "io_exits", &kvm_stat.io_exits }, | 67 | { "io_exits", STAT_OFFSET(io_exits) }, |
68 | { "mmio_exits", &kvm_stat.mmio_exits }, | 68 | { "mmio_exits", STAT_OFFSET(mmio_exits) }, |
69 | { "signal_exits", &kvm_stat.signal_exits }, | 69 | { "signal_exits", STAT_OFFSET(signal_exits) }, |
70 | { "irq_window", &kvm_stat.irq_window_exits }, | 70 | { "irq_window", STAT_OFFSET(irq_window_exits) }, |
71 | { "halt_exits", &kvm_stat.halt_exits }, | 71 | { "halt_exits", STAT_OFFSET(halt_exits) }, |
72 | { "request_irq", &kvm_stat.request_irq_exits }, | 72 | { "request_irq", STAT_OFFSET(request_irq_exits) }, |
73 | { "irq_exits", &kvm_stat.irq_exits }, | 73 | { "irq_exits", STAT_OFFSET(irq_exits) }, |
74 | { NULL, NULL } | 74 | { NULL } |
75 | }; | 75 | }; |
76 | 76 | ||
77 | static struct dentry *debugfs_dir; | 77 | static struct dentry *debugfs_dir; |
@@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm) | |||
346 | kvm_free_physmem_slot(&kvm->memslots[i], NULL); | 346 | kvm_free_physmem_slot(&kvm->memslots[i], NULL); |
347 | } | 347 | } |
348 | 348 | ||
349 | static void free_pio_guest_pages(struct kvm_vcpu *vcpu) | ||
350 | { | ||
351 | int i; | ||
352 | |||
353 | for (i = 0; i < 2; ++i) | ||
354 | if (vcpu->pio.guest_pages[i]) { | ||
355 | __free_page(vcpu->pio.guest_pages[i]); | ||
356 | vcpu->pio.guest_pages[i] = NULL; | ||
357 | } | ||
358 | } | ||
359 | |||
349 | static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | 360 | static void kvm_free_vcpu(struct kvm_vcpu *vcpu) |
350 | { | 361 | { |
351 | if (!vcpu->vmcs) | 362 | if (!vcpu->vmcs) |
@@ -355,6 +366,11 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | |||
355 | kvm_mmu_destroy(vcpu); | 366 | kvm_mmu_destroy(vcpu); |
356 | vcpu_put(vcpu); | 367 | vcpu_put(vcpu); |
357 | kvm_arch_ops->vcpu_free(vcpu); | 368 | kvm_arch_ops->vcpu_free(vcpu); |
369 | free_page((unsigned long)vcpu->run); | ||
370 | vcpu->run = NULL; | ||
371 | free_page((unsigned long)vcpu->pio_data); | ||
372 | vcpu->pio_data = NULL; | ||
373 | free_pio_guest_pages(vcpu); | ||
358 | } | 374 | } |
359 | 375 | ||
360 | static void kvm_free_vcpus(struct kvm *kvm) | 376 | static void kvm_free_vcpus(struct kvm *kvm) |
@@ -404,12 +420,12 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
404 | u64 pdpte; | 420 | u64 pdpte; |
405 | u64 *pdpt; | 421 | u64 *pdpt; |
406 | int ret; | 422 | int ret; |
407 | struct kvm_memory_slot *memslot; | 423 | struct page *page; |
408 | 424 | ||
409 | spin_lock(&vcpu->kvm->lock); | 425 | spin_lock(&vcpu->kvm->lock); |
410 | memslot = gfn_to_memslot(vcpu->kvm, pdpt_gfn); | 426 | page = gfn_to_page(vcpu->kvm, pdpt_gfn); |
411 | /* FIXME: !memslot - emulate? 0xff? */ | 427 | /* FIXME: !page - emulate? 0xff? */ |
412 | pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0); | 428 | pdpt = kmap_atomic(page, KM_USER0); |
413 | 429 | ||
414 | ret = 1; | 430 | ret = 1; |
415 | for (i = 0; i < 4; ++i) { | 431 | for (i = 0; i < 4; ++i) { |
@@ -494,7 +510,6 @@ EXPORT_SYMBOL_GPL(set_cr0); | |||
494 | 510 | ||
495 | void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 511 | void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
496 | { | 512 | { |
497 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | ||
498 | set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); | 513 | set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); |
499 | } | 514 | } |
500 | EXPORT_SYMBOL_GPL(lmsw); | 515 | EXPORT_SYMBOL_GPL(lmsw); |
@@ -830,7 +845,73 @@ out: | |||
830 | return r; | 845 | return r; |
831 | } | 846 | } |
832 | 847 | ||
833 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 848 | /* |
849 | * Set a new alias region. Aliases map a portion of physical memory into | ||
850 | * another portion. This is useful for memory windows, for example the PC | ||
851 | * VGA region. | ||
852 | */ | ||
853 | static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | ||
854 | struct kvm_memory_alias *alias) | ||
855 | { | ||
856 | int r, n; | ||
857 | struct kvm_mem_alias *p; | ||
858 | |||
859 | r = -EINVAL; | ||
860 | /* General sanity checks */ | ||
861 | if (alias->memory_size & (PAGE_SIZE - 1)) | ||
862 | goto out; | ||
863 | if (alias->guest_phys_addr & (PAGE_SIZE - 1)) | ||
864 | goto out; | ||
865 | if (alias->slot >= KVM_ALIAS_SLOTS) | ||
866 | goto out; | ||
867 | if (alias->guest_phys_addr + alias->memory_size | ||
868 | < alias->guest_phys_addr) | ||
869 | goto out; | ||
870 | if (alias->target_phys_addr + alias->memory_size | ||
871 | < alias->target_phys_addr) | ||
872 | goto out; | ||
873 | |||
874 | spin_lock(&kvm->lock); | ||
875 | |||
876 | p = &kvm->aliases[alias->slot]; | ||
877 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | ||
878 | p->npages = alias->memory_size >> PAGE_SHIFT; | ||
879 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | ||
880 | |||
881 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | ||
882 | if (kvm->aliases[n - 1].npages) | ||
883 | break; | ||
884 | kvm->naliases = n; | ||
885 | |||
886 | spin_unlock(&kvm->lock); | ||
887 | |||
888 | vcpu_load(&kvm->vcpus[0]); | ||
889 | spin_lock(&kvm->lock); | ||
890 | kvm_mmu_zap_all(&kvm->vcpus[0]); | ||
891 | spin_unlock(&kvm->lock); | ||
892 | vcpu_put(&kvm->vcpus[0]); | ||
893 | |||
894 | return 0; | ||
895 | |||
896 | out: | ||
897 | return r; | ||
898 | } | ||
899 | |||
900 | static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
901 | { | ||
902 | int i; | ||
903 | struct kvm_mem_alias *alias; | ||
904 | |||
905 | for (i = 0; i < kvm->naliases; ++i) { | ||
906 | alias = &kvm->aliases[i]; | ||
907 | if (gfn >= alias->base_gfn | ||
908 | && gfn < alias->base_gfn + alias->npages) | ||
909 | return alias->target_gfn + gfn - alias->base_gfn; | ||
910 | } | ||
911 | return gfn; | ||
912 | } | ||
913 | |||
914 | static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | ||
834 | { | 915 | { |
835 | int i; | 916 | int i; |
836 | 917 | ||
@@ -843,7 +924,24 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | |||
843 | } | 924 | } |
844 | return NULL; | 925 | return NULL; |
845 | } | 926 | } |
846 | EXPORT_SYMBOL_GPL(gfn_to_memslot); | 927 | |
928 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | ||
929 | { | ||
930 | gfn = unalias_gfn(kvm, gfn); | ||
931 | return __gfn_to_memslot(kvm, gfn); | ||
932 | } | ||
933 | |||
934 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | ||
935 | { | ||
936 | struct kvm_memory_slot *slot; | ||
937 | |||
938 | gfn = unalias_gfn(kvm, gfn); | ||
939 | slot = __gfn_to_memslot(kvm, gfn); | ||
940 | if (!slot) | ||
941 | return NULL; | ||
942 | return slot->phys_mem[gfn - slot->base_gfn]; | ||
943 | } | ||
944 | EXPORT_SYMBOL_GPL(gfn_to_page); | ||
847 | 945 | ||
848 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | 946 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) |
849 | { | 947 | { |
@@ -871,7 +969,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
871 | } | 969 | } |
872 | 970 | ||
873 | static int emulator_read_std(unsigned long addr, | 971 | static int emulator_read_std(unsigned long addr, |
874 | unsigned long *val, | 972 | void *val, |
875 | unsigned int bytes, | 973 | unsigned int bytes, |
876 | struct x86_emulate_ctxt *ctxt) | 974 | struct x86_emulate_ctxt *ctxt) |
877 | { | 975 | { |
@@ -883,20 +981,20 @@ static int emulator_read_std(unsigned long addr, | |||
883 | unsigned offset = addr & (PAGE_SIZE-1); | 981 | unsigned offset = addr & (PAGE_SIZE-1); |
884 | unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); | 982 | unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); |
885 | unsigned long pfn; | 983 | unsigned long pfn; |
886 | struct kvm_memory_slot *memslot; | 984 | struct page *page; |
887 | void *page; | 985 | void *page_virt; |
888 | 986 | ||
889 | if (gpa == UNMAPPED_GVA) | 987 | if (gpa == UNMAPPED_GVA) |
890 | return X86EMUL_PROPAGATE_FAULT; | 988 | return X86EMUL_PROPAGATE_FAULT; |
891 | pfn = gpa >> PAGE_SHIFT; | 989 | pfn = gpa >> PAGE_SHIFT; |
892 | memslot = gfn_to_memslot(vcpu->kvm, pfn); | 990 | page = gfn_to_page(vcpu->kvm, pfn); |
893 | if (!memslot) | 991 | if (!page) |
894 | return X86EMUL_UNHANDLEABLE; | 992 | return X86EMUL_UNHANDLEABLE; |
895 | page = kmap_atomic(gfn_to_page(memslot, pfn), KM_USER0); | 993 | page_virt = kmap_atomic(page, KM_USER0); |
896 | 994 | ||
897 | memcpy(data, page + offset, tocopy); | 995 | memcpy(data, page_virt + offset, tocopy); |
898 | 996 | ||
899 | kunmap_atomic(page, KM_USER0); | 997 | kunmap_atomic(page_virt, KM_USER0); |
900 | 998 | ||
901 | bytes -= tocopy; | 999 | bytes -= tocopy; |
902 | data += tocopy; | 1000 | data += tocopy; |
@@ -907,7 +1005,7 @@ static int emulator_read_std(unsigned long addr, | |||
907 | } | 1005 | } |
908 | 1006 | ||
909 | static int emulator_write_std(unsigned long addr, | 1007 | static int emulator_write_std(unsigned long addr, |
910 | unsigned long val, | 1008 | const void *val, |
911 | unsigned int bytes, | 1009 | unsigned int bytes, |
912 | struct x86_emulate_ctxt *ctxt) | 1010 | struct x86_emulate_ctxt *ctxt) |
913 | { | 1011 | { |
@@ -917,7 +1015,7 @@ static int emulator_write_std(unsigned long addr, | |||
917 | } | 1015 | } |
918 | 1016 | ||
919 | static int emulator_read_emulated(unsigned long addr, | 1017 | static int emulator_read_emulated(unsigned long addr, |
920 | unsigned long *val, | 1018 | void *val, |
921 | unsigned int bytes, | 1019 | unsigned int bytes, |
922 | struct x86_emulate_ctxt *ctxt) | 1020 | struct x86_emulate_ctxt *ctxt) |
923 | { | 1021 | { |
@@ -945,37 +1043,37 @@ static int emulator_read_emulated(unsigned long addr, | |||
945 | } | 1043 | } |
946 | 1044 | ||
947 | static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 1045 | static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
948 | unsigned long val, int bytes) | 1046 | const void *val, int bytes) |
949 | { | 1047 | { |
950 | struct kvm_memory_slot *m; | ||
951 | struct page *page; | 1048 | struct page *page; |
952 | void *virt; | 1049 | void *virt; |
953 | 1050 | ||
954 | if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) | 1051 | if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) |
955 | return 0; | 1052 | return 0; |
956 | m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); | 1053 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
957 | if (!m) | 1054 | if (!page) |
958 | return 0; | 1055 | return 0; |
959 | page = gfn_to_page(m, gpa >> PAGE_SHIFT); | ||
960 | kvm_mmu_pre_write(vcpu, gpa, bytes); | 1056 | kvm_mmu_pre_write(vcpu, gpa, bytes); |
961 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); | 1057 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); |
962 | virt = kmap_atomic(page, KM_USER0); | 1058 | virt = kmap_atomic(page, KM_USER0); |
963 | memcpy(virt + offset_in_page(gpa), &val, bytes); | 1059 | memcpy(virt + offset_in_page(gpa), val, bytes); |
964 | kunmap_atomic(virt, KM_USER0); | 1060 | kunmap_atomic(virt, KM_USER0); |
965 | kvm_mmu_post_write(vcpu, gpa, bytes); | 1061 | kvm_mmu_post_write(vcpu, gpa, bytes); |
966 | return 1; | 1062 | return 1; |
967 | } | 1063 | } |
968 | 1064 | ||
969 | static int emulator_write_emulated(unsigned long addr, | 1065 | static int emulator_write_emulated(unsigned long addr, |
970 | unsigned long val, | 1066 | const void *val, |
971 | unsigned int bytes, | 1067 | unsigned int bytes, |
972 | struct x86_emulate_ctxt *ctxt) | 1068 | struct x86_emulate_ctxt *ctxt) |
973 | { | 1069 | { |
974 | struct kvm_vcpu *vcpu = ctxt->vcpu; | 1070 | struct kvm_vcpu *vcpu = ctxt->vcpu; |
975 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); | 1071 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); |
976 | 1072 | ||
977 | if (gpa == UNMAPPED_GVA) | 1073 | if (gpa == UNMAPPED_GVA) { |
1074 | kvm_arch_ops->inject_page_fault(vcpu, addr, 2); | ||
978 | return X86EMUL_PROPAGATE_FAULT; | 1075 | return X86EMUL_PROPAGATE_FAULT; |
1076 | } | ||
979 | 1077 | ||
980 | if (emulator_write_phys(vcpu, gpa, val, bytes)) | 1078 | if (emulator_write_phys(vcpu, gpa, val, bytes)) |
981 | return X86EMUL_CONTINUE; | 1079 | return X86EMUL_CONTINUE; |
@@ -984,14 +1082,14 @@ static int emulator_write_emulated(unsigned long addr, | |||
984 | vcpu->mmio_phys_addr = gpa; | 1082 | vcpu->mmio_phys_addr = gpa; |
985 | vcpu->mmio_size = bytes; | 1083 | vcpu->mmio_size = bytes; |
986 | vcpu->mmio_is_write = 1; | 1084 | vcpu->mmio_is_write = 1; |
987 | memcpy(vcpu->mmio_data, &val, bytes); | 1085 | memcpy(vcpu->mmio_data, val, bytes); |
988 | 1086 | ||
989 | return X86EMUL_CONTINUE; | 1087 | return X86EMUL_CONTINUE; |
990 | } | 1088 | } |
991 | 1089 | ||
992 | static int emulator_cmpxchg_emulated(unsigned long addr, | 1090 | static int emulator_cmpxchg_emulated(unsigned long addr, |
993 | unsigned long old, | 1091 | const void *old, |
994 | unsigned long new, | 1092 | const void *new, |
995 | unsigned int bytes, | 1093 | unsigned int bytes, |
996 | struct x86_emulate_ctxt *ctxt) | 1094 | struct x86_emulate_ctxt *ctxt) |
997 | { | 1095 | { |
@@ -1004,30 +1102,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1004 | return emulator_write_emulated(addr, new, bytes, ctxt); | 1102 | return emulator_write_emulated(addr, new, bytes, ctxt); |
1005 | } | 1103 | } |
1006 | 1104 | ||
1007 | #ifdef CONFIG_X86_32 | ||
1008 | |||
1009 | static int emulator_cmpxchg8b_emulated(unsigned long addr, | ||
1010 | unsigned long old_lo, | ||
1011 | unsigned long old_hi, | ||
1012 | unsigned long new_lo, | ||
1013 | unsigned long new_hi, | ||
1014 | struct x86_emulate_ctxt *ctxt) | ||
1015 | { | ||
1016 | static int reported; | ||
1017 | int r; | ||
1018 | |||
1019 | if (!reported) { | ||
1020 | reported = 1; | ||
1021 | printk(KERN_WARNING "kvm: emulating exchange8b as write\n"); | ||
1022 | } | ||
1023 | r = emulator_write_emulated(addr, new_lo, 4, ctxt); | ||
1024 | if (r != X86EMUL_CONTINUE) | ||
1025 | return r; | ||
1026 | return emulator_write_emulated(addr+4, new_hi, 4, ctxt); | ||
1027 | } | ||
1028 | |||
1029 | #endif | ||
1030 | |||
1031 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 1105 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
1032 | { | 1106 | { |
1033 | return kvm_arch_ops->get_segment_base(vcpu, seg); | 1107 | return kvm_arch_ops->get_segment_base(vcpu, seg); |
@@ -1042,7 +1116,6 @@ int emulate_clts(struct kvm_vcpu *vcpu) | |||
1042 | { | 1116 | { |
1043 | unsigned long cr0; | 1117 | unsigned long cr0; |
1044 | 1118 | ||
1045 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | ||
1046 | cr0 = vcpu->cr0 & ~CR0_TS_MASK; | 1119 | cr0 = vcpu->cr0 & ~CR0_TS_MASK; |
1047 | kvm_arch_ops->set_cr0(vcpu, cr0); | 1120 | kvm_arch_ops->set_cr0(vcpu, cr0); |
1048 | return X86EMUL_CONTINUE; | 1121 | return X86EMUL_CONTINUE; |
@@ -1102,9 +1175,6 @@ struct x86_emulate_ops emulate_ops = { | |||
1102 | .read_emulated = emulator_read_emulated, | 1175 | .read_emulated = emulator_read_emulated, |
1103 | .write_emulated = emulator_write_emulated, | 1176 | .write_emulated = emulator_write_emulated, |
1104 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 1177 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
1105 | #ifdef CONFIG_X86_32 | ||
1106 | .cmpxchg8b_emulated = emulator_cmpxchg8b_emulated, | ||
1107 | #endif | ||
1108 | }; | 1178 | }; |
1109 | 1179 | ||
1110 | int emulate_instruction(struct kvm_vcpu *vcpu, | 1180 | int emulate_instruction(struct kvm_vcpu *vcpu, |
@@ -1116,6 +1186,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1116 | int r; | 1186 | int r; |
1117 | int cs_db, cs_l; | 1187 | int cs_db, cs_l; |
1118 | 1188 | ||
1189 | vcpu->mmio_fault_cr2 = cr2; | ||
1119 | kvm_arch_ops->cache_regs(vcpu); | 1190 | kvm_arch_ops->cache_regs(vcpu); |
1120 | 1191 | ||
1121 | kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 1192 | kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
@@ -1166,8 +1237,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1166 | kvm_arch_ops->decache_regs(vcpu); | 1237 | kvm_arch_ops->decache_regs(vcpu); |
1167 | kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); | 1238 | kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); |
1168 | 1239 | ||
1169 | if (vcpu->mmio_is_write) | 1240 | if (vcpu->mmio_is_write) { |
1241 | vcpu->mmio_needed = 0; | ||
1170 | return EMULATE_DO_MMIO; | 1242 | return EMULATE_DO_MMIO; |
1243 | } | ||
1171 | 1244 | ||
1172 | return EMULATE_DONE; | 1245 | return EMULATE_DONE; |
1173 | } | 1246 | } |
@@ -1177,7 +1250,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1177 | { | 1250 | { |
1178 | unsigned long nr, a0, a1, a2, a3, a4, a5, ret; | 1251 | unsigned long nr, a0, a1, a2, a3, a4, a5, ret; |
1179 | 1252 | ||
1180 | kvm_arch_ops->decache_regs(vcpu); | 1253 | kvm_arch_ops->cache_regs(vcpu); |
1181 | ret = -KVM_EINVAL; | 1254 | ret = -KVM_EINVAL; |
1182 | #ifdef CONFIG_X86_64 | 1255 | #ifdef CONFIG_X86_64 |
1183 | if (is_long_mode(vcpu)) { | 1256 | if (is_long_mode(vcpu)) { |
@@ -1201,10 +1274,19 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1201 | } | 1274 | } |
1202 | switch (nr) { | 1275 | switch (nr) { |
1203 | default: | 1276 | default: |
1204 | ; | 1277 | run->hypercall.args[0] = a0; |
1278 | run->hypercall.args[1] = a1; | ||
1279 | run->hypercall.args[2] = a2; | ||
1280 | run->hypercall.args[3] = a3; | ||
1281 | run->hypercall.args[4] = a4; | ||
1282 | run->hypercall.args[5] = a5; | ||
1283 | run->hypercall.ret = ret; | ||
1284 | run->hypercall.longmode = is_long_mode(vcpu); | ||
1285 | kvm_arch_ops->decache_regs(vcpu); | ||
1286 | return 0; | ||
1205 | } | 1287 | } |
1206 | vcpu->regs[VCPU_REGS_RAX] = ret; | 1288 | vcpu->regs[VCPU_REGS_RAX] = ret; |
1207 | kvm_arch_ops->cache_regs(vcpu); | 1289 | kvm_arch_ops->decache_regs(vcpu); |
1208 | return 1; | 1290 | return 1; |
1209 | } | 1291 | } |
1210 | EXPORT_SYMBOL_GPL(kvm_hypercall); | 1292 | EXPORT_SYMBOL_GPL(kvm_hypercall); |
@@ -1237,7 +1319,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | |||
1237 | 1319 | ||
1238 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 1320 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
1239 | { | 1321 | { |
1240 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | 1322 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); |
1241 | switch (cr) { | 1323 | switch (cr) { |
1242 | case 0: | 1324 | case 0: |
1243 | return vcpu->cr0; | 1325 | return vcpu->cr0; |
@@ -1442,6 +1524,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1442 | printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", | 1524 | printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", |
1443 | __FUNCTION__, data); | 1525 | __FUNCTION__, data); |
1444 | break; | 1526 | break; |
1527 | case MSR_IA32_MCG_STATUS: | ||
1528 | printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", | ||
1529 | __FUNCTION__, data); | ||
1530 | break; | ||
1445 | case MSR_IA32_UCODE_REV: | 1531 | case MSR_IA32_UCODE_REV: |
1446 | case MSR_IA32_UCODE_WRITE: | 1532 | case MSR_IA32_UCODE_WRITE: |
1447 | case 0x200 ... 0x2ff: /* MTRRs */ | 1533 | case 0x200 ... 0x2ff: /* MTRRs */ |
@@ -1478,6 +1564,8 @@ static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1478 | 1564 | ||
1479 | void kvm_resched(struct kvm_vcpu *vcpu) | 1565 | void kvm_resched(struct kvm_vcpu *vcpu) |
1480 | { | 1566 | { |
1567 | if (!need_resched()) | ||
1568 | return; | ||
1481 | vcpu_put(vcpu); | 1569 | vcpu_put(vcpu); |
1482 | cond_resched(); | 1570 | cond_resched(); |
1483 | vcpu_load(vcpu); | 1571 | vcpu_load(vcpu); |
@@ -1502,29 +1590,250 @@ void save_msrs(struct vmx_msr_entry *e, int n) | |||
1502 | } | 1590 | } |
1503 | EXPORT_SYMBOL_GPL(save_msrs); | 1591 | EXPORT_SYMBOL_GPL(save_msrs); |
1504 | 1592 | ||
1593 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
1594 | { | ||
1595 | int i; | ||
1596 | u32 function; | ||
1597 | struct kvm_cpuid_entry *e, *best; | ||
1598 | |||
1599 | kvm_arch_ops->cache_regs(vcpu); | ||
1600 | function = vcpu->regs[VCPU_REGS_RAX]; | ||
1601 | vcpu->regs[VCPU_REGS_RAX] = 0; | ||
1602 | vcpu->regs[VCPU_REGS_RBX] = 0; | ||
1603 | vcpu->regs[VCPU_REGS_RCX] = 0; | ||
1604 | vcpu->regs[VCPU_REGS_RDX] = 0; | ||
1605 | best = NULL; | ||
1606 | for (i = 0; i < vcpu->cpuid_nent; ++i) { | ||
1607 | e = &vcpu->cpuid_entries[i]; | ||
1608 | if (e->function == function) { | ||
1609 | best = e; | ||
1610 | break; | ||
1611 | } | ||
1612 | /* | ||
1613 | * Both basic or both extended? | ||
1614 | */ | ||
1615 | if (((e->function ^ function) & 0x80000000) == 0) | ||
1616 | if (!best || e->function > best->function) | ||
1617 | best = e; | ||
1618 | } | ||
1619 | if (best) { | ||
1620 | vcpu->regs[VCPU_REGS_RAX] = best->eax; | ||
1621 | vcpu->regs[VCPU_REGS_RBX] = best->ebx; | ||
1622 | vcpu->regs[VCPU_REGS_RCX] = best->ecx; | ||
1623 | vcpu->regs[VCPU_REGS_RDX] = best->edx; | ||
1624 | } | ||
1625 | kvm_arch_ops->decache_regs(vcpu); | ||
1626 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
1627 | } | ||
1628 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | ||
1629 | |||
1630 | static int pio_copy_data(struct kvm_vcpu *vcpu) | ||
1631 | { | ||
1632 | void *p = vcpu->pio_data; | ||
1633 | void *q; | ||
1634 | unsigned bytes; | ||
1635 | int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; | ||
1636 | |||
1637 | kvm_arch_ops->vcpu_put(vcpu); | ||
1638 | q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, | ||
1639 | PAGE_KERNEL); | ||
1640 | if (!q) { | ||
1641 | kvm_arch_ops->vcpu_load(vcpu); | ||
1642 | free_pio_guest_pages(vcpu); | ||
1643 | return -ENOMEM; | ||
1644 | } | ||
1645 | q += vcpu->pio.guest_page_offset; | ||
1646 | bytes = vcpu->pio.size * vcpu->pio.cur_count; | ||
1647 | if (vcpu->pio.in) | ||
1648 | memcpy(q, p, bytes); | ||
1649 | else | ||
1650 | memcpy(p, q, bytes); | ||
1651 | q -= vcpu->pio.guest_page_offset; | ||
1652 | vunmap(q); | ||
1653 | kvm_arch_ops->vcpu_load(vcpu); | ||
1654 | free_pio_guest_pages(vcpu); | ||
1655 | return 0; | ||
1656 | } | ||
1657 | |||
1658 | static int complete_pio(struct kvm_vcpu *vcpu) | ||
1659 | { | ||
1660 | struct kvm_pio_request *io = &vcpu->pio; | ||
1661 | long delta; | ||
1662 | int r; | ||
1663 | |||
1664 | kvm_arch_ops->cache_regs(vcpu); | ||
1665 | |||
1666 | if (!io->string) { | ||
1667 | if (io->in) | ||
1668 | memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data, | ||
1669 | io->size); | ||
1670 | } else { | ||
1671 | if (io->in) { | ||
1672 | r = pio_copy_data(vcpu); | ||
1673 | if (r) { | ||
1674 | kvm_arch_ops->cache_regs(vcpu); | ||
1675 | return r; | ||
1676 | } | ||
1677 | } | ||
1678 | |||
1679 | delta = 1; | ||
1680 | if (io->rep) { | ||
1681 | delta *= io->cur_count; | ||
1682 | /* | ||
1683 | * The size of the register should really depend on | ||
1684 | * current address size. | ||
1685 | */ | ||
1686 | vcpu->regs[VCPU_REGS_RCX] -= delta; | ||
1687 | } | ||
1688 | if (io->down) | ||
1689 | delta = -delta; | ||
1690 | delta *= io->size; | ||
1691 | if (io->in) | ||
1692 | vcpu->regs[VCPU_REGS_RDI] += delta; | ||
1693 | else | ||
1694 | vcpu->regs[VCPU_REGS_RSI] += delta; | ||
1695 | } | ||
1696 | |||
1697 | kvm_arch_ops->decache_regs(vcpu); | ||
1698 | |||
1699 | io->count -= io->cur_count; | ||
1700 | io->cur_count = 0; | ||
1701 | |||
1702 | if (!io->count) | ||
1703 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
1704 | return 0; | ||
1705 | } | ||
1706 | |||
1707 | int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||
1708 | int size, unsigned long count, int string, int down, | ||
1709 | gva_t address, int rep, unsigned port) | ||
1710 | { | ||
1711 | unsigned now, in_page; | ||
1712 | int i; | ||
1713 | int nr_pages = 1; | ||
1714 | struct page *page; | ||
1715 | |||
1716 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
1717 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
1718 | vcpu->run->io.size = size; | ||
1719 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
1720 | vcpu->run->io.count = count; | ||
1721 | vcpu->run->io.port = port; | ||
1722 | vcpu->pio.count = count; | ||
1723 | vcpu->pio.cur_count = count; | ||
1724 | vcpu->pio.size = size; | ||
1725 | vcpu->pio.in = in; | ||
1726 | vcpu->pio.string = string; | ||
1727 | vcpu->pio.down = down; | ||
1728 | vcpu->pio.guest_page_offset = offset_in_page(address); | ||
1729 | vcpu->pio.rep = rep; | ||
1730 | |||
1731 | if (!string) { | ||
1732 | kvm_arch_ops->cache_regs(vcpu); | ||
1733 | memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); | ||
1734 | kvm_arch_ops->decache_regs(vcpu); | ||
1735 | return 0; | ||
1736 | } | ||
1737 | |||
1738 | if (!count) { | ||
1739 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
1740 | return 1; | ||
1741 | } | ||
1742 | |||
1743 | now = min(count, PAGE_SIZE / size); | ||
1744 | |||
1745 | if (!down) | ||
1746 | in_page = PAGE_SIZE - offset_in_page(address); | ||
1747 | else | ||
1748 | in_page = offset_in_page(address) + size; | ||
1749 | now = min(count, (unsigned long)in_page / size); | ||
1750 | if (!now) { | ||
1751 | /* | ||
1752 | * String I/O straddles page boundary. Pin two guest pages | ||
1753 | * so that we satisfy atomicity constraints. Do just one | ||
1754 | * transaction to avoid complexity. | ||
1755 | */ | ||
1756 | nr_pages = 2; | ||
1757 | now = 1; | ||
1758 | } | ||
1759 | if (down) { | ||
1760 | /* | ||
1761 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
1762 | */ | ||
1763 | printk(KERN_ERR "kvm: guest string pio down\n"); | ||
1764 | inject_gp(vcpu); | ||
1765 | return 1; | ||
1766 | } | ||
1767 | vcpu->run->io.count = now; | ||
1768 | vcpu->pio.cur_count = now; | ||
1769 | |||
1770 | for (i = 0; i < nr_pages; ++i) { | ||
1771 | spin_lock(&vcpu->kvm->lock); | ||
1772 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); | ||
1773 | if (page) | ||
1774 | get_page(page); | ||
1775 | vcpu->pio.guest_pages[i] = page; | ||
1776 | spin_unlock(&vcpu->kvm->lock); | ||
1777 | if (!page) { | ||
1778 | inject_gp(vcpu); | ||
1779 | free_pio_guest_pages(vcpu); | ||
1780 | return 1; | ||
1781 | } | ||
1782 | } | ||
1783 | |||
1784 | if (!vcpu->pio.in) | ||
1785 | return pio_copy_data(vcpu); | ||
1786 | return 0; | ||
1787 | } | ||
1788 | EXPORT_SYMBOL_GPL(kvm_setup_pio); | ||
1789 | |||
1505 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1790 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1506 | { | 1791 | { |
1507 | int r; | 1792 | int r; |
1793 | sigset_t sigsaved; | ||
1508 | 1794 | ||
1509 | vcpu_load(vcpu); | 1795 | vcpu_load(vcpu); |
1510 | 1796 | ||
1797 | if (vcpu->sigset_active) | ||
1798 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
1799 | |||
1511 | /* re-sync apic's tpr */ | 1800 | /* re-sync apic's tpr */ |
1512 | vcpu->cr8 = kvm_run->cr8; | 1801 | vcpu->cr8 = kvm_run->cr8; |
1513 | 1802 | ||
1514 | if (kvm_run->emulated) { | 1803 | if (vcpu->pio.cur_count) { |
1515 | kvm_arch_ops->skip_emulated_instruction(vcpu); | 1804 | r = complete_pio(vcpu); |
1516 | kvm_run->emulated = 0; | 1805 | if (r) |
1806 | goto out; | ||
1517 | } | 1807 | } |
1518 | 1808 | ||
1519 | if (kvm_run->mmio_completed) { | 1809 | if (vcpu->mmio_needed) { |
1520 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 1810 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
1521 | vcpu->mmio_read_completed = 1; | 1811 | vcpu->mmio_read_completed = 1; |
1812 | vcpu->mmio_needed = 0; | ||
1813 | r = emulate_instruction(vcpu, kvm_run, | ||
1814 | vcpu->mmio_fault_cr2, 0); | ||
1815 | if (r == EMULATE_DO_MMIO) { | ||
1816 | /* | ||
1817 | * Read-modify-write. Back to userspace. | ||
1818 | */ | ||
1819 | kvm_run->exit_reason = KVM_EXIT_MMIO; | ||
1820 | r = 0; | ||
1821 | goto out; | ||
1822 | } | ||
1522 | } | 1823 | } |
1523 | 1824 | ||
1524 | vcpu->mmio_needed = 0; | 1825 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { |
1826 | kvm_arch_ops->cache_regs(vcpu); | ||
1827 | vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; | ||
1828 | kvm_arch_ops->decache_regs(vcpu); | ||
1829 | } | ||
1525 | 1830 | ||
1526 | r = kvm_arch_ops->run(vcpu, kvm_run); | 1831 | r = kvm_arch_ops->run(vcpu, kvm_run); |
1527 | 1832 | ||
1833 | out: | ||
1834 | if (vcpu->sigset_active) | ||
1835 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
1836 | |||
1528 | vcpu_put(vcpu); | 1837 | vcpu_put(vcpu); |
1529 | return r; | 1838 | return r; |
1530 | } | 1839 | } |
@@ -1633,7 +1942,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
1633 | sregs->gdt.limit = dt.limit; | 1942 | sregs->gdt.limit = dt.limit; |
1634 | sregs->gdt.base = dt.base; | 1943 | sregs->gdt.base = dt.base; |
1635 | 1944 | ||
1636 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | 1945 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); |
1637 | sregs->cr0 = vcpu->cr0; | 1946 | sregs->cr0 = vcpu->cr0; |
1638 | sregs->cr2 = vcpu->cr2; | 1947 | sregs->cr2 = vcpu->cr2; |
1639 | sregs->cr3 = vcpu->cr3; | 1948 | sregs->cr3 = vcpu->cr3; |
@@ -1665,16 +1974,6 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
1665 | 1974 | ||
1666 | vcpu_load(vcpu); | 1975 | vcpu_load(vcpu); |
1667 | 1976 | ||
1668 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | ||
1669 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | ||
1670 | set_segment(vcpu, &sregs->es, VCPU_SREG_ES); | ||
1671 | set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); | ||
1672 | set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); | ||
1673 | set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); | ||
1674 | |||
1675 | set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | ||
1676 | set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | ||
1677 | |||
1678 | dt.limit = sregs->idt.limit; | 1977 | dt.limit = sregs->idt.limit; |
1679 | dt.base = sregs->idt.base; | 1978 | dt.base = sregs->idt.base; |
1680 | kvm_arch_ops->set_idt(vcpu, &dt); | 1979 | kvm_arch_ops->set_idt(vcpu, &dt); |
@@ -1694,10 +1993,10 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
1694 | #endif | 1993 | #endif |
1695 | vcpu->apic_base = sregs->apic_base; | 1994 | vcpu->apic_base = sregs->apic_base; |
1696 | 1995 | ||
1697 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | 1996 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); |
1698 | 1997 | ||
1699 | mmu_reset_needed |= vcpu->cr0 != sregs->cr0; | 1998 | mmu_reset_needed |= vcpu->cr0 != sregs->cr0; |
1700 | kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); | 1999 | kvm_arch_ops->set_cr0(vcpu, sregs->cr0); |
1701 | 2000 | ||
1702 | mmu_reset_needed |= vcpu->cr4 != sregs->cr4; | 2001 | mmu_reset_needed |= vcpu->cr4 != sregs->cr4; |
1703 | kvm_arch_ops->set_cr4(vcpu, sregs->cr4); | 2002 | kvm_arch_ops->set_cr4(vcpu, sregs->cr4); |
@@ -1714,6 +2013,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
1714 | if (vcpu->irq_pending[i]) | 2013 | if (vcpu->irq_pending[i]) |
1715 | __set_bit(i, &vcpu->irq_summary); | 2014 | __set_bit(i, &vcpu->irq_summary); |
1716 | 2015 | ||
2016 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | ||
2017 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | ||
2018 | set_segment(vcpu, &sregs->es, VCPU_SREG_ES); | ||
2019 | set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); | ||
2020 | set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); | ||
2021 | set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); | ||
2022 | |||
2023 | set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | ||
2024 | set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | ||
2025 | |||
1717 | vcpu_put(vcpu); | 2026 | vcpu_put(vcpu); |
1718 | 2027 | ||
1719 | return 0; | 2028 | return 0; |
@@ -1887,6 +2196,36 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | |||
1887 | return r; | 2196 | return r; |
1888 | } | 2197 | } |
1889 | 2198 | ||
2199 | static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, | ||
2200 | unsigned long address, | ||
2201 | int *type) | ||
2202 | { | ||
2203 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; | ||
2204 | unsigned long pgoff; | ||
2205 | struct page *page; | ||
2206 | |||
2207 | *type = VM_FAULT_MINOR; | ||
2208 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
2209 | if (pgoff == 0) | ||
2210 | page = virt_to_page(vcpu->run); | ||
2211 | else if (pgoff == KVM_PIO_PAGE_OFFSET) | ||
2212 | page = virt_to_page(vcpu->pio_data); | ||
2213 | else | ||
2214 | return NOPAGE_SIGBUS; | ||
2215 | get_page(page); | ||
2216 | return page; | ||
2217 | } | ||
2218 | |||
2219 | static struct vm_operations_struct kvm_vcpu_vm_ops = { | ||
2220 | .nopage = kvm_vcpu_nopage, | ||
2221 | }; | ||
2222 | |||
2223 | static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) | ||
2224 | { | ||
2225 | vma->vm_ops = &kvm_vcpu_vm_ops; | ||
2226 | return 0; | ||
2227 | } | ||
2228 | |||
1890 | static int kvm_vcpu_release(struct inode *inode, struct file *filp) | 2229 | static int kvm_vcpu_release(struct inode *inode, struct file *filp) |
1891 | { | 2230 | { |
1892 | struct kvm_vcpu *vcpu = filp->private_data; | 2231 | struct kvm_vcpu *vcpu = filp->private_data; |
@@ -1899,6 +2238,7 @@ static struct file_operations kvm_vcpu_fops = { | |||
1899 | .release = kvm_vcpu_release, | 2238 | .release = kvm_vcpu_release, |
1900 | .unlocked_ioctl = kvm_vcpu_ioctl, | 2239 | .unlocked_ioctl = kvm_vcpu_ioctl, |
1901 | .compat_ioctl = kvm_vcpu_ioctl, | 2240 | .compat_ioctl = kvm_vcpu_ioctl, |
2241 | .mmap = kvm_vcpu_mmap, | ||
1902 | }; | 2242 | }; |
1903 | 2243 | ||
1904 | /* | 2244 | /* |
@@ -1947,6 +2287,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
1947 | { | 2287 | { |
1948 | int r; | 2288 | int r; |
1949 | struct kvm_vcpu *vcpu; | 2289 | struct kvm_vcpu *vcpu; |
2290 | struct page *page; | ||
1950 | 2291 | ||
1951 | r = -EINVAL; | 2292 | r = -EINVAL; |
1952 | if (!valid_vcpu(n)) | 2293 | if (!valid_vcpu(n)) |
@@ -1961,9 +2302,22 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
1961 | return -EEXIST; | 2302 | return -EEXIST; |
1962 | } | 2303 | } |
1963 | 2304 | ||
2305 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2306 | r = -ENOMEM; | ||
2307 | if (!page) | ||
2308 | goto out_unlock; | ||
2309 | vcpu->run = page_address(page); | ||
2310 | |||
2311 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2312 | r = -ENOMEM; | ||
2313 | if (!page) | ||
2314 | goto out_free_run; | ||
2315 | vcpu->pio_data = page_address(page); | ||
2316 | |||
1964 | vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, | 2317 | vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, |
1965 | FX_IMAGE_ALIGN); | 2318 | FX_IMAGE_ALIGN); |
1966 | vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; | 2319 | vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; |
2320 | vcpu->cr0 = 0x10; | ||
1967 | 2321 | ||
1968 | r = kvm_arch_ops->vcpu_create(vcpu); | 2322 | r = kvm_arch_ops->vcpu_create(vcpu); |
1969 | if (r < 0) | 2323 | if (r < 0) |
@@ -1990,11 +2344,107 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
1990 | 2344 | ||
1991 | out_free_vcpus: | 2345 | out_free_vcpus: |
1992 | kvm_free_vcpu(vcpu); | 2346 | kvm_free_vcpu(vcpu); |
2347 | out_free_run: | ||
2348 | free_page((unsigned long)vcpu->run); | ||
2349 | vcpu->run = NULL; | ||
2350 | out_unlock: | ||
1993 | mutex_unlock(&vcpu->mutex); | 2351 | mutex_unlock(&vcpu->mutex); |
1994 | out: | 2352 | out: |
1995 | return r; | 2353 | return r; |
1996 | } | 2354 | } |
1997 | 2355 | ||
2356 | static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
2357 | struct kvm_cpuid *cpuid, | ||
2358 | struct kvm_cpuid_entry __user *entries) | ||
2359 | { | ||
2360 | int r; | ||
2361 | |||
2362 | r = -E2BIG; | ||
2363 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
2364 | goto out; | ||
2365 | r = -EFAULT; | ||
2366 | if (copy_from_user(&vcpu->cpuid_entries, entries, | ||
2367 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||
2368 | goto out; | ||
2369 | vcpu->cpuid_nent = cpuid->nent; | ||
2370 | return 0; | ||
2371 | |||
2372 | out: | ||
2373 | return r; | ||
2374 | } | ||
2375 | |||
2376 | static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | ||
2377 | { | ||
2378 | if (sigset) { | ||
2379 | sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
2380 | vcpu->sigset_active = 1; | ||
2381 | vcpu->sigset = *sigset; | ||
2382 | } else | ||
2383 | vcpu->sigset_active = 0; | ||
2384 | return 0; | ||
2385 | } | ||
2386 | |||
2387 | /* | ||
2388 | * fxsave fpu state. Taken from x86_64/processor.h. To be killed when | ||
2389 | * we have asm/x86/processor.h | ||
2390 | */ | ||
2391 | struct fxsave { | ||
2392 | u16 cwd; | ||
2393 | u16 swd; | ||
2394 | u16 twd; | ||
2395 | u16 fop; | ||
2396 | u64 rip; | ||
2397 | u64 rdp; | ||
2398 | u32 mxcsr; | ||
2399 | u32 mxcsr_mask; | ||
2400 | u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | ||
2401 | #ifdef CONFIG_X86_64 | ||
2402 | u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ | ||
2403 | #else | ||
2404 | u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ | ||
2405 | #endif | ||
2406 | }; | ||
2407 | |||
2408 | static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
2409 | { | ||
2410 | struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; | ||
2411 | |||
2412 | vcpu_load(vcpu); | ||
2413 | |||
2414 | memcpy(fpu->fpr, fxsave->st_space, 128); | ||
2415 | fpu->fcw = fxsave->cwd; | ||
2416 | fpu->fsw = fxsave->swd; | ||
2417 | fpu->ftwx = fxsave->twd; | ||
2418 | fpu->last_opcode = fxsave->fop; | ||
2419 | fpu->last_ip = fxsave->rip; | ||
2420 | fpu->last_dp = fxsave->rdp; | ||
2421 | memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); | ||
2422 | |||
2423 | vcpu_put(vcpu); | ||
2424 | |||
2425 | return 0; | ||
2426 | } | ||
2427 | |||
2428 | static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
2429 | { | ||
2430 | struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; | ||
2431 | |||
2432 | vcpu_load(vcpu); | ||
2433 | |||
2434 | memcpy(fxsave->st_space, fpu->fpr, 128); | ||
2435 | fxsave->cwd = fpu->fcw; | ||
2436 | fxsave->swd = fpu->fsw; | ||
2437 | fxsave->twd = fpu->ftwx; | ||
2438 | fxsave->fop = fpu->last_opcode; | ||
2439 | fxsave->rip = fpu->last_ip; | ||
2440 | fxsave->rdp = fpu->last_dp; | ||
2441 | memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); | ||
2442 | |||
2443 | vcpu_put(vcpu); | ||
2444 | |||
2445 | return 0; | ||
2446 | } | ||
2447 | |||
1998 | static long kvm_vcpu_ioctl(struct file *filp, | 2448 | static long kvm_vcpu_ioctl(struct file *filp, |
1999 | unsigned int ioctl, unsigned long arg) | 2449 | unsigned int ioctl, unsigned long arg) |
2000 | { | 2450 | { |
@@ -2003,21 +2453,12 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
2003 | int r = -EINVAL; | 2453 | int r = -EINVAL; |
2004 | 2454 | ||
2005 | switch (ioctl) { | 2455 | switch (ioctl) { |
2006 | case KVM_RUN: { | 2456 | case KVM_RUN: |
2007 | struct kvm_run kvm_run; | 2457 | r = -EINVAL; |
2008 | 2458 | if (arg) | |
2009 | r = -EFAULT; | ||
2010 | if (copy_from_user(&kvm_run, argp, sizeof kvm_run)) | ||
2011 | goto out; | 2459 | goto out; |
2012 | r = kvm_vcpu_ioctl_run(vcpu, &kvm_run); | 2460 | r = kvm_vcpu_ioctl_run(vcpu, vcpu->run); |
2013 | if (r < 0 && r != -EINTR) | ||
2014 | goto out; | ||
2015 | if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) { | ||
2016 | r = -EFAULT; | ||
2017 | goto out; | ||
2018 | } | ||
2019 | break; | 2461 | break; |
2020 | } | ||
2021 | case KVM_GET_REGS: { | 2462 | case KVM_GET_REGS: { |
2022 | struct kvm_regs kvm_regs; | 2463 | struct kvm_regs kvm_regs; |
2023 | 2464 | ||
@@ -2113,6 +2554,66 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
2113 | case KVM_SET_MSRS: | 2554 | case KVM_SET_MSRS: |
2114 | r = msr_io(vcpu, argp, do_set_msr, 0); | 2555 | r = msr_io(vcpu, argp, do_set_msr, 0); |
2115 | break; | 2556 | break; |
2557 | case KVM_SET_CPUID: { | ||
2558 | struct kvm_cpuid __user *cpuid_arg = argp; | ||
2559 | struct kvm_cpuid cpuid; | ||
2560 | |||
2561 | r = -EFAULT; | ||
2562 | if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) | ||
2563 | goto out; | ||
2564 | r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); | ||
2565 | if (r) | ||
2566 | goto out; | ||
2567 | break; | ||
2568 | } | ||
2569 | case KVM_SET_SIGNAL_MASK: { | ||
2570 | struct kvm_signal_mask __user *sigmask_arg = argp; | ||
2571 | struct kvm_signal_mask kvm_sigmask; | ||
2572 | sigset_t sigset, *p; | ||
2573 | |||
2574 | p = NULL; | ||
2575 | if (argp) { | ||
2576 | r = -EFAULT; | ||
2577 | if (copy_from_user(&kvm_sigmask, argp, | ||
2578 | sizeof kvm_sigmask)) | ||
2579 | goto out; | ||
2580 | r = -EINVAL; | ||
2581 | if (kvm_sigmask.len != sizeof sigset) | ||
2582 | goto out; | ||
2583 | r = -EFAULT; | ||
2584 | if (copy_from_user(&sigset, sigmask_arg->sigset, | ||
2585 | sizeof sigset)) | ||
2586 | goto out; | ||
2587 | p = &sigset; | ||
2588 | } | ||
2589 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); | ||
2590 | break; | ||
2591 | } | ||
2592 | case KVM_GET_FPU: { | ||
2593 | struct kvm_fpu fpu; | ||
2594 | |||
2595 | memset(&fpu, 0, sizeof fpu); | ||
2596 | r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu); | ||
2597 | if (r) | ||
2598 | goto out; | ||
2599 | r = -EFAULT; | ||
2600 | if (copy_to_user(argp, &fpu, sizeof fpu)) | ||
2601 | goto out; | ||
2602 | r = 0; | ||
2603 | break; | ||
2604 | } | ||
2605 | case KVM_SET_FPU: { | ||
2606 | struct kvm_fpu fpu; | ||
2607 | |||
2608 | r = -EFAULT; | ||
2609 | if (copy_from_user(&fpu, argp, sizeof fpu)) | ||
2610 | goto out; | ||
2611 | r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu); | ||
2612 | if (r) | ||
2613 | goto out; | ||
2614 | r = 0; | ||
2615 | break; | ||
2616 | } | ||
2116 | default: | 2617 | default: |
2117 | ; | 2618 | ; |
2118 | } | 2619 | } |
@@ -2155,6 +2656,17 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2155 | goto out; | 2656 | goto out; |
2156 | break; | 2657 | break; |
2157 | } | 2658 | } |
2659 | case KVM_SET_MEMORY_ALIAS: { | ||
2660 | struct kvm_memory_alias alias; | ||
2661 | |||
2662 | r = -EFAULT; | ||
2663 | if (copy_from_user(&alias, argp, sizeof alias)) | ||
2664 | goto out; | ||
2665 | r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); | ||
2666 | if (r) | ||
2667 | goto out; | ||
2668 | break; | ||
2669 | } | ||
2158 | default: | 2670 | default: |
2159 | ; | 2671 | ; |
2160 | } | 2672 | } |
@@ -2168,15 +2680,11 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, | |||
2168 | { | 2680 | { |
2169 | struct kvm *kvm = vma->vm_file->private_data; | 2681 | struct kvm *kvm = vma->vm_file->private_data; |
2170 | unsigned long pgoff; | 2682 | unsigned long pgoff; |
2171 | struct kvm_memory_slot *slot; | ||
2172 | struct page *page; | 2683 | struct page *page; |
2173 | 2684 | ||
2174 | *type = VM_FAULT_MINOR; | 2685 | *type = VM_FAULT_MINOR; |
2175 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 2686 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
2176 | slot = gfn_to_memslot(kvm, pgoff); | 2687 | page = gfn_to_page(kvm, pgoff); |
2177 | if (!slot) | ||
2178 | return NOPAGE_SIGBUS; | ||
2179 | page = gfn_to_page(slot, pgoff); | ||
2180 | if (!page) | 2688 | if (!page) |
2181 | return NOPAGE_SIGBUS; | 2689 | return NOPAGE_SIGBUS; |
2182 | get_page(page); | 2690 | get_page(page); |
@@ -2248,13 +2756,19 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2248 | unsigned int ioctl, unsigned long arg) | 2756 | unsigned int ioctl, unsigned long arg) |
2249 | { | 2757 | { |
2250 | void __user *argp = (void __user *)arg; | 2758 | void __user *argp = (void __user *)arg; |
2251 | int r = -EINVAL; | 2759 | long r = -EINVAL; |
2252 | 2760 | ||
2253 | switch (ioctl) { | 2761 | switch (ioctl) { |
2254 | case KVM_GET_API_VERSION: | 2762 | case KVM_GET_API_VERSION: |
2763 | r = -EINVAL; | ||
2764 | if (arg) | ||
2765 | goto out; | ||
2255 | r = KVM_API_VERSION; | 2766 | r = KVM_API_VERSION; |
2256 | break; | 2767 | break; |
2257 | case KVM_CREATE_VM: | 2768 | case KVM_CREATE_VM: |
2769 | r = -EINVAL; | ||
2770 | if (arg) | ||
2771 | goto out; | ||
2258 | r = kvm_dev_ioctl_create_vm(); | 2772 | r = kvm_dev_ioctl_create_vm(); |
2259 | break; | 2773 | break; |
2260 | case KVM_GET_MSR_INDEX_LIST: { | 2774 | case KVM_GET_MSR_INDEX_LIST: { |
@@ -2284,6 +2798,18 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2284 | r = 0; | 2798 | r = 0; |
2285 | break; | 2799 | break; |
2286 | } | 2800 | } |
2801 | case KVM_CHECK_EXTENSION: | ||
2802 | /* | ||
2803 | * No extensions defined at present. | ||
2804 | */ | ||
2805 | r = 0; | ||
2806 | break; | ||
2807 | case KVM_GET_VCPU_MMAP_SIZE: | ||
2808 | r = -EINVAL; | ||
2809 | if (arg) | ||
2810 | goto out; | ||
2811 | r = 2 * PAGE_SIZE; | ||
2812 | break; | ||
2287 | default: | 2813 | default: |
2288 | ; | 2814 | ; |
2289 | } | 2815 | } |
@@ -2299,7 +2825,7 @@ static struct file_operations kvm_chardev_ops = { | |||
2299 | }; | 2825 | }; |
2300 | 2826 | ||
2301 | static struct miscdevice kvm_dev = { | 2827 | static struct miscdevice kvm_dev = { |
2302 | MISC_DYNAMIC_MINOR, | 2828 | KVM_MINOR, |
2303 | "kvm", | 2829 | "kvm", |
2304 | &kvm_chardev_ops, | 2830 | &kvm_chardev_ops, |
2305 | }; | 2831 | }; |
@@ -2385,14 +2911,39 @@ static struct notifier_block kvm_cpu_notifier = { | |||
2385 | .priority = 20, /* must be > scheduler priority */ | 2911 | .priority = 20, /* must be > scheduler priority */ |
2386 | }; | 2912 | }; |
2387 | 2913 | ||
2914 | static u64 stat_get(void *_offset) | ||
2915 | { | ||
2916 | unsigned offset = (long)_offset; | ||
2917 | u64 total = 0; | ||
2918 | struct kvm *kvm; | ||
2919 | struct kvm_vcpu *vcpu; | ||
2920 | int i; | ||
2921 | |||
2922 | spin_lock(&kvm_lock); | ||
2923 | list_for_each_entry(kvm, &vm_list, vm_list) | ||
2924 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
2925 | vcpu = &kvm->vcpus[i]; | ||
2926 | total += *(u32 *)((void *)vcpu + offset); | ||
2927 | } | ||
2928 | spin_unlock(&kvm_lock); | ||
2929 | return total; | ||
2930 | } | ||
2931 | |||
2932 | static void stat_set(void *offset, u64 val) | ||
2933 | { | ||
2934 | } | ||
2935 | |||
2936 | DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n"); | ||
2937 | |||
2388 | static __init void kvm_init_debug(void) | 2938 | static __init void kvm_init_debug(void) |
2389 | { | 2939 | { |
2390 | struct kvm_stats_debugfs_item *p; | 2940 | struct kvm_stats_debugfs_item *p; |
2391 | 2941 | ||
2392 | debugfs_dir = debugfs_create_dir("kvm", NULL); | 2942 | debugfs_dir = debugfs_create_dir("kvm", NULL); |
2393 | for (p = debugfs_entries; p->name; ++p) | 2943 | for (p = debugfs_entries; p->name; ++p) |
2394 | p->dentry = debugfs_create_u32(p->name, 0444, debugfs_dir, | 2944 | p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, |
2395 | p->data); | 2945 | (void *)(long)p->offset, |
2946 | &stat_fops); | ||
2396 | } | 2947 | } |
2397 | 2948 | ||
2398 | static void kvm_exit_debug(void) | 2949 | static void kvm_exit_debug(void) |
@@ -2522,6 +3073,10 @@ static __init int kvm_init(void) | |||
2522 | static struct page *bad_page; | 3073 | static struct page *bad_page; |
2523 | int r; | 3074 | int r; |
2524 | 3075 | ||
3076 | r = kvm_mmu_module_init(); | ||
3077 | if (r) | ||
3078 | goto out4; | ||
3079 | |||
2525 | r = register_filesystem(&kvm_fs_type); | 3080 | r = register_filesystem(&kvm_fs_type); |
2526 | if (r) | 3081 | if (r) |
2527 | goto out3; | 3082 | goto out3; |
@@ -2550,6 +3105,8 @@ out: | |||
2550 | out2: | 3105 | out2: |
2551 | unregister_filesystem(&kvm_fs_type); | 3106 | unregister_filesystem(&kvm_fs_type); |
2552 | out3: | 3107 | out3: |
3108 | kvm_mmu_module_exit(); | ||
3109 | out4: | ||
2553 | return r; | 3110 | return r; |
2554 | } | 3111 | } |
2555 | 3112 | ||
@@ -2559,6 +3116,7 @@ static __exit void kvm_exit(void) | |||
2559 | __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); | 3116 | __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); |
2560 | mntput(kvmfs_mnt); | 3117 | mntput(kvmfs_mnt); |
2561 | unregister_filesystem(&kvm_fs_type); | 3118 | unregister_filesystem(&kvm_fs_type); |
3119 | kvm_mmu_module_exit(); | ||
2562 | } | 3120 | } |
2563 | 3121 | ||
2564 | module_init(kvm_init) | 3122 | module_init(kvm_init) |