aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/kvm/kvm_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/kvm/kvm_main.c')
-rw-r--r--drivers/kvm/kvm_main.c456
1 files changed, 278 insertions, 178 deletions
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 8f1f07adb04e..1b206f197c6b 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -16,34 +16,33 @@
16 */ 16 */
17 17
18#include "kvm.h" 18#include "kvm.h"
19#include "x86_emulate.h"
20#include "segment_descriptor.h"
19 21
20#include <linux/kvm.h> 22#include <linux/kvm.h>
21#include <linux/module.h> 23#include <linux/module.h>
22#include <linux/errno.h> 24#include <linux/errno.h>
23#include <linux/magic.h>
24#include <asm/processor.h>
25#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/gfp.h> 26#include <linux/gfp.h>
27#include <asm/msr.h>
28#include <linux/mm.h> 27#include <linux/mm.h>
29#include <linux/miscdevice.h> 28#include <linux/miscdevice.h>
30#include <linux/vmalloc.h> 29#include <linux/vmalloc.h>
31#include <asm/uaccess.h>
32#include <linux/reboot.h> 30#include <linux/reboot.h>
33#include <asm/io.h>
34#include <linux/debugfs.h> 31#include <linux/debugfs.h>
35#include <linux/highmem.h> 32#include <linux/highmem.h>
36#include <linux/file.h> 33#include <linux/file.h>
37#include <asm/desc.h>
38#include <linux/sysdev.h> 34#include <linux/sysdev.h>
39#include <linux/cpu.h> 35#include <linux/cpu.h>
40#include <linux/file.h>
41#include <linux/fs.h>
42#include <linux/mount.h>
43#include <linux/sched.h> 36#include <linux/sched.h>
37#include <linux/cpumask.h>
38#include <linux/smp.h>
39#include <linux/anon_inodes.h>
44 40
45#include "x86_emulate.h" 41#include <asm/processor.h>
46#include "segment_descriptor.h" 42#include <asm/msr.h>
43#include <asm/io.h>
44#include <asm/uaccess.h>
45#include <asm/desc.h>
47 46
48MODULE_AUTHOR("Qumranet"); 47MODULE_AUTHOR("Qumranet");
49MODULE_LICENSE("GPL"); 48MODULE_LICENSE("GPL");
@@ -51,8 +50,12 @@ MODULE_LICENSE("GPL");
51static DEFINE_SPINLOCK(kvm_lock); 50static DEFINE_SPINLOCK(kvm_lock);
52static LIST_HEAD(vm_list); 51static LIST_HEAD(vm_list);
53 52
53static cpumask_t cpus_hardware_enabled;
54
54struct kvm_arch_ops *kvm_arch_ops; 55struct kvm_arch_ops *kvm_arch_ops;
55 56
57static void hardware_disable(void *ignored);
58
56#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) 59#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
57 60
58static struct kvm_stats_debugfs_item { 61static struct kvm_stats_debugfs_item {
@@ -72,13 +75,13 @@ static struct kvm_stats_debugfs_item {
72 { "halt_exits", STAT_OFFSET(halt_exits) }, 75 { "halt_exits", STAT_OFFSET(halt_exits) },
73 { "request_irq", STAT_OFFSET(request_irq_exits) }, 76 { "request_irq", STAT_OFFSET(request_irq_exits) },
74 { "irq_exits", STAT_OFFSET(irq_exits) }, 77 { "irq_exits", STAT_OFFSET(irq_exits) },
78 { "light_exits", STAT_OFFSET(light_exits) },
79 { "efer_reload", STAT_OFFSET(efer_reload) },
75 { NULL } 80 { NULL }
76}; 81};
77 82
78static struct dentry *debugfs_dir; 83static struct dentry *debugfs_dir;
79 84
80struct vfsmount *kvmfs_mnt;
81
82#define MAX_IO_MSRS 256 85#define MAX_IO_MSRS 256
83 86
84#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL 87#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL
@@ -100,55 +103,6 @@ struct segment_descriptor_64 {
100static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 103static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
101 unsigned long arg); 104 unsigned long arg);
102 105
103static struct inode *kvmfs_inode(struct file_operations *fops)
104{
105 int error = -ENOMEM;
106 struct inode *inode = new_inode(kvmfs_mnt->mnt_sb);
107
108 if (!inode)
109 goto eexit_1;
110
111 inode->i_fop = fops;
112
113 /*
114 * Mark the inode dirty from the very beginning,
115 * that way it will never be moved to the dirty
116 * list because mark_inode_dirty() will think
117 * that it already _is_ on the dirty list.
118 */
119 inode->i_state = I_DIRTY;
120 inode->i_mode = S_IRUSR | S_IWUSR;
121 inode->i_uid = current->fsuid;
122 inode->i_gid = current->fsgid;
123 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
124 return inode;
125
126eexit_1:
127 return ERR_PTR(error);
128}
129
130static struct file *kvmfs_file(struct inode *inode, void *private_data)
131{
132 struct file *file = get_empty_filp();
133
134 if (!file)
135 return ERR_PTR(-ENFILE);
136
137 file->f_path.mnt = mntget(kvmfs_mnt);
138 file->f_path.dentry = d_alloc_anon(inode);
139 if (!file->f_path.dentry)
140 return ERR_PTR(-ENOMEM);
141 file->f_mapping = inode->i_mapping;
142
143 file->f_pos = 0;
144 file->f_flags = O_RDWR;
145 file->f_op = inode->i_fop;
146 file->f_mode = FMODE_READ | FMODE_WRITE;
147 file->f_version = 0;
148 file->private_data = private_data;
149 return file;
150}
151
152unsigned long segment_base(u16 selector) 106unsigned long segment_base(u16 selector)
153{ 107{
154 struct descriptor_table gdt; 108 struct descriptor_table gdt;
@@ -307,6 +261,48 @@ static void vcpu_put(struct kvm_vcpu *vcpu)
307 mutex_unlock(&vcpu->mutex); 261 mutex_unlock(&vcpu->mutex);
308} 262}
309 263
264static void ack_flush(void *_completed)
265{
266 atomic_t *completed = _completed;
267
268 atomic_inc(completed);
269}
270
271void kvm_flush_remote_tlbs(struct kvm *kvm)
272{
273 int i, cpu, needed;
274 cpumask_t cpus;
275 struct kvm_vcpu *vcpu;
276 atomic_t completed;
277
278 atomic_set(&completed, 0);
279 cpus_clear(cpus);
280 needed = 0;
281 for (i = 0; i < kvm->nvcpus; ++i) {
282 vcpu = &kvm->vcpus[i];
283 if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
284 continue;
285 cpu = vcpu->cpu;
286 if (cpu != -1 && cpu != raw_smp_processor_id())
287 if (!cpu_isset(cpu, cpus)) {
288 cpu_set(cpu, cpus);
289 ++needed;
290 }
291 }
292
293 /*
294 * We really want smp_call_function_mask() here. But that's not
295 * available, so ipi all cpus in parallel and wait for them
296 * to complete.
297 */
298 for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus))
299 smp_call_function_single(cpu, ack_flush, &completed, 1, 0);
300 while (atomic_read(&completed) != needed) {
301 cpu_relax();
302 barrier();
303 }
304}
305
310static struct kvm *kvm_create_vm(void) 306static struct kvm *kvm_create_vm(void)
311{ 307{
312 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); 308 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
@@ -315,8 +311,13 @@ static struct kvm *kvm_create_vm(void)
315 if (!kvm) 311 if (!kvm)
316 return ERR_PTR(-ENOMEM); 312 return ERR_PTR(-ENOMEM);
317 313
314 kvm_io_bus_init(&kvm->pio_bus);
318 spin_lock_init(&kvm->lock); 315 spin_lock_init(&kvm->lock);
319 INIT_LIST_HEAD(&kvm->active_mmu_pages); 316 INIT_LIST_HEAD(&kvm->active_mmu_pages);
317 spin_lock(&kvm_lock);
318 list_add(&kvm->vm_list, &vm_list);
319 spin_unlock(&kvm_lock);
320 kvm_io_bus_init(&kvm->mmio_bus);
320 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 321 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
321 struct kvm_vcpu *vcpu = &kvm->vcpus[i]; 322 struct kvm_vcpu *vcpu = &kvm->vcpus[i];
322 323
@@ -324,10 +325,6 @@ static struct kvm *kvm_create_vm(void)
324 vcpu->cpu = -1; 325 vcpu->cpu = -1;
325 vcpu->kvm = kvm; 326 vcpu->kvm = kvm;
326 vcpu->mmu.root_hpa = INVALID_PAGE; 327 vcpu->mmu.root_hpa = INVALID_PAGE;
327 INIT_LIST_HEAD(&vcpu->free_pages);
328 spin_lock(&kvm_lock);
329 list_add(&kvm->vm_list, &vm_list);
330 spin_unlock(&kvm_lock);
331 } 328 }
332 return kvm; 329 return kvm;
333} 330}
@@ -380,6 +377,16 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
380 } 377 }
381} 378}
382 379
380static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
381{
382 if (!vcpu->vmcs)
383 return;
384
385 vcpu_load(vcpu);
386 kvm_mmu_unload(vcpu);
387 vcpu_put(vcpu);
388}
389
383static void kvm_free_vcpu(struct kvm_vcpu *vcpu) 390static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
384{ 391{
385 if (!vcpu->vmcs) 392 if (!vcpu->vmcs)
@@ -400,6 +407,11 @@ static void kvm_free_vcpus(struct kvm *kvm)
400{ 407{
401 unsigned int i; 408 unsigned int i;
402 409
410 /*
411 * Unpin any mmu pages first.
412 */
413 for (i = 0; i < KVM_MAX_VCPUS; ++i)
414 kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
403 for (i = 0; i < KVM_MAX_VCPUS; ++i) 415 for (i = 0; i < KVM_MAX_VCPUS; ++i)
404 kvm_free_vcpu(&kvm->vcpus[i]); 416 kvm_free_vcpu(&kvm->vcpus[i]);
405} 417}
@@ -414,6 +426,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
414 spin_lock(&kvm_lock); 426 spin_lock(&kvm_lock);
415 list_del(&kvm->vm_list); 427 list_del(&kvm->vm_list);
416 spin_unlock(&kvm_lock); 428 spin_unlock(&kvm_lock);
429 kvm_io_bus_destroy(&kvm->pio_bus);
430 kvm_io_bus_destroy(&kvm->mmio_bus);
417 kvm_free_vcpus(kvm); 431 kvm_free_vcpus(kvm);
418 kvm_free_physmem(kvm); 432 kvm_free_physmem(kvm);
419 kfree(kvm); 433 kfree(kvm);
@@ -969,7 +983,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page);
969void mark_page_dirty(struct kvm *kvm, gfn_t gfn) 983void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
970{ 984{
971 int i; 985 int i;
972 struct kvm_memory_slot *memslot = NULL; 986 struct kvm_memory_slot *memslot;
973 unsigned long rel_gfn; 987 unsigned long rel_gfn;
974 988
975 for (i = 0; i < kvm->nmemslots; ++i) { 989 for (i = 0; i < kvm->nmemslots; ++i) {
@@ -978,7 +992,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
978 if (gfn >= memslot->base_gfn 992 if (gfn >= memslot->base_gfn
979 && gfn < memslot->base_gfn + memslot->npages) { 993 && gfn < memslot->base_gfn + memslot->npages) {
980 994
981 if (!memslot || !memslot->dirty_bitmap) 995 if (!memslot->dirty_bitmap)
982 return; 996 return;
983 997
984 rel_gfn = gfn - memslot->base_gfn; 998 rel_gfn = gfn - memslot->base_gfn;
@@ -1037,12 +1051,31 @@ static int emulator_write_std(unsigned long addr,
1037 return X86EMUL_UNHANDLEABLE; 1051 return X86EMUL_UNHANDLEABLE;
1038} 1052}
1039 1053
1054static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1055 gpa_t addr)
1056{
1057 /*
1058 * Note that its important to have this wrapper function because
1059 * in the very near future we will be checking for MMIOs against
1060 * the LAPIC as well as the general MMIO bus
1061 */
1062 return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
1063}
1064
1065static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
1066 gpa_t addr)
1067{
1068 return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
1069}
1070
1040static int emulator_read_emulated(unsigned long addr, 1071static int emulator_read_emulated(unsigned long addr,
1041 void *val, 1072 void *val,
1042 unsigned int bytes, 1073 unsigned int bytes,
1043 struct x86_emulate_ctxt *ctxt) 1074 struct x86_emulate_ctxt *ctxt)
1044{ 1075{
1045 struct kvm_vcpu *vcpu = ctxt->vcpu; 1076 struct kvm_vcpu *vcpu = ctxt->vcpu;
1077 struct kvm_io_device *mmio_dev;
1078 gpa_t gpa;
1046 1079
1047 if (vcpu->mmio_read_completed) { 1080 if (vcpu->mmio_read_completed) {
1048 memcpy(val, vcpu->mmio_data, bytes); 1081 memcpy(val, vcpu->mmio_data, bytes);
@@ -1051,18 +1084,26 @@ static int emulator_read_emulated(unsigned long addr,
1051 } else if (emulator_read_std(addr, val, bytes, ctxt) 1084 } else if (emulator_read_std(addr, val, bytes, ctxt)
1052 == X86EMUL_CONTINUE) 1085 == X86EMUL_CONTINUE)
1053 return X86EMUL_CONTINUE; 1086 return X86EMUL_CONTINUE;
1054 else {
1055 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1056 1087
1057 if (gpa == UNMAPPED_GVA) 1088 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1058 return X86EMUL_PROPAGATE_FAULT; 1089 if (gpa == UNMAPPED_GVA)
1059 vcpu->mmio_needed = 1; 1090 return X86EMUL_PROPAGATE_FAULT;
1060 vcpu->mmio_phys_addr = gpa;
1061 vcpu->mmio_size = bytes;
1062 vcpu->mmio_is_write = 0;
1063 1091
1064 return X86EMUL_UNHANDLEABLE; 1092 /*
1093 * Is this MMIO handled locally?
1094 */
1095 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1096 if (mmio_dev) {
1097 kvm_iodevice_read(mmio_dev, gpa, bytes, val);
1098 return X86EMUL_CONTINUE;
1065 } 1099 }
1100
1101 vcpu->mmio_needed = 1;
1102 vcpu->mmio_phys_addr = gpa;
1103 vcpu->mmio_size = bytes;
1104 vcpu->mmio_is_write = 0;
1105
1106 return X86EMUL_UNHANDLEABLE;
1066} 1107}
1067 1108
1068static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 1109static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1070,18 +1111,20 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1070{ 1111{
1071 struct page *page; 1112 struct page *page;
1072 void *virt; 1113 void *virt;
1114 unsigned offset = offset_in_page(gpa);
1073 1115
1074 if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) 1116 if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
1075 return 0; 1117 return 0;
1076 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 1118 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1077 if (!page) 1119 if (!page)
1078 return 0; 1120 return 0;
1079 kvm_mmu_pre_write(vcpu, gpa, bytes);
1080 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); 1121 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
1081 virt = kmap_atomic(page, KM_USER0); 1122 virt = kmap_atomic(page, KM_USER0);
1082 memcpy(virt + offset_in_page(gpa), val, bytes); 1123 if (memcmp(virt + offset_in_page(gpa), val, bytes)) {
1124 kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes);
1125 memcpy(virt + offset_in_page(gpa), val, bytes);
1126 }
1083 kunmap_atomic(virt, KM_USER0); 1127 kunmap_atomic(virt, KM_USER0);
1084 kvm_mmu_post_write(vcpu, gpa, bytes);
1085 return 1; 1128 return 1;
1086} 1129}
1087 1130
@@ -1090,8 +1133,9 @@ static int emulator_write_emulated(unsigned long addr,
1090 unsigned int bytes, 1133 unsigned int bytes,
1091 struct x86_emulate_ctxt *ctxt) 1134 struct x86_emulate_ctxt *ctxt)
1092{ 1135{
1093 struct kvm_vcpu *vcpu = ctxt->vcpu; 1136 struct kvm_vcpu *vcpu = ctxt->vcpu;
1094 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); 1137 struct kvm_io_device *mmio_dev;
1138 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1095 1139
1096 if (gpa == UNMAPPED_GVA) { 1140 if (gpa == UNMAPPED_GVA) {
1097 kvm_arch_ops->inject_page_fault(vcpu, addr, 2); 1141 kvm_arch_ops->inject_page_fault(vcpu, addr, 2);
@@ -1101,6 +1145,15 @@ static int emulator_write_emulated(unsigned long addr,
1101 if (emulator_write_phys(vcpu, gpa, val, bytes)) 1145 if (emulator_write_phys(vcpu, gpa, val, bytes))
1102 return X86EMUL_CONTINUE; 1146 return X86EMUL_CONTINUE;
1103 1147
1148 /*
1149 * Is this MMIO handled locally?
1150 */
1151 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1152 if (mmio_dev) {
1153 kvm_iodevice_write(mmio_dev, gpa, bytes, val);
1154 return X86EMUL_CONTINUE;
1155 }
1156
1104 vcpu->mmio_needed = 1; 1157 vcpu->mmio_needed = 1;
1105 vcpu->mmio_phys_addr = gpa; 1158 vcpu->mmio_phys_addr = gpa;
1106 vcpu->mmio_size = bytes; 1159 vcpu->mmio_size = bytes;
@@ -1269,6 +1322,17 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
1269} 1322}
1270EXPORT_SYMBOL_GPL(emulate_instruction); 1323EXPORT_SYMBOL_GPL(emulate_instruction);
1271 1324
1325int kvm_emulate_halt(struct kvm_vcpu *vcpu)
1326{
1327 if (vcpu->irq_summary)
1328 return 1;
1329
1330 vcpu->run->exit_reason = KVM_EXIT_HLT;
1331 ++vcpu->stat.halt_exits;
1332 return 0;
1333}
1334EXPORT_SYMBOL_GPL(kvm_emulate_halt);
1335
1272int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) 1336int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
1273{ 1337{
1274 unsigned long nr, a0, a1, a2, a3, a4, a5, ret; 1338 unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
@@ -1469,6 +1533,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1469 case MSR_IA32_MC0_MISC+16: 1533 case MSR_IA32_MC0_MISC+16:
1470 case MSR_IA32_UCODE_REV: 1534 case MSR_IA32_UCODE_REV:
1471 case MSR_IA32_PERF_STATUS: 1535 case MSR_IA32_PERF_STATUS:
1536 case MSR_IA32_EBL_CR_POWERON:
1472 /* MTRR registers */ 1537 /* MTRR registers */
1473 case 0xfe: 1538 case 0xfe:
1474 case 0x200 ... 0x2ff: 1539 case 0x200 ... 0x2ff:
@@ -1727,6 +1792,20 @@ static int complete_pio(struct kvm_vcpu *vcpu)
1727 return 0; 1792 return 0;
1728} 1793}
1729 1794
1795void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu)
1796{
1797 /* TODO: String I/O for in kernel device */
1798
1799 if (vcpu->pio.in)
1800 kvm_iodevice_read(pio_dev, vcpu->pio.port,
1801 vcpu->pio.size,
1802 vcpu->pio_data);
1803 else
1804 kvm_iodevice_write(pio_dev, vcpu->pio.port,
1805 vcpu->pio.size,
1806 vcpu->pio_data);
1807}
1808
1730int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 1809int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1731 int size, unsigned long count, int string, int down, 1810 int size, unsigned long count, int string, int down,
1732 gva_t address, int rep, unsigned port) 1811 gva_t address, int rep, unsigned port)
@@ -1735,6 +1814,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1735 int i; 1814 int i;
1736 int nr_pages = 1; 1815 int nr_pages = 1;
1737 struct page *page; 1816 struct page *page;
1817 struct kvm_io_device *pio_dev;
1738 1818
1739 vcpu->run->exit_reason = KVM_EXIT_IO; 1819 vcpu->run->exit_reason = KVM_EXIT_IO;
1740 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 1820 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -1746,17 +1826,27 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1746 vcpu->pio.cur_count = count; 1826 vcpu->pio.cur_count = count;
1747 vcpu->pio.size = size; 1827 vcpu->pio.size = size;
1748 vcpu->pio.in = in; 1828 vcpu->pio.in = in;
1829 vcpu->pio.port = port;
1749 vcpu->pio.string = string; 1830 vcpu->pio.string = string;
1750 vcpu->pio.down = down; 1831 vcpu->pio.down = down;
1751 vcpu->pio.guest_page_offset = offset_in_page(address); 1832 vcpu->pio.guest_page_offset = offset_in_page(address);
1752 vcpu->pio.rep = rep; 1833 vcpu->pio.rep = rep;
1753 1834
1835 pio_dev = vcpu_find_pio_dev(vcpu, port);
1754 if (!string) { 1836 if (!string) {
1755 kvm_arch_ops->cache_regs(vcpu); 1837 kvm_arch_ops->cache_regs(vcpu);
1756 memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); 1838 memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1757 kvm_arch_ops->decache_regs(vcpu); 1839 kvm_arch_ops->decache_regs(vcpu);
1840 if (pio_dev) {
1841 kernel_pio(pio_dev, vcpu);
1842 complete_pio(vcpu);
1843 return 1;
1844 }
1758 return 0; 1845 return 0;
1759 } 1846 }
1847 /* TODO: String I/O for in kernel device */
1848 if (pio_dev)
1849 printk(KERN_ERR "kvm_setup_pio: no string io support\n");
1760 1850
1761 if (!count) { 1851 if (!count) {
1762 kvm_arch_ops->skip_emulated_instruction(vcpu); 1852 kvm_arch_ops->skip_emulated_instruction(vcpu);
@@ -2273,34 +2363,12 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
2273 struct inode *inode; 2363 struct inode *inode;
2274 struct file *file; 2364 struct file *file;
2275 2365
2366 r = anon_inode_getfd(&fd, &inode, &file,
2367 "kvm-vcpu", &kvm_vcpu_fops, vcpu);
2368 if (r)
2369 return r;
2276 atomic_inc(&vcpu->kvm->filp->f_count); 2370 atomic_inc(&vcpu->kvm->filp->f_count);
2277 inode = kvmfs_inode(&kvm_vcpu_fops);
2278 if (IS_ERR(inode)) {
2279 r = PTR_ERR(inode);
2280 goto out1;
2281 }
2282
2283 file = kvmfs_file(inode, vcpu);
2284 if (IS_ERR(file)) {
2285 r = PTR_ERR(file);
2286 goto out2;
2287 }
2288
2289 r = get_unused_fd();
2290 if (r < 0)
2291 goto out3;
2292 fd = r;
2293 fd_install(fd, file);
2294
2295 return fd; 2371 return fd;
2296
2297out3:
2298 fput(file);
2299out2:
2300 iput(inode);
2301out1:
2302 fput(vcpu->kvm->filp);
2303 return r;
2304} 2372}
2305 2373
2306/* 2374/*
@@ -2363,6 +2431,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
2363 if (r < 0) 2431 if (r < 0)
2364 goto out_free_vcpus; 2432 goto out_free_vcpus;
2365 2433
2434 spin_lock(&kvm_lock);
2435 if (n >= kvm->nvcpus)
2436 kvm->nvcpus = n + 1;
2437 spin_unlock(&kvm_lock);
2438
2366 return r; 2439 return r;
2367 2440
2368out_free_vcpus: 2441out_free_vcpus:
@@ -2376,6 +2449,27 @@ out:
2376 return r; 2449 return r;
2377} 2450}
2378 2451
2452static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
2453{
2454 u64 efer;
2455 int i;
2456 struct kvm_cpuid_entry *e, *entry;
2457
2458 rdmsrl(MSR_EFER, efer);
2459 entry = NULL;
2460 for (i = 0; i < vcpu->cpuid_nent; ++i) {
2461 e = &vcpu->cpuid_entries[i];
2462 if (e->function == 0x80000001) {
2463 entry = e;
2464 break;
2465 }
2466 }
2467 if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) {
2468 entry->edx &= ~(1 << 20);
2469 printk(KERN_INFO ": guest NX capability removed\n");
2470 }
2471}
2472
2379static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 2473static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
2380 struct kvm_cpuid *cpuid, 2474 struct kvm_cpuid *cpuid,
2381 struct kvm_cpuid_entry __user *entries) 2475 struct kvm_cpuid_entry __user *entries)
@@ -2390,6 +2484,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
2390 cpuid->nent * sizeof(struct kvm_cpuid_entry))) 2484 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
2391 goto out; 2485 goto out;
2392 vcpu->cpuid_nent = cpuid->nent; 2486 vcpu->cpuid_nent = cpuid->nent;
2487 cpuid_fix_nx_cap(vcpu);
2393 return 0; 2488 return 0;
2394 2489
2395out: 2490out:
@@ -2738,41 +2833,18 @@ static int kvm_dev_ioctl_create_vm(void)
2738 struct file *file; 2833 struct file *file;
2739 struct kvm *kvm; 2834 struct kvm *kvm;
2740 2835
2741 inode = kvmfs_inode(&kvm_vm_fops);
2742 if (IS_ERR(inode)) {
2743 r = PTR_ERR(inode);
2744 goto out1;
2745 }
2746
2747 kvm = kvm_create_vm(); 2836 kvm = kvm_create_vm();
2748 if (IS_ERR(kvm)) { 2837 if (IS_ERR(kvm))
2749 r = PTR_ERR(kvm); 2838 return PTR_ERR(kvm);
2750 goto out2; 2839 r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
2840 if (r) {
2841 kvm_destroy_vm(kvm);
2842 return r;
2751 } 2843 }
2752 2844
2753 file = kvmfs_file(inode, kvm);
2754 if (IS_ERR(file)) {
2755 r = PTR_ERR(file);
2756 goto out3;
2757 }
2758 kvm->filp = file; 2845 kvm->filp = file;
2759 2846
2760 r = get_unused_fd();
2761 if (r < 0)
2762 goto out4;
2763 fd = r;
2764 fd_install(fd, file);
2765
2766 return fd; 2847 return fd;
2767
2768out4:
2769 fput(file);
2770out3:
2771 kvm_destroy_vm(kvm);
2772out2:
2773 iput(inode);
2774out1:
2775 return r;
2776} 2848}
2777 2849
2778static long kvm_dev_ioctl(struct file *filp, 2850static long kvm_dev_ioctl(struct file *filp,
@@ -2862,7 +2934,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
2862 * in vmx root mode. 2934 * in vmx root mode.
2863 */ 2935 */
2864 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 2936 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
2865 on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); 2937 on_each_cpu(hardware_disable, NULL, 0, 1);
2866 } 2938 }
2867 return NOTIFY_OK; 2939 return NOTIFY_OK;
2868} 2940}
@@ -2905,33 +2977,88 @@ static void decache_vcpus_on_cpu(int cpu)
2905 spin_unlock(&kvm_lock); 2977 spin_unlock(&kvm_lock);
2906} 2978}
2907 2979
2980static void hardware_enable(void *junk)
2981{
2982 int cpu = raw_smp_processor_id();
2983
2984 if (cpu_isset(cpu, cpus_hardware_enabled))
2985 return;
2986 cpu_set(cpu, cpus_hardware_enabled);
2987 kvm_arch_ops->hardware_enable(NULL);
2988}
2989
2990static void hardware_disable(void *junk)
2991{
2992 int cpu = raw_smp_processor_id();
2993
2994 if (!cpu_isset(cpu, cpus_hardware_enabled))
2995 return;
2996 cpu_clear(cpu, cpus_hardware_enabled);
2997 decache_vcpus_on_cpu(cpu);
2998 kvm_arch_ops->hardware_disable(NULL);
2999}
3000
2908static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, 3001static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
2909 void *v) 3002 void *v)
2910{ 3003{
2911 int cpu = (long)v; 3004 int cpu = (long)v;
2912 3005
2913 switch (val) { 3006 switch (val) {
2914 case CPU_DOWN_PREPARE: 3007 case CPU_DYING:
2915 case CPU_DOWN_PREPARE_FROZEN: 3008 case CPU_DYING_FROZEN:
2916 case CPU_UP_CANCELED: 3009 case CPU_UP_CANCELED:
2917 case CPU_UP_CANCELED_FROZEN: 3010 case CPU_UP_CANCELED_FROZEN:
2918 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 3011 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
2919 cpu); 3012 cpu);
2920 decache_vcpus_on_cpu(cpu); 3013 smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
2921 smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
2922 NULL, 0, 1);
2923 break; 3014 break;
2924 case CPU_ONLINE: 3015 case CPU_ONLINE:
2925 case CPU_ONLINE_FROZEN: 3016 case CPU_ONLINE_FROZEN:
2926 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 3017 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
2927 cpu); 3018 cpu);
2928 smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, 3019 smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
2929 NULL, 0, 1);
2930 break; 3020 break;
2931 } 3021 }
2932 return NOTIFY_OK; 3022 return NOTIFY_OK;
2933} 3023}
2934 3024
3025void kvm_io_bus_init(struct kvm_io_bus *bus)
3026{
3027 memset(bus, 0, sizeof(*bus));
3028}
3029
3030void kvm_io_bus_destroy(struct kvm_io_bus *bus)
3031{
3032 int i;
3033
3034 for (i = 0; i < bus->dev_count; i++) {
3035 struct kvm_io_device *pos = bus->devs[i];
3036
3037 kvm_iodevice_destructor(pos);
3038 }
3039}
3040
3041struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
3042{
3043 int i;
3044
3045 for (i = 0; i < bus->dev_count; i++) {
3046 struct kvm_io_device *pos = bus->devs[i];
3047
3048 if (pos->in_range(pos, addr))
3049 return pos;
3050 }
3051
3052 return NULL;
3053}
3054
3055void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
3056{
3057 BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
3058
3059 bus->devs[bus->dev_count++] = dev;
3060}
3061
2935static struct notifier_block kvm_cpu_notifier = { 3062static struct notifier_block kvm_cpu_notifier = {
2936 .notifier_call = kvm_cpu_hotplug, 3063 .notifier_call = kvm_cpu_hotplug,
2937 .priority = 20, /* must be > scheduler priority */ 3064 .priority = 20, /* must be > scheduler priority */
@@ -2983,14 +3110,13 @@ static void kvm_exit_debug(void)
2983 3110
2984static int kvm_suspend(struct sys_device *dev, pm_message_t state) 3111static int kvm_suspend(struct sys_device *dev, pm_message_t state)
2985{ 3112{
2986 decache_vcpus_on_cpu(raw_smp_processor_id()); 3113 hardware_disable(NULL);
2987 on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
2988 return 0; 3114 return 0;
2989} 3115}
2990 3116
2991static int kvm_resume(struct sys_device *dev) 3117static int kvm_resume(struct sys_device *dev)
2992{ 3118{
2993 on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); 3119 hardware_enable(NULL);
2994 return 0; 3120 return 0;
2995} 3121}
2996 3122
@@ -3007,18 +3133,6 @@ static struct sys_device kvm_sysdev = {
3007 3133
3008hpa_t bad_page_address; 3134hpa_t bad_page_address;
3009 3135
3010static int kvmfs_get_sb(struct file_system_type *fs_type, int flags,
3011 const char *dev_name, void *data, struct vfsmount *mnt)
3012{
3013 return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_SUPER_MAGIC, mnt);
3014}
3015
3016static struct file_system_type kvm_fs_type = {
3017 .name = "kvmfs",
3018 .get_sb = kvmfs_get_sb,
3019 .kill_sb = kill_anon_super,
3020};
3021
3022int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) 3136int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3023{ 3137{
3024 int r; 3138 int r;
@@ -3043,7 +3157,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3043 if (r < 0) 3157 if (r < 0)
3044 goto out; 3158 goto out;
3045 3159
3046 on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); 3160 on_each_cpu(hardware_enable, NULL, 0, 1);
3047 r = register_cpu_notifier(&kvm_cpu_notifier); 3161 r = register_cpu_notifier(&kvm_cpu_notifier);
3048 if (r) 3162 if (r)
3049 goto out_free_1; 3163 goto out_free_1;
@@ -3075,7 +3189,7 @@ out_free_2:
3075 unregister_reboot_notifier(&kvm_reboot_notifier); 3189 unregister_reboot_notifier(&kvm_reboot_notifier);
3076 unregister_cpu_notifier(&kvm_cpu_notifier); 3190 unregister_cpu_notifier(&kvm_cpu_notifier);
3077out_free_1: 3191out_free_1:
3078 on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); 3192 on_each_cpu(hardware_disable, NULL, 0, 1);
3079 kvm_arch_ops->hardware_unsetup(); 3193 kvm_arch_ops->hardware_unsetup();
3080out: 3194out:
3081 kvm_arch_ops = NULL; 3195 kvm_arch_ops = NULL;
@@ -3089,7 +3203,7 @@ void kvm_exit_arch(void)
3089 sysdev_class_unregister(&kvm_sysdev_class); 3203 sysdev_class_unregister(&kvm_sysdev_class);
3090 unregister_reboot_notifier(&kvm_reboot_notifier); 3204 unregister_reboot_notifier(&kvm_reboot_notifier);
3091 unregister_cpu_notifier(&kvm_cpu_notifier); 3205 unregister_cpu_notifier(&kvm_cpu_notifier);
3092 on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); 3206 on_each_cpu(hardware_disable, NULL, 0, 1);
3093 kvm_arch_ops->hardware_unsetup(); 3207 kvm_arch_ops->hardware_unsetup();
3094 kvm_arch_ops = NULL; 3208 kvm_arch_ops = NULL;
3095} 3209}
@@ -3103,14 +3217,6 @@ static __init int kvm_init(void)
3103 if (r) 3217 if (r)
3104 goto out4; 3218 goto out4;
3105 3219
3106 r = register_filesystem(&kvm_fs_type);
3107 if (r)
3108 goto out3;
3109
3110 kvmfs_mnt = kern_mount(&kvm_fs_type);
3111 r = PTR_ERR(kvmfs_mnt);
3112 if (IS_ERR(kvmfs_mnt))
3113 goto out2;
3114 kvm_init_debug(); 3220 kvm_init_debug();
3115 3221
3116 kvm_init_msr_list(); 3222 kvm_init_msr_list();
@@ -3127,10 +3233,6 @@ static __init int kvm_init(void)
3127 3233
3128out: 3234out:
3129 kvm_exit_debug(); 3235 kvm_exit_debug();
3130 mntput(kvmfs_mnt);
3131out2:
3132 unregister_filesystem(&kvm_fs_type);
3133out3:
3134 kvm_mmu_module_exit(); 3236 kvm_mmu_module_exit();
3135out4: 3237out4:
3136 return r; 3238 return r;
@@ -3140,8 +3242,6 @@ static __exit void kvm_exit(void)
3140{ 3242{
3141 kvm_exit_debug(); 3243 kvm_exit_debug();
3142 __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); 3244 __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
3143 mntput(kvmfs_mnt);
3144 unregister_filesystem(&kvm_fs_type);
3145 kvm_mmu_module_exit(); 3245 kvm_mmu_module_exit();
3146} 3246}
3147 3247