author    Avi Kivity <avi@qumranet.com>  2006-12-10 05:21:36 -0500
committer Linus Torvalds <torvalds@woody.osdl.org>  2006-12-10 12:57:22 -0500
commit    6aa8b732ca01c3d7a54e93f4d701b8aabbe60fb7
tree      23fcbe6f4918cacdae26d513a2bd13e91d8b4c38 /drivers/kvm/kvm_main.c
parent    f5f1a24a2caa299bb7d294aee92d7dd3410d9ed7
[PATCH] kvm: userspace interface
web site: http://kvm.sourceforge.net
mailing list: kvm-devel@lists.sourceforge.net
  (http://lists.sourceforge.net/lists/listinfo/kvm-devel)

The following patchset adds a driver for Intel's hardware virtualization
extensions to the x86 architecture.  The driver adds a character device
(/dev/kvm) that exposes the virtualization capabilities to userspace.  Using
this driver, a process can run a virtual machine (a "guest") in a fully
virtualized PC containing its own virtual hard disks, network adapters, and
display.

Using this driver, one can start multiple virtual machines on a host.  Each
virtual machine is a process on the host; a virtual cpu is a thread in that
process.  kill(1), nice(1), top(1) work as expected.

In effect, the driver adds a third execution mode to the existing two: we now
have kernel mode, user mode, and guest mode.  Guest mode has its own address
space mapping guest physical memory (which is accessible to user mode by
mmap()ing /dev/kvm).  Guest mode has no access to any I/O devices; any such
access is intercepted and directed to user mode for emulation.

The driver supports i386 and x86_64 hosts and guests.  All combinations are
allowed except x86_64 guest on i386 host.  For i386 guests and hosts, both
pae and non-pae paging modes are supported.  SMP hosts and UP guests are
supported.  At the moment only Intel hardware is supported, but AMD
virtualization support is being worked on.

Performance currently is non-stellar due to the naive implementation of the
mmu virtualization, which throws away most of the shadow page table entries
every context switch.  We plan to address this in two ways:

- cache shadow page tables across tlb flushes
- wait until AMD and Intel release processors with nested page tables

Currently a virtual desktop is responsive but consumes a lot of CPU.  Under
Windows I tried playing pinball and watching a few flash movies; with a
recent CPU one can hardly feel the virtualization.  Linux/X is slower,
probably due to X being in a separate process.

In addition to the driver, you need a slightly modified qemu to provide I/O
device emulation and the BIOS.

Caveats (akpm: might no longer be true):

- The Windows install currently bluescreens due to a problem with the
  virtual APIC.  We are working on a fix.  A temporary workaround is to
  use an existing image or install through qemu
- Windows 64-bit does not work.  That's also true for qemu, so it's
  probably a problem with the device model.

[bero@arklinux.org: build fix]
[simon.kagstrom@bth.se: build fix, other fixes]
[uril@qumranet.com: KVM: Expose interrupt bitmap]
[akpm@osdl.org: i386 build fix]
[mingo@elte.hu: i386 fixes]
[rdreier@cisco.com: add log levels to all printks]
[randy.dunlap@oracle.com: Fix sparse NULL and C99 struct init warnings]
[anthony@codemonkey.ws: KVM: AMD SVM: 32-bit host support]
Signed-off-by: Yaniv Kamay <yaniv@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Cc: Simon Kagstrom <simon.kagstrom@bth.se>
Cc: Bernhard Rosenkraenzer <bero@arklinux.org>
Signed-off-by: Uri Lublin <uril@qumranet.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
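As a rough illustration of the interface described above, the sketch below
shows how a minimal userspace launcher might drive /dev/kvm.  It is not part
of the patch: structure and ioctl names follow the <linux/kvm.h> header added
by this series, the 16 MB memory size is arbitrary, and error handling and
guest-image loading are omitted.

    #include <fcntl.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <linux/kvm.h>

    int main(void)
    {
            struct kvm_memory_region mem = {
                    .slot = 0,
                    .guest_phys_addr = 0,
                    .memory_size = 16 << 20,  /* 16 MB of guest RAM */
            };
            struct kvm_run run;
            void *ram;
            int fd;

            fd = open("/dev/kvm", O_RDWR);  /* each open() is one VM */
            ioctl(fd, KVM_SET_MEMORY_REGION, &mem);
            ioctl(fd, KVM_CREATE_VCPU, 0);  /* vcpu in slot 0 */

            /* Guest physical memory is visible to user mode via mmap();
             * the file offset is the guest physical address. */
            ram = mmap(NULL, mem.memory_size, PROT_READ | PROT_WRITE,
                       MAP_SHARED, fd, 0);
            /* ... load a BIOS or kernel image into ram here ... */

            memset(&run, 0, sizeof run);
            run.vcpu = 0;
            for (;;) {
                    ioctl(fd, KVM_RUN, &run);
                    /* on return, examine 'run' (e.g. run.mmio) and
                     * emulate the I/O access in userspace, then loop */
            }
    }

Each open() of /dev/kvm creates an independent VM (see kvm_dev_open() below),
and the mmap() works because kvm_dev_nopage() translates file offsets into
guest page frames.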
Diffstat (limited to 'drivers/kvm/kvm_main.c')
-rw-r--r--  drivers/kvm/kvm_main.c  1935
1 file changed, 1935 insertions(+), 0 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
new file mode 100644
index 000000000000..b6b8a41b5ec8
--- /dev/null
+++ b/drivers/kvm/kvm_main.c
@@ -0,0 +1,1935 @@
1/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * This module enables machines with Intel VT-x extensions to run virtual
5 * machines without emulation or binary translation.
6 *
7 * Copyright (C) 2006 Qumranet, Inc.
8 *
9 * Authors:
10 * Avi Kivity <avi@qumranet.com>
11 * Yaniv Kamay <yaniv@qumranet.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2. See
14 * the COPYING file in the top-level directory.
15 *
16 */
17
18#include "kvm.h"
19
20#include <linux/kvm.h>
21#include <linux/module.h>
22#include <linux/errno.h>
23#include <asm/processor.h>
24#include <linux/percpu.h>
25#include <linux/gfp.h>
26#include <asm/msr.h>
27#include <linux/mm.h>
28#include <linux/miscdevice.h>
29#include <linux/vmalloc.h>
30#include <asm/uaccess.h>
31#include <linux/reboot.h>
32#include <asm/io.h>
33#include <linux/debugfs.h>
34#include <linux/highmem.h>
35#include <linux/file.h>
36#include <asm/desc.h>
37
38#include "x86_emulate.h"
39#include "segment_descriptor.h"
40
41MODULE_AUTHOR("Qumranet");
42MODULE_LICENSE("GPL");
43
44struct kvm_arch_ops *kvm_arch_ops;
45struct kvm_stat kvm_stat;
46EXPORT_SYMBOL_GPL(kvm_stat);
47
48static struct kvm_stats_debugfs_item {
49 const char *name;
50 u32 *data;
51 struct dentry *dentry;
52} debugfs_entries[] = {
53 { "pf_fixed", &kvm_stat.pf_fixed },
54 { "pf_guest", &kvm_stat.pf_guest },
55 { "tlb_flush", &kvm_stat.tlb_flush },
56 { "invlpg", &kvm_stat.invlpg },
57 { "exits", &kvm_stat.exits },
58 { "io_exits", &kvm_stat.io_exits },
59 { "mmio_exits", &kvm_stat.mmio_exits },
60 { "signal_exits", &kvm_stat.signal_exits },
61 { "irq_exits", &kvm_stat.irq_exits },
62 { NULL, NULL }
63};
64
65static struct dentry *debugfs_dir;
66
67#define MAX_IO_MSRS 256
68
69#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL
70#define LMSW_GUEST_MASK 0x0eULL
71#define CR4_RESEVED_BITS (~((1ULL << 11) - 1))
72#define CR8_RESEVED_BITS (~0x0fULL)
73#define EFER_RESERVED_BITS 0xfffffffffffff2fe
74
75struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr)
76{
77 int i;
78
79 for (i = 0; i < vcpu->nmsrs; ++i)
80 if (vcpu->guest_msrs[i].index == msr)
81 return &vcpu->guest_msrs[i];
82 return NULL;
83}
84EXPORT_SYMBOL_GPL(find_msr_entry);
85
86#ifdef __x86_64__
87/* LDT or TSS descriptor in the GDT. 16 bytes. */
88struct segment_descriptor_64 {
89 struct segment_descriptor s;
90 u32 base_higher;
91 u32 pad_zero;
92};
93
94#endif
95
96unsigned long segment_base(u16 selector)
97{
98 struct descriptor_table gdt;
99 struct segment_descriptor *d;
100 unsigned long table_base;
101 typedef unsigned long ul;
102 unsigned long v;
103
104 if (selector == 0)
105 return 0;
106
107 asm ("sgdt %0" : "=m"(gdt));
108 table_base = gdt.base;
109
110 if (selector & 4) { /* from ldt */
111 u16 ldt_selector;
112
113 asm ("sldt %0" : "=g"(ldt_selector));
114 table_base = segment_base(ldt_selector);
115 }
116 d = (struct segment_descriptor *)(table_base + (selector & ~7));
117 v = d->base_low | ((ul)d->base_mid << 16) | ((ul)d->base_high << 24);
118#ifdef __x86_64__
119 if (d->system == 0
120 && (d->type == 2 || d->type == 9 || d->type == 11))
121 v |= ((ul)((struct segment_descriptor_64 *)d)->base_higher) << 32;
122#endif
123 return v;
124}
125EXPORT_SYMBOL_GPL(segment_base);
126
127int kvm_read_guest(struct kvm_vcpu *vcpu,
128 gva_t addr,
129 unsigned long size,
130 void *dest)
131{
132 unsigned char *host_buf = dest;
133 unsigned long req_size = size;
134
135 while (size) {
136 hpa_t paddr;
137 unsigned now;
138 unsigned offset;
139 hva_t guest_buf;
140
141 paddr = gva_to_hpa(vcpu, addr);
142
143 if (is_error_hpa(paddr))
144 break;
145
146 guest_buf = (hva_t)kmap_atomic(
147 pfn_to_page(paddr >> PAGE_SHIFT),
148 KM_USER0);
149 offset = addr & ~PAGE_MASK;
150 guest_buf |= offset;
151 now = min(size, PAGE_SIZE - offset);
152 memcpy(host_buf, (void*)guest_buf, now);
153 host_buf += now;
154 addr += now;
155 size -= now;
156 kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0);
157 }
158 return req_size - size;
159}
160EXPORT_SYMBOL_GPL(kvm_read_guest);
161
162int kvm_write_guest(struct kvm_vcpu *vcpu,
163 gva_t addr,
164 unsigned long size,
165 void *data)
166{
167 unsigned char *host_buf = data;
168 unsigned long req_size = size;
169
170 while (size) {
171 hpa_t paddr;
172 unsigned now;
173 unsigned offset;
174 hva_t guest_buf;
175
176 paddr = gva_to_hpa(vcpu, addr);
177
178 if (is_error_hpa(paddr))
179 break;
180
181 guest_buf = (hva_t)kmap_atomic(
182 pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0);
183 offset = addr & ~PAGE_MASK;
184 guest_buf |= offset;
185 now = min(size, PAGE_SIZE - offset);
186 memcpy((void*)guest_buf, host_buf, now);
187 host_buf += now;
188 addr += now;
189 size -= now;
190 kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0);
191 }
192 return req_size - size;
193}
194EXPORT_SYMBOL_GPL(kvm_write_guest);
195
196static int vcpu_slot(struct kvm_vcpu *vcpu)
197{
198 return vcpu - vcpu->kvm->vcpus;
199}
200
201/*
202 * Switches to the specified vcpu, until a matching vcpu_put().
203 */
204static struct kvm_vcpu *vcpu_load(struct kvm *kvm, int vcpu_slot)
205{
206 struct kvm_vcpu *vcpu = &kvm->vcpus[vcpu_slot];
207
208 mutex_lock(&vcpu->mutex);
209 if (unlikely(!vcpu->vmcs)) {
210 mutex_unlock(&vcpu->mutex);
211 return NULL;
212 }
213 return kvm_arch_ops->vcpu_load(vcpu);
214}
215
216static void vcpu_put(struct kvm_vcpu *vcpu)
217{
218 kvm_arch_ops->vcpu_put(vcpu);
219 put_cpu();
220 mutex_unlock(&vcpu->mutex);
221}
222
223static int kvm_dev_open(struct inode *inode, struct file *filp)
224{
225 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
226 int i;
227
228 if (!kvm)
229 return -ENOMEM;
230
231 spin_lock_init(&kvm->lock);
232 INIT_LIST_HEAD(&kvm->active_mmu_pages);
233 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
234 struct kvm_vcpu *vcpu = &kvm->vcpus[i];
235
236 mutex_init(&vcpu->mutex);
237 vcpu->mmu.root_hpa = INVALID_PAGE;
238 INIT_LIST_HEAD(&vcpu->free_pages);
239 }
240 filp->private_data = kvm;
241 return 0;
242}
243
244/*
245 * Free any memory in @free but not in @dont.
246 */
247static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
248 struct kvm_memory_slot *dont)
249{
250 int i;
251
252 if (!dont || free->phys_mem != dont->phys_mem)
253 if (free->phys_mem) {
254 for (i = 0; i < free->npages; ++i)
255 __free_page(free->phys_mem[i]);
256 vfree(free->phys_mem);
257 }
258
259 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
260 vfree(free->dirty_bitmap);
261
262 free->phys_mem = NULL;
263 free->npages = 0;
264 free->dirty_bitmap = NULL;
265}
266
267static void kvm_free_physmem(struct kvm *kvm)
268{
269 int i;
270
271 for (i = 0; i < kvm->nmemslots; ++i)
272 kvm_free_physmem_slot(&kvm->memslots[i], NULL);
273}
274
275static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
276{
277 kvm_arch_ops->vcpu_free(vcpu);
278 kvm_mmu_destroy(vcpu);
279}
280
281static void kvm_free_vcpus(struct kvm *kvm)
282{
283 unsigned int i;
284
285 for (i = 0; i < KVM_MAX_VCPUS; ++i)
286 kvm_free_vcpu(&kvm->vcpus[i]);
287}
288
289static int kvm_dev_release(struct inode *inode, struct file *filp)
290{
291 struct kvm *kvm = filp->private_data;
292
293 kvm_free_vcpus(kvm);
294 kvm_free_physmem(kvm);
295 kfree(kvm);
296 return 0;
297}
298
299static void inject_gp(struct kvm_vcpu *vcpu)
300{
301 kvm_arch_ops->inject_gp(vcpu, 0);
302}
303
304static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu,
305 unsigned long cr3)
306{
307 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
308 unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5;
309 int i;
310 u64 pdpte;
311 u64 *pdpt;
312 struct kvm_memory_slot *memslot;
313
314 spin_lock(&vcpu->kvm->lock);
315 memslot = gfn_to_memslot(vcpu->kvm, pdpt_gfn);
316 /* FIXME: !memslot - emulate? 0xff? */
317 pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0);
318
319 for (i = 0; i < 4; ++i) {
320 pdpte = pdpt[offset + i];
321 if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull))
322 break;
323 }
324
325 kunmap_atomic(pdpt, KM_USER0);
326 spin_unlock(&vcpu->kvm->lock);
327
328 return i != 4;
329}
330
331void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
332{
333 if (cr0 & CR0_RESEVED_BITS) {
334 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
335 cr0, vcpu->cr0);
336 inject_gp(vcpu);
337 return;
338 }
339
340 if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) {
341 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
342 inject_gp(vcpu);
343 return;
344 }
345
346 if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) {
347 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
348 "and a clear PE flag\n");
349 inject_gp(vcpu);
350 return;
351 }
352
353 if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) {
354#ifdef __x86_64__
355 if ((vcpu->shadow_efer & EFER_LME)) {
356 int cs_db, cs_l;
357
358 if (!is_pae(vcpu)) {
359 printk(KERN_DEBUG "set_cr0: #GP, start paging "
360 "in long mode while PAE is disabled\n");
361 inject_gp(vcpu);
362 return;
363 }
364 kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
365 if (cs_l) {
366 printk(KERN_DEBUG "set_cr0: #GP, start paging "
367 "in long mode while CS.L == 1\n");
368 inject_gp(vcpu);
369 return;
370
371 }
372 } else
373#endif
374 if (is_pae(vcpu) &&
375 pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
376 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
377 "reserved bits\n");
378 inject_gp(vcpu);
379 return;
380 }
381
382 }
383
384 kvm_arch_ops->set_cr0(vcpu, cr0);
385 vcpu->cr0 = cr0;
386
387 spin_lock(&vcpu->kvm->lock);
388 kvm_mmu_reset_context(vcpu);
389 spin_unlock(&vcpu->kvm->lock);
390 return;
391}
392EXPORT_SYMBOL_GPL(set_cr0);
393
394void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
395{
396 set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
397}
398EXPORT_SYMBOL_GPL(lmsw);
399
400void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
401{
402 if (cr4 & CR4_RESEVED_BITS) {
403 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
404 inject_gp(vcpu);
405 return;
406 }
407
408 if (kvm_arch_ops->is_long_mode(vcpu)) {
409 if (!(cr4 & CR4_PAE_MASK)) {
410 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
411 "in long mode\n");
412 inject_gp(vcpu);
413 return;
414 }
415 } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK)
416 && pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
417 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
418 inject_gp(vcpu);
419 }
420
421 if (cr4 & CR4_VMXE_MASK) {
422 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
423 inject_gp(vcpu);
424 return;
425 }
426 kvm_arch_ops->set_cr4(vcpu, cr4);
427 spin_lock(&vcpu->kvm->lock);
428 kvm_mmu_reset_context(vcpu);
429 spin_unlock(&vcpu->kvm->lock);
430}
431EXPORT_SYMBOL_GPL(set_cr4);
432
433void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
434{
435 if (kvm_arch_ops->is_long_mode(vcpu)) {
436 if ( cr3 & CR3_L_MODE_RESEVED_BITS) {
437 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
438 inject_gp(vcpu);
439 return;
440 }
441 } else {
442 if (cr3 & CR3_RESEVED_BITS) {
443 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
444 inject_gp(vcpu);
445 return;
446 }
447 if (is_paging(vcpu) && is_pae(vcpu) &&
448 pdptrs_have_reserved_bits_set(vcpu, cr3)) {
449 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
450 "reserved bits\n");
451 inject_gp(vcpu);
452 return;
453 }
454 }
455
456 vcpu->cr3 = cr3;
457 spin_lock(&vcpu->kvm->lock);
458 vcpu->mmu.new_cr3(vcpu);
459 spin_unlock(&vcpu->kvm->lock);
460}
461EXPORT_SYMBOL_GPL(set_cr3);
462
463void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
464{
465 if ( cr8 & CR8_RESEVED_BITS) {
466 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
467 inject_gp(vcpu);
468 return;
469 }
470 vcpu->cr8 = cr8;
471}
472EXPORT_SYMBOL_GPL(set_cr8);
473
474void fx_init(struct kvm_vcpu *vcpu)
475{
476 struct __attribute__ ((__packed__)) fx_image_s {
477 u16 control; /* fcw */
478 u16 status; /* fsw */
479 u16 tag; /* ftw */
480 u16 opcode; /* fop */
481 u64 ip; /* fpu ip */
482 u64 operand; /* fpu dp */
483 u32 mxcsr;
484 u32 mxcsr_mask;
485
486 } *fx_image;
487
488 fx_save(vcpu->host_fx_image);
489 fpu_init();
490 fx_save(vcpu->guest_fx_image);
491 fx_restore(vcpu->host_fx_image);
492
493 fx_image = (struct fx_image_s *)vcpu->guest_fx_image;
494 fx_image->mxcsr = 0x1f80;
495 memset(vcpu->guest_fx_image + sizeof(struct fx_image_s),
496 0, FX_IMAGE_SIZE - sizeof(struct fx_image_s));
497}
498EXPORT_SYMBOL_GPL(fx_init);
499
500/*
501 * Creates some virtual cpus. Good luck creating more than one.
502 */
503static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n)
504{
505 int r;
506 struct kvm_vcpu *vcpu;
507
508 r = -EINVAL;
509 if (n < 0 || n >= KVM_MAX_VCPUS)
510 goto out;
511
512 vcpu = &kvm->vcpus[n];
513
514 mutex_lock(&vcpu->mutex);
515
516 if (vcpu->vmcs) {
517 mutex_unlock(&vcpu->mutex);
518 return -EEXIST;
519 }
520
521 vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
522 FX_IMAGE_ALIGN);
523 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
524
525 vcpu->cpu = -1; /* First load will set up TR */
526 vcpu->kvm = kvm;
527 r = kvm_arch_ops->vcpu_create(vcpu);
528 if (r < 0)
529 goto out_free_vcpus;
530
531 kvm_arch_ops->vcpu_load(vcpu);
532
533 r = kvm_arch_ops->vcpu_setup(vcpu);
534 if (r >= 0)
535 r = kvm_mmu_init(vcpu);
536
537 vcpu_put(vcpu);
538
539 if (r < 0)
540 goto out_free_vcpus;
541
542 return 0;
543
544out_free_vcpus:
545 kvm_free_vcpu(vcpu);
546 mutex_unlock(&vcpu->mutex);
547out:
548 return r;
549}
550
551/*
552 * Allocate some memory and give it an address in the guest physical address
553 * space.
554 *
555 * Discontiguous memory is allowed, mostly for framebuffers.
556 */
557static int kvm_dev_ioctl_set_memory_region(struct kvm *kvm,
558 struct kvm_memory_region *mem)
559{
560 int r;
561 gfn_t base_gfn;
562 unsigned long npages;
563 unsigned long i;
564 struct kvm_memory_slot *memslot;
565 struct kvm_memory_slot old, new;
566 int memory_config_version;
567
568 r = -EINVAL;
569 /* General sanity checks */
570 if (mem->memory_size & (PAGE_SIZE - 1))
571 goto out;
572 if (mem->guest_phys_addr & (PAGE_SIZE - 1))
573 goto out;
574 if (mem->slot >= KVM_MEMORY_SLOTS)
575 goto out;
576 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
577 goto out;
578
579 memslot = &kvm->memslots[mem->slot];
580 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
581 npages = mem->memory_size >> PAGE_SHIFT;
582
583 if (!npages)
584 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
585
586raced:
587 spin_lock(&kvm->lock);
588
589 memory_config_version = kvm->memory_config_version;
590 new = old = *memslot;
591
592 new.base_gfn = base_gfn;
593 new.npages = npages;
594 new.flags = mem->flags;
595
596 /* Disallow changing a memory slot's size. */
597 r = -EINVAL;
598 if (npages && old.npages && npages != old.npages)
599 goto out_unlock;
600
601 /* Check for overlaps */
602 r = -EEXIST;
603 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
604 struct kvm_memory_slot *s = &kvm->memslots[i];
605
606 if (s == memslot)
607 continue;
608 if (!((base_gfn + npages <= s->base_gfn) ||
609 (base_gfn >= s->base_gfn + s->npages)))
610 goto out_unlock;
611 }
612 /*
613 * Do memory allocations outside the lock.  memory_config_version will
614 * detect any races.
615 */
616 spin_unlock(&kvm->lock);
617
618 /* Deallocate if slot is being removed */
619 if (!npages)
620 new.phys_mem = NULL;
621
622 /* Free page dirty bitmap if unneeded */
623 if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
624 new.dirty_bitmap = NULL;
625
626 r = -ENOMEM;
627
628 /* Allocate if a slot is being created */
629 if (npages && !new.phys_mem) {
630 new.phys_mem = vmalloc(npages * sizeof(struct page *));
631
632 if (!new.phys_mem)
633 goto out_free;
634
635 memset(new.phys_mem, 0, npages * sizeof(struct page *));
636 for (i = 0; i < npages; ++i) {
637 new.phys_mem[i] = alloc_page(GFP_HIGHUSER
638 | __GFP_ZERO);
639 if (!new.phys_mem[i])
640 goto out_free;
641 }
642 }
643
644 /* Allocate page dirty bitmap if needed */
645 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
646 unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
647
648 new.dirty_bitmap = vmalloc(dirty_bytes);
649 if (!new.dirty_bitmap)
650 goto out_free;
651 memset(new.dirty_bitmap, 0, dirty_bytes);
652 }
653
654 spin_lock(&kvm->lock);
655
656 if (memory_config_version != kvm->memory_config_version) {
657 spin_unlock(&kvm->lock);
658 kvm_free_physmem_slot(&new, &old);
659 goto raced;
660 }
661
662 r = -EAGAIN;
663 if (kvm->busy)
664 goto out_unlock;
665
666 if (mem->slot >= kvm->nmemslots)
667 kvm->nmemslots = mem->slot + 1;
668
669 *memslot = new;
670 ++kvm->memory_config_version;
671
672 spin_unlock(&kvm->lock);
673
674 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
675 struct kvm_vcpu *vcpu;
676
677 vcpu = vcpu_load(kvm, i);
678 if (!vcpu)
679 continue;
680 kvm_mmu_reset_context(vcpu);
681 vcpu_put(vcpu);
682 }
683
684 kvm_free_physmem_slot(&old, &new);
685 return 0;
686
687out_unlock:
688 spin_unlock(&kvm->lock);
689out_free:
690 kvm_free_physmem_slot(&new, &old);
691out:
692 return r;
693}
694
695/*
696 * Get (and clear) the dirty memory log for a memory slot.
697 */
698static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
699 struct kvm_dirty_log *log)
700{
701 struct kvm_memory_slot *memslot;
702 int r, i;
703 int n;
704 unsigned long any = 0;
705
706 spin_lock(&kvm->lock);
707
708 /*
709 * Prevent changes to guest memory configuration even while the lock
710 * is not taken.
711 */
712 ++kvm->busy;
713 spin_unlock(&kvm->lock);
714 r = -EINVAL;
715 if (log->slot >= KVM_MEMORY_SLOTS)
716 goto out;
717
718 memslot = &kvm->memslots[log->slot];
719 r = -ENOENT;
720 if (!memslot->dirty_bitmap)
721 goto out;
722
723 n = ALIGN(memslot->npages, 8) / 8;
724
725 for (i = 0; !any && i < n; ++i)
726 any = memslot->dirty_bitmap[i];
727
728 r = -EFAULT;
729 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
730 goto out;
731
732
733 if (any) {
734 spin_lock(&kvm->lock);
735 kvm_mmu_slot_remove_write_access(kvm, log->slot);
736 spin_unlock(&kvm->lock);
737 memset(memslot->dirty_bitmap, 0, n);
738 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
739 struct kvm_vcpu *vcpu = vcpu_load(kvm, i);
740
741 if (!vcpu)
742 continue;
743 kvm_arch_ops->tlb_flush(vcpu);
744 vcpu_put(vcpu);
745 }
746 }
747
748 r = 0;
749
750out:
751 spin_lock(&kvm->lock);
752 --kvm->busy;
753 spin_unlock(&kvm->lock);
754 return r;
755}
756
757struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
758{
759 int i;
760
761 for (i = 0; i < kvm->nmemslots; ++i) {
762 struct kvm_memory_slot *memslot = &kvm->memslots[i];
763
764 if (gfn >= memslot->base_gfn
765 && gfn < memslot->base_gfn + memslot->npages)
766 return memslot;
767 }
768 return NULL;
769}
770EXPORT_SYMBOL_GPL(gfn_to_memslot);
771
772void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
773{
774 int i;
775 struct kvm_memory_slot *memslot = NULL;
776 unsigned long rel_gfn;
777
778 for (i = 0; i < kvm->nmemslots; ++i) {
779 memslot = &kvm->memslots[i];
780
781 if (gfn >= memslot->base_gfn
782 && gfn < memslot->base_gfn + memslot->npages) {
783
784 if (!memslot->dirty_bitmap)
785 return;
786
787 rel_gfn = gfn - memslot->base_gfn;
788
789 /* avoid RMW */
790 if (!test_bit(rel_gfn, memslot->dirty_bitmap))
791 set_bit(rel_gfn, memslot->dirty_bitmap);
792 return;
793 }
794 }
795}
796
797static int emulator_read_std(unsigned long addr,
798 unsigned long *val,
799 unsigned int bytes,
800 struct x86_emulate_ctxt *ctxt)
801{
802 struct kvm_vcpu *vcpu = ctxt->vcpu;
803 void *data = val;
804
805 while (bytes) {
806 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
807 unsigned offset = addr & (PAGE_SIZE-1);
808 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
809 unsigned long pfn;
810 struct kvm_memory_slot *memslot;
811 void *page;
812
813 if (gpa == UNMAPPED_GVA)
814 return X86EMUL_PROPAGATE_FAULT;
815 pfn = gpa >> PAGE_SHIFT;
816 memslot = gfn_to_memslot(vcpu->kvm, pfn);
817 if (!memslot)
818 return X86EMUL_UNHANDLEABLE;
819 page = kmap_atomic(gfn_to_page(memslot, pfn), KM_USER0);
820
821 memcpy(data, page + offset, tocopy);
822
823 kunmap_atomic(page, KM_USER0);
824
825 bytes -= tocopy;
826 data += tocopy;
827 addr += tocopy;
828 }
829
830 return X86EMUL_CONTINUE;
831}
832
833static int emulator_write_std(unsigned long addr,
834 unsigned long val,
835 unsigned int bytes,
836 struct x86_emulate_ctxt *ctxt)
837{
838 printk(KERN_ERR "emulator_write_std: addr %lx n %d\n",
839 addr, bytes);
840 return X86EMUL_UNHANDLEABLE;
841}
842
843static int emulator_read_emulated(unsigned long addr,
844 unsigned long *val,
845 unsigned int bytes,
846 struct x86_emulate_ctxt *ctxt)
847{
848 struct kvm_vcpu *vcpu = ctxt->vcpu;
849
850 if (vcpu->mmio_read_completed) {
851 memcpy(val, vcpu->mmio_data, bytes);
852 vcpu->mmio_read_completed = 0;
853 return X86EMUL_CONTINUE;
854 } else if (emulator_read_std(addr, val, bytes, ctxt)
855 == X86EMUL_CONTINUE)
856 return X86EMUL_CONTINUE;
857 else {
858 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
859 if (gpa == UNMAPPED_GVA)
860 return vcpu_printf(vcpu, "not present\n"), X86EMUL_PROPAGATE_FAULT;
861 vcpu->mmio_needed = 1;
862 vcpu->mmio_phys_addr = gpa;
863 vcpu->mmio_size = bytes;
864 vcpu->mmio_is_write = 0;
865
866 return X86EMUL_UNHANDLEABLE;
867 }
868}
869
870static int emulator_write_emulated(unsigned long addr,
871 unsigned long val,
872 unsigned int bytes,
873 struct x86_emulate_ctxt *ctxt)
874{
875 struct kvm_vcpu *vcpu = ctxt->vcpu;
876 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
877
878 if (gpa == UNMAPPED_GVA)
879 return X86EMUL_PROPAGATE_FAULT;
880
881 vcpu->mmio_needed = 1;
882 vcpu->mmio_phys_addr = gpa;
883 vcpu->mmio_size = bytes;
884 vcpu->mmio_is_write = 1;
885 memcpy(vcpu->mmio_data, &val, bytes);
886
887 return X86EMUL_CONTINUE;
888}
889
890static int emulator_cmpxchg_emulated(unsigned long addr,
891 unsigned long old,
892 unsigned long new,
893 unsigned int bytes,
894 struct x86_emulate_ctxt *ctxt)
895{
896 static int reported;
897
898 if (!reported) {
899 reported = 1;
900 printk(KERN_WARNING "kvm: emulating exchange as write\n");
901 }
902 return emulator_write_emulated(addr, new, bytes, ctxt);
903}
904
905static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
906{
907 return kvm_arch_ops->get_segment_base(vcpu, seg);
908}
909
910int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
911{
912 spin_lock(&vcpu->kvm->lock);
913 vcpu->mmu.inval_page(vcpu, address);
914 spin_unlock(&vcpu->kvm->lock);
915 kvm_arch_ops->invlpg(vcpu, address);
916 return X86EMUL_CONTINUE;
917}
918
919int emulate_clts(struct kvm_vcpu *vcpu)
920{
921 unsigned long cr0 = vcpu->cr0;
922
923 cr0 &= ~CR0_TS_MASK;
924 kvm_arch_ops->set_cr0(vcpu, cr0);
925 return X86EMUL_CONTINUE;
926}
927
928int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest)
929{
930 struct kvm_vcpu *vcpu = ctxt->vcpu;
931
932 switch (dr) {
933 case 0 ... 3:
934 *dest = kvm_arch_ops->get_dr(vcpu, dr);
935 return X86EMUL_CONTINUE;
936 default:
937 printk(KERN_DEBUG "%s: unexpected dr %u\n",
938 __FUNCTION__, dr);
939 return X86EMUL_UNHANDLEABLE;
940 }
941}
942
943int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
944{
945 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
946 int exception;
947
948 kvm_arch_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
949 if (exception) {
950 /* FIXME: better handling */
951 return X86EMUL_UNHANDLEABLE;
952 }
953 return X86EMUL_CONTINUE;
954}
955
956static void report_emulation_failure(struct x86_emulate_ctxt *ctxt)
957{
958 static int reported;
959 u8 opcodes[4];
960 unsigned long rip = ctxt->vcpu->rip;
961 unsigned long rip_linear;
962
963 rip_linear = rip + get_segment_base(ctxt->vcpu, VCPU_SREG_CS);
964
965 if (reported)
966 return;
967
968 emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt);
969
970 printk(KERN_ERR "emulation failed but !mmio_needed?"
971 " rip %lx %02x %02x %02x %02x\n",
972 rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
973 reported = 1;
974}
975
976struct x86_emulate_ops emulate_ops = {
977 .read_std = emulator_read_std,
978 .write_std = emulator_write_std,
979 .read_emulated = emulator_read_emulated,
980 .write_emulated = emulator_write_emulated,
981 .cmpxchg_emulated = emulator_cmpxchg_emulated,
982};
983
984int emulate_instruction(struct kvm_vcpu *vcpu,
985 struct kvm_run *run,
986 unsigned long cr2,
987 u16 error_code)
988{
989 struct x86_emulate_ctxt emulate_ctxt;
990 int r;
991 int cs_db, cs_l;
992
993 kvm_arch_ops->cache_regs(vcpu);
994
995 kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
996
997 emulate_ctxt.vcpu = vcpu;
998 emulate_ctxt.eflags = kvm_arch_ops->get_rflags(vcpu);
999 emulate_ctxt.cr2 = cr2;
1000 emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
1001 ? X86EMUL_MODE_REAL : cs_l
1002 ? X86EMUL_MODE_PROT64 : cs_db
1003 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
1004
1005 if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
1006 emulate_ctxt.cs_base = 0;
1007 emulate_ctxt.ds_base = 0;
1008 emulate_ctxt.es_base = 0;
1009 emulate_ctxt.ss_base = 0;
1010 } else {
1011 emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS);
1012 emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS);
1013 emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES);
1014 emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS);
1015 }
1016
1017 emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS);
1018 emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);
1019
1020 vcpu->mmio_is_write = 0;
1021 r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
1022
1023 if ((r || vcpu->mmio_is_write) && run) {
1024 run->mmio.phys_addr = vcpu->mmio_phys_addr;
1025 memcpy(run->mmio.data, vcpu->mmio_data, 8);
1026 run->mmio.len = vcpu->mmio_size;
1027 run->mmio.is_write = vcpu->mmio_is_write;
1028 }
1029
1030 if (r) {
1031 if (!vcpu->mmio_needed) {
1032 report_emulation_failure(&emulate_ctxt);
1033 return EMULATE_FAIL;
1034 }
1035 return EMULATE_DO_MMIO;
1036 }
1037
1038 kvm_arch_ops->decache_regs(vcpu);
1039 kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags);
1040
1041 if (vcpu->mmio_is_write)
1042 return EMULATE_DO_MMIO;
1043
1044 return EMULATE_DONE;
1045}
1046EXPORT_SYMBOL_GPL(emulate_instruction);
1047
1048static u64 mk_cr_64(u64 curr_cr, u32 new_val)
1049{
1050 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
1051}
1052
1053void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1054{
1055 struct descriptor_table dt = { limit, base };
1056
1057 kvm_arch_ops->set_gdt(vcpu, &dt);
1058}
1059
1060void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1061{
1062 struct descriptor_table dt = { limit, base };
1063
1064 kvm_arch_ops->set_idt(vcpu, &dt);
1065}
1066
1067void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
1068 unsigned long *rflags)
1069{
1070 lmsw(vcpu, msw);
1071 *rflags = kvm_arch_ops->get_rflags(vcpu);
1072}
1073
1074unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
1075{
1076 switch (cr) {
1077 case 0:
1078 return vcpu->cr0;
1079 case 2:
1080 return vcpu->cr2;
1081 case 3:
1082 return vcpu->cr3;
1083 case 4:
1084 return vcpu->cr4;
1085 default:
1086 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
1087 return 0;
1088 }
1089}
1090
1091void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
1092 unsigned long *rflags)
1093{
1094 switch (cr) {
1095 case 0:
1096 set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
1097 *rflags = kvm_arch_ops->get_rflags(vcpu);
1098 break;
1099 case 2:
1100 vcpu->cr2 = val;
1101 break;
1102 case 3:
1103 set_cr3(vcpu, val);
1104 break;
1105 case 4:
1106 set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
1107 break;
1108 default:
1109 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
1110 }
1111}
1112
1113/*
1114 * Reads an msr value (of 'msr_index') into 'pdata'.
1115 * Returns 0 on success, non-0 otherwise.
1116 * Assumes vcpu_load() was already called.
1117 */
1118static int get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1119{
1120 return kvm_arch_ops->get_msr(vcpu, msr_index, pdata);
1121}
1122
1123#ifdef __x86_64__
1124
1125void set_efer(struct kvm_vcpu *vcpu, u64 efer)
1126{
1127 struct vmx_msr_entry *msr;
1128
1129 if (efer & EFER_RESERVED_BITS) {
1130 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
1131 efer);
1132 inject_gp(vcpu);
1133 return;
1134 }
1135
1136 if (is_paging(vcpu)
1137 && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
1138 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
1139 inject_gp(vcpu);
1140 return;
1141 }
1142
1143 efer &= ~EFER_LMA;
1144 efer |= vcpu->shadow_efer & EFER_LMA;
1145
1146 vcpu->shadow_efer = efer;
1147
1148 msr = find_msr_entry(vcpu, MSR_EFER);
1149
1150 if (!(efer & EFER_LMA))
1151 efer &= ~EFER_LME;
1152 msr->data = efer;
1153}
1154EXPORT_SYMBOL_GPL(set_efer);
1155
1156#endif
1157
1158/*
1159 * Writes msr value into the appropriate "register".
1160 * Returns 0 on success, non-0 otherwise.
1161 * Assumes vcpu_load() was already called.
1162 */
1163static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1164{
1165 return kvm_arch_ops->set_msr(vcpu, msr_index, data);
1166}
1167
1168void kvm_resched(struct kvm_vcpu *vcpu)
1169{
1170 vcpu_put(vcpu);
1171 cond_resched();
1172 /* Cannot fail - no vcpu unplug yet. */
1173 vcpu_load(vcpu->kvm, vcpu_slot(vcpu));
1174}
1175EXPORT_SYMBOL_GPL(kvm_resched);
1176
1177void load_msrs(struct vmx_msr_entry *e, int n)
1178{
1179 int i;
1180
1181 for (i = 0; i < n; ++i)
1182 wrmsrl(e[i].index, e[i].data);
1183}
1184EXPORT_SYMBOL_GPL(load_msrs);
1185
1186void save_msrs(struct vmx_msr_entry *e, int n)
1187{
1188 int i;
1189
1190 for (i = 0; i < n; ++i)
1191 rdmsrl(e[i].index, e[i].data);
1192}
1193EXPORT_SYMBOL_GPL(save_msrs);
1194
1195static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run)
1196{
1197 struct kvm_vcpu *vcpu;
1198 int r;
1199
1200 if (kvm_run->vcpu < 0 || kvm_run->vcpu >= KVM_MAX_VCPUS)
1201 return -EINVAL;
1202
1203 vcpu = vcpu_load(kvm, kvm_run->vcpu);
1204 if (!vcpu)
1205 return -ENOENT;
1206
1207 if (kvm_run->emulated) {
1208 kvm_arch_ops->skip_emulated_instruction(vcpu);
1209 kvm_run->emulated = 0;
1210 }
1211
1212 if (kvm_run->mmio_completed) {
1213 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
1214 vcpu->mmio_read_completed = 1;
1215 }
1216
1217 vcpu->mmio_needed = 0;
1218
1219 r = kvm_arch_ops->run(vcpu, kvm_run);
1220
1221 vcpu_put(vcpu);
1222 return r;
1223}
1224
1225static int kvm_dev_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs)
1226{
1227 struct kvm_vcpu *vcpu;
1228
1229 if (regs->vcpu < 0 || regs->vcpu >= KVM_MAX_VCPUS)
1230 return -EINVAL;
1231
1232 vcpu = vcpu_load(kvm, regs->vcpu);
1233 if (!vcpu)
1234 return -ENOENT;
1235
1236 kvm_arch_ops->cache_regs(vcpu);
1237
1238 regs->rax = vcpu->regs[VCPU_REGS_RAX];
1239 regs->rbx = vcpu->regs[VCPU_REGS_RBX];
1240 regs->rcx = vcpu->regs[VCPU_REGS_RCX];
1241 regs->rdx = vcpu->regs[VCPU_REGS_RDX];
1242 regs->rsi = vcpu->regs[VCPU_REGS_RSI];
1243 regs->rdi = vcpu->regs[VCPU_REGS_RDI];
1244 regs->rsp = vcpu->regs[VCPU_REGS_RSP];
1245 regs->rbp = vcpu->regs[VCPU_REGS_RBP];
1246#ifdef __x86_64__
1247 regs->r8 = vcpu->regs[VCPU_REGS_R8];
1248 regs->r9 = vcpu->regs[VCPU_REGS_R9];
1249 regs->r10 = vcpu->regs[VCPU_REGS_R10];
1250 regs->r11 = vcpu->regs[VCPU_REGS_R11];
1251 regs->r12 = vcpu->regs[VCPU_REGS_R12];
1252 regs->r13 = vcpu->regs[VCPU_REGS_R13];
1253 regs->r14 = vcpu->regs[VCPU_REGS_R14];
1254 regs->r15 = vcpu->regs[VCPU_REGS_R15];
1255#endif
1256
1257 regs->rip = vcpu->rip;
1258 regs->rflags = kvm_arch_ops->get_rflags(vcpu);
1259
1260 /*
1261 * Don't leak debug flags in case they were set for guest debugging
1262 */
1263 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
1264 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1265
1266 vcpu_put(vcpu);
1267
1268 return 0;
1269}
1270
1271static int kvm_dev_ioctl_set_regs(struct kvm *kvm, struct kvm_regs *regs)
1272{
1273 struct kvm_vcpu *vcpu;
1274
1275 if (regs->vcpu < 0 || regs->vcpu >= KVM_MAX_VCPUS)
1276 return -EINVAL;
1277
1278 vcpu = vcpu_load(kvm, regs->vcpu);
1279 if (!vcpu)
1280 return -ENOENT;
1281
1282 vcpu->regs[VCPU_REGS_RAX] = regs->rax;
1283 vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
1284 vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
1285 vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
1286 vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
1287 vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
1288 vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
1289 vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
1290#ifdef __x86_64__
1291 vcpu->regs[VCPU_REGS_R8] = regs->r8;
1292 vcpu->regs[VCPU_REGS_R9] = regs->r9;
1293 vcpu->regs[VCPU_REGS_R10] = regs->r10;
1294 vcpu->regs[VCPU_REGS_R11] = regs->r11;
1295 vcpu->regs[VCPU_REGS_R12] = regs->r12;
1296 vcpu->regs[VCPU_REGS_R13] = regs->r13;
1297 vcpu->regs[VCPU_REGS_R14] = regs->r14;
1298 vcpu->regs[VCPU_REGS_R15] = regs->r15;
1299#endif
1300
1301 vcpu->rip = regs->rip;
1302 kvm_arch_ops->set_rflags(vcpu, regs->rflags);
1303
1304 kvm_arch_ops->decache_regs(vcpu);
1305
1306 vcpu_put(vcpu);
1307
1308 return 0;
1309}
1310
1311static void get_segment(struct kvm_vcpu *vcpu,
1312 struct kvm_segment *var, int seg)
1313{
1314 return kvm_arch_ops->get_segment(vcpu, var, seg);
1315}
1316
1317static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
1318{
1319 struct kvm_vcpu *vcpu;
1320 struct descriptor_table dt;
1321
1322 if (sregs->vcpu < 0 || sregs->vcpu >= KVM_MAX_VCPUS)
1323 return -EINVAL;
1324 vcpu = vcpu_load(kvm, sregs->vcpu);
1325 if (!vcpu)
1326 return -ENOENT;
1327
1328 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
1329 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
1330 get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
1331 get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
1332 get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
1333 get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
1334
1335 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
1336 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
1337
1338 kvm_arch_ops->get_idt(vcpu, &dt);
1339 sregs->idt.limit = dt.limit;
1340 sregs->idt.base = dt.base;
1341 kvm_arch_ops->get_gdt(vcpu, &dt);
1342 sregs->gdt.limit = dt.limit;
1343 sregs->gdt.base = dt.base;
1344
1345 sregs->cr0 = vcpu->cr0;
1346 sregs->cr2 = vcpu->cr2;
1347 sregs->cr3 = vcpu->cr3;
1348 sregs->cr4 = vcpu->cr4;
1349 sregs->cr8 = vcpu->cr8;
1350 sregs->efer = vcpu->shadow_efer;
1351 sregs->apic_base = vcpu->apic_base;
1352
1353 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
1354 sizeof sregs->interrupt_bitmap);
1355
1356 vcpu_put(vcpu);
1357
1358 return 0;
1359}
1360
1361static void set_segment(struct kvm_vcpu *vcpu,
1362 struct kvm_segment *var, int seg)
1363{
1364 return kvm_arch_ops->set_segment(vcpu, var, seg);
1365}
1366
1367static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
1368{
1369 struct kvm_vcpu *vcpu;
1370 int mmu_reset_needed = 0;
1371 int i;
1372 struct descriptor_table dt;
1373
1374 if (sregs->vcpu < 0 || sregs->vcpu >= KVM_MAX_VCPUS)
1375 return -EINVAL;
1376 vcpu = vcpu_load(kvm, sregs->vcpu);
1377 if (!vcpu)
1378 return -ENOENT;
1379
1380 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
1381 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
1382 set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
1383 set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
1384 set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
1385 set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
1386
1387 set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
1388 set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
1389
1390 dt.limit = sregs->idt.limit;
1391 dt.base = sregs->idt.base;
1392 kvm_arch_ops->set_idt(vcpu, &dt);
1393 dt.limit = sregs->gdt.limit;
1394 dt.base = sregs->gdt.base;
1395 kvm_arch_ops->set_gdt(vcpu, &dt);
1396
1397 vcpu->cr2 = sregs->cr2;
1398 mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
1399 vcpu->cr3 = sregs->cr3;
1400
1401 vcpu->cr8 = sregs->cr8;
1402
1403 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
1404#ifdef __x86_64__
1405 kvm_arch_ops->set_efer(vcpu, sregs->efer);
1406#endif
1407 vcpu->apic_base = sregs->apic_base;
1408
1409 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
1410 kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0);
1411
1412 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
1413 kvm_arch_ops->set_cr4(vcpu, sregs->cr4);
1414
1415 if (mmu_reset_needed)
1416 kvm_mmu_reset_context(vcpu);
1417
1418 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
1419 sizeof vcpu->irq_pending);
1420 vcpu->irq_summary = 0;
1421 for (i = 0; i < NR_IRQ_WORDS; ++i)
1422 if (vcpu->irq_pending[i])
1423 __set_bit(i, &vcpu->irq_summary);
1424
1425 vcpu_put(vcpu);
1426
1427 return 0;
1428}
1429
1430/*
1431 * List of msr numbers which we expose to userspace through KVM_GET_MSRS,
1432 * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
1433 */
1434static u32 msrs_to_save[] = {
1435 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1436 MSR_K6_STAR,
1437#ifdef __x86_64__
1438 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1439#endif
1440 MSR_IA32_TIME_STAMP_COUNTER,
1441};
1442
1443
1444/*
1445 * Adapt set_msr() to msr_io()'s calling convention
1446 */
1447static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1448{
1449 return set_msr(vcpu, index, *data);
1450}
1451
1452/*
1453 * Read or write a bunch of msrs. All parameters are kernel addresses.
1454 *
1455 * @return number of msrs set successfully.
1456 */
1457static int __msr_io(struct kvm *kvm, struct kvm_msrs *msrs,
1458 struct kvm_msr_entry *entries,
1459 int (*do_msr)(struct kvm_vcpu *vcpu,
1460 unsigned index, u64 *data))
1461{
1462 struct kvm_vcpu *vcpu;
1463 int i;
1464
1465 if (msrs->vcpu < 0 || msrs->vcpu >= KVM_MAX_VCPUS)
1466 return -EINVAL;
1467
1468 vcpu = vcpu_load(kvm, msrs->vcpu);
1469 if (!vcpu)
1470 return -ENOENT;
1471
1472 for (i = 0; i < msrs->nmsrs; ++i)
1473 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1474 break;
1475
1476 vcpu_put(vcpu);
1477
1478 return i;
1479}
1480
1481/*
1482 * Read or write a bunch of msrs. Parameters are user addresses.
1483 *
1484 * @return number of msrs set successfully.
1485 */
1486static int msr_io(struct kvm *kvm, struct kvm_msrs __user *user_msrs,
1487 int (*do_msr)(struct kvm_vcpu *vcpu,
1488 unsigned index, u64 *data),
1489 int writeback)
1490{
1491 struct kvm_msrs msrs;
1492 struct kvm_msr_entry *entries;
1493 int r, n;
1494 unsigned size;
1495
1496 r = -EFAULT;
1497 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
1498 goto out;
1499
1500 r = -E2BIG;
1501 if (msrs.nmsrs >= MAX_IO_MSRS)
1502 goto out;
1503
1504 r = -ENOMEM;
1505 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
1506 entries = vmalloc(size);
1507 if (!entries)
1508 goto out;
1509
1510 r = -EFAULT;
1511 if (copy_from_user(entries, user_msrs->entries, size))
1512 goto out_free;
1513
1514 r = n = __msr_io(kvm, &msrs, entries, do_msr);
1515 if (r < 0)
1516 goto out_free;
1517
1518 r = -EFAULT;
1519 if (writeback && copy_to_user(user_msrs->entries, entries, size))
1520 goto out_free;
1521
1522 r = n;
1523
1524out_free:
1525 vfree(entries);
1526out:
1527 return r;
1528}
1529
1530/*
1531 * Translate a guest virtual address to a guest physical address.
1532 */
1533static int kvm_dev_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr)
1534{
1535 unsigned long vaddr = tr->linear_address;
1536 struct kvm_vcpu *vcpu;
1537 gpa_t gpa;
1538
1539 vcpu = vcpu_load(kvm, tr->vcpu);
1540 if (!vcpu)
1541 return -ENOENT;
1542 spin_lock(&kvm->lock);
1543 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
1544 tr->physical_address = gpa;
1545 tr->valid = gpa != UNMAPPED_GVA;
1546 tr->writeable = 1;
1547 tr->usermode = 0;
1548 spin_unlock(&kvm->lock);
1549 vcpu_put(vcpu);
1550
1551 return 0;
1552}
1553
1554static int kvm_dev_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq)
1555{
1556 struct kvm_vcpu *vcpu;
1557
1558 if (irq->vcpu < 0 || irq->vcpu >= KVM_MAX_VCPUS)
1559 return -EINVAL;
1560 if (irq->irq < 0 || irq->irq >= 256)
1561 return -EINVAL;
1562 vcpu = vcpu_load(kvm, irq->vcpu);
1563 if (!vcpu)
1564 return -ENOENT;
1565
1566 set_bit(irq->irq, vcpu->irq_pending);
1567 set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
1568
1569 vcpu_put(vcpu);
1570
1571 return 0;
1572}
1573
1574static int kvm_dev_ioctl_debug_guest(struct kvm *kvm,
1575 struct kvm_debug_guest *dbg)
1576{
1577 struct kvm_vcpu *vcpu;
1578 int r;
1579
1580 if (dbg->vcpu < 0 || dbg->vcpu >= KVM_MAX_VCPUS)
1581 return -EINVAL;
1582 vcpu = vcpu_load(kvm, dbg->vcpu);
1583 if (!vcpu)
1584 return -ENOENT;
1585
1586 r = kvm_arch_ops->set_guest_debug(vcpu, dbg);
1587
1588 vcpu_put(vcpu);
1589
1590 return r;
1591}
1592
1593static long kvm_dev_ioctl(struct file *filp,
1594 unsigned int ioctl, unsigned long arg)
1595{
1596 struct kvm *kvm = filp->private_data;
1597 int r = -EINVAL;
1598
1599 switch (ioctl) {
1600 case KVM_CREATE_VCPU: {
1601 r = kvm_dev_ioctl_create_vcpu(kvm, arg);
1602 if (r)
1603 goto out;
1604 break;
1605 }
1606 case KVM_RUN: {
1607 struct kvm_run kvm_run;
1608
1609 r = -EFAULT;
1610 if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run))
1611 goto out;
1612 r = kvm_dev_ioctl_run(kvm, &kvm_run);
1613 if (r < 0)
1614 goto out;
1615 r = -EFAULT;
1616 if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run))
1617 goto out;
1618 r = 0;
1619 break;
1620 }
1621 case KVM_GET_REGS: {
1622 struct kvm_regs kvm_regs;
1623
1624 r = -EFAULT;
1625 if (copy_from_user(&kvm_regs, (void *)arg, sizeof kvm_regs))
1626 goto out;
1627 r = kvm_dev_ioctl_get_regs(kvm, &kvm_regs);
1628 if (r)
1629 goto out;
1630 r = -EFAULT;
1631 if (copy_to_user((void *)arg, &kvm_regs, sizeof kvm_regs))
1632 goto out;
1633 r = 0;
1634 break;
1635 }
1636 case KVM_SET_REGS: {
1637 struct kvm_regs kvm_regs;
1638
1639 r = -EFAULT;
1640 if (copy_from_user(&kvm_regs, (void *)arg, sizeof kvm_regs))
1641 goto out;
1642 r = kvm_dev_ioctl_set_regs(kvm, &kvm_regs);
1643 if (r)
1644 goto out;
1645 r = 0;
1646 break;
1647 }
1648 case KVM_GET_SREGS: {
1649 struct kvm_sregs kvm_sregs;
1650
1651 r = -EFAULT;
1652 if (copy_from_user(&kvm_sregs, (void *)arg, sizeof kvm_sregs))
1653 goto out;
1654 r = kvm_dev_ioctl_get_sregs(kvm, &kvm_sregs);
1655 if (r)
1656 goto out;
1657 r = -EFAULT;
1658 if (copy_to_user((void *)arg, &kvm_sregs, sizeof kvm_sregs))
1659 goto out;
1660 r = 0;
1661 break;
1662 }
1663 case KVM_SET_SREGS: {
1664 struct kvm_sregs kvm_sregs;
1665
1666 r = -EFAULT;
1667 if (copy_from_user(&kvm_sregs, (void *)arg, sizeof kvm_sregs))
1668 goto out;
1669 r = kvm_dev_ioctl_set_sregs(kvm, &kvm_sregs);
1670 if (r)
1671 goto out;
1672 r = 0;
1673 break;
1674 }
1675 case KVM_TRANSLATE: {
1676 struct kvm_translation tr;
1677
1678 r = -EFAULT;
1679 if (copy_from_user(&tr, (void *)arg, sizeof tr))
1680 goto out;
1681 r = kvm_dev_ioctl_translate(kvm, &tr);
1682 if (r)
1683 goto out;
1684 r = -EFAULT;
1685 if (copy_to_user((void *)arg, &tr, sizeof tr))
1686 goto out;
1687 r = 0;
1688 break;
1689 }
1690 case KVM_INTERRUPT: {
1691 struct kvm_interrupt irq;
1692
1693 r = -EFAULT;
1694 if (copy_from_user(&irq, (void *)arg, sizeof irq))
1695 goto out;
1696 r = kvm_dev_ioctl_interrupt(kvm, &irq);
1697 if (r)
1698 goto out;
1699 r = 0;
1700 break;
1701 }
1702 case KVM_DEBUG_GUEST: {
1703 struct kvm_debug_guest dbg;
1704
1705 r = -EFAULT;
1706 if (copy_from_user(&dbg, (void *)arg, sizeof dbg))
1707 goto out;
1708 r = kvm_dev_ioctl_debug_guest(kvm, &dbg);
1709 if (r)
1710 goto out;
1711 r = 0;
1712 break;
1713 }
1714 case KVM_SET_MEMORY_REGION: {
1715 struct kvm_memory_region kvm_mem;
1716
1717 r = -EFAULT;
1718 if (copy_from_user(&kvm_mem, (void *)arg, sizeof kvm_mem))
1719 goto out;
1720 r = kvm_dev_ioctl_set_memory_region(kvm, &kvm_mem);
1721 if (r)
1722 goto out;
1723 break;
1724 }
1725 case KVM_GET_DIRTY_LOG: {
1726 struct kvm_dirty_log log;
1727
1728 r = -EFAULT;
1729 if (copy_from_user(&log, (void *)arg, sizeof log))
1730 goto out;
1731 r = kvm_dev_ioctl_get_dirty_log(kvm, &log);
1732 if (r)
1733 goto out;
1734 break;
1735 }
1736 case KVM_GET_MSRS:
1737 r = msr_io(kvm, (void __user *)arg, get_msr, 1);
1738 break;
1739 case KVM_SET_MSRS:
1740 r = msr_io(kvm, (void __user *)arg, do_set_msr, 0);
1741 break;
1742 case KVM_GET_MSR_INDEX_LIST: {
1743 struct kvm_msr_list __user *user_msr_list = (void __user *)arg;
1744 struct kvm_msr_list msr_list;
1745 unsigned n;
1746
1747 r = -EFAULT;
1748 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
1749 goto out;
1750 n = msr_list.nmsrs;
1751 msr_list.nmsrs = ARRAY_SIZE(msrs_to_save);
1752 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
1753 goto out;
1754 r = -E2BIG;
1755 if (n < ARRAY_SIZE(msrs_to_save))
1756 goto out;
1757 r = -EFAULT;
1758 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
1759 sizeof msrs_to_save))
1760 goto out;
1761 r = 0;
1762 }
1763 default:
1764 ;
1765 }
1766out:
1767 return r;
1768}
1769
1770static struct page *kvm_dev_nopage(struct vm_area_struct *vma,
1771 unsigned long address,
1772 int *type)
1773{
1774 struct kvm *kvm = vma->vm_file->private_data;
1775 unsigned long pgoff;
1776 struct kvm_memory_slot *slot;
1777 struct page *page;
1778
1779 *type = VM_FAULT_MINOR;
1780 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1781 slot = gfn_to_memslot(kvm, pgoff);
1782 if (!slot)
1783 return NOPAGE_SIGBUS;
1784 page = gfn_to_page(slot, pgoff);
1785 if (!page)
1786 return NOPAGE_SIGBUS;
1787 get_page(page);
1788 return page;
1789}
1790
1791static struct vm_operations_struct kvm_dev_vm_ops = {
1792 .nopage = kvm_dev_nopage,
1793};
1794
1795static int kvm_dev_mmap(struct file *file, struct vm_area_struct *vma)
1796{
1797 vma->vm_ops = &kvm_dev_vm_ops;
1798 return 0;
1799}
1800
1801static struct file_operations kvm_chardev_ops = {
1802 .open = kvm_dev_open,
1803 .release = kvm_dev_release,
1804 .unlocked_ioctl = kvm_dev_ioctl,
1805 .compat_ioctl = kvm_dev_ioctl,
1806 .mmap = kvm_dev_mmap,
1807};
1808
1809static struct miscdevice kvm_dev = {
1810 MISC_DYNAMIC_MINOR,
1811 "kvm",
1812 &kvm_chardev_ops,
1813};
1814
1815static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
1816 void *v)
1817{
1818 if (val == SYS_RESTART) {
1819 /*
1820 * Some (well, at least mine) BIOSes hang on reboot if
1821 * in vmx root mode.
1822 */
1823 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
1824 on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1);
1825 }
1826 return NOTIFY_OK;
1827}
1828
1829static struct notifier_block kvm_reboot_notifier = {
1830 .notifier_call = kvm_reboot,
1831 .priority = 0,
1832};
1833
1834static __init void kvm_init_debug(void)
1835{
1836 struct kvm_stats_debugfs_item *p;
1837
1838 debugfs_dir = debugfs_create_dir("kvm", 0);
1839 for (p = debugfs_entries; p->name; ++p)
1840 p->dentry = debugfs_create_u32(p->name, 0444, debugfs_dir,
1841 p->data);
1842}
1843
1844static void kvm_exit_debug(void)
1845{
1846 struct kvm_stats_debugfs_item *p;
1847
1848 for (p = debugfs_entries; p->name; ++p)
1849 debugfs_remove(p->dentry);
1850 debugfs_remove(debugfs_dir);
1851}
1852
1853hpa_t bad_page_address;
1854
1855int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
1856{
1857 int r;
1858
1859 kvm_arch_ops = ops;
1860
1861 if (!kvm_arch_ops->cpu_has_kvm_support()) {
1862 printk(KERN_ERR "kvm: no hardware support\n");
1863 return -EOPNOTSUPP;
1864 }
1865 if (kvm_arch_ops->disabled_by_bios()) {
1866 printk(KERN_ERR "kvm: disabled by bios\n");
1867 return -EOPNOTSUPP;
1868 }
1869
1870 r = kvm_arch_ops->hardware_setup();
1871 if (r < 0)
1872 return r;
1873
1874 on_each_cpu(kvm_arch_ops->hardware_enable, 0, 0, 1);
1875 register_reboot_notifier(&kvm_reboot_notifier);
1876
1877 kvm_chardev_ops.owner = module;
1878
1879 r = misc_register(&kvm_dev);
1880 if (r) {
1881 printk(KERN_ERR "kvm: misc device register failed\n");
1882 goto out_free;
1883 }
1884
1885 return r;
1886
1887out_free:
1888 unregister_reboot_notifier(&kvm_reboot_notifier);
1889 on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1);
1890 kvm_arch_ops->hardware_unsetup();
1891 return r;
1892}
1893
1894void kvm_exit_arch(void)
1895{
1896 misc_deregister(&kvm_dev);
1897
1898 unregister_reboot_notifier(&kvm_reboot_notifier);
1899 on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1);
1900 kvm_arch_ops->hardware_unsetup();
1901}
1902
1903static __init int kvm_init(void)
1904{
1905 static struct page *bad_page;
1906 int r = 0;
1907
1908 kvm_init_debug();
1909
1910 if ((bad_page = alloc_page(GFP_KERNEL)) == NULL) {
1911 r = -ENOMEM;
1912 goto out;
1913 }
1914
1915 bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT;
1916 memset(__va(bad_page_address), 0, PAGE_SIZE);
1917
1918 return r;
1919
1920out:
1921 kvm_exit_debug();
1922 return r;
1923}
1924
1925static __exit void kvm_exit(void)
1926{
1927 kvm_exit_debug();
1928 __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
1929}
1930
1931module_init(kvm_init)
1932module_exit(kvm_exit)
1933
1934EXPORT_SYMBOL_GPL(kvm_init_arch);
1935EXPORT_SYMBOL_GPL(kvm_exit_arch);
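
kvm_init_arch() and kvm_exit_arch(), exported above, are the hooks used by
the hardware-specific half of the driver (vmx.c elsewhere in this series).
The sketch below is not code from this patch; it shows roughly how a skeletal
arch module would register itself, assuming it fills in a complete
struct kvm_arch_ops (the table here is deliberately left empty and is named
vmx_arch_ops only for illustration).

    #include <linux/module.h>
    #include "kvm.h"

    static struct kvm_arch_ops vmx_arch_ops = {
            /* every callback kvm_main.c invokes must be provided:
             * .cpu_has_kvm_support, .hardware_setup, .vcpu_create, ... */
    };

    static int __init vmx_init(void)
    {
            /* registers the ops table and creates /dev/kvm */
            return kvm_init_arch(&vmx_arch_ops, THIS_MODULE);
    }

    static void __exit vmx_exit(void)
    {
            kvm_exit_arch();
    }

    module_init(vmx_init);
    module_exit(vmx_exit);

    MODULE_LICENSE("GPL");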