aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/kvm/x86_emulate.h
diff options
context:
space:
mode:
author Avi Kivity <avi@qumranet.com> 2006-12-10 05:21:36 -0500
committer Linus Torvalds <torvalds@woody.osdl.org> 2006-12-10 12:57:22 -0500
commit 6aa8b732ca01c3d7a54e93f4d701b8aabbe60fb7 (patch)
tree 23fcbe6f4918cacdae26d513a2bd13e91d8b4c38 /drivers/kvm/x86_emulate.h
parent f5f1a24a2caa299bb7d294aee92d7dd3410d9ed7 (diff)
[PATCH] kvm: userspace interface
web site: http://kvm.sourceforge.net

mailing list: kvm-devel@lists.sourceforge.net
(http://lists.sourceforge.net/lists/listinfo/kvm-devel)

The following patchset adds a driver for Intel's hardware virtualization extensions to the x86 architecture. The driver adds a character device (/dev/kvm) that exposes the virtualization capabilities to userspace. Using this driver, a process can run a virtual machine (a "guest") in a fully virtualized PC containing its own virtual hard disks, network adapters, and display.

Using this driver, one can start multiple virtual machines on a host. Each virtual machine is a process on the host; a virtual cpu is a thread in that process. kill(1), nice(1), top(1) work as expected.

In effect, the driver adds a third execution mode to the existing two: we now have kernel mode, user mode, and guest mode. Guest mode has its own address space mapping guest physical memory (which is accessible to user mode by mmap()ing /dev/kvm). Guest mode has no access to any I/O devices; any such access is intercepted and directed to user mode for emulation.

The driver supports i386 and x86_64 hosts and guests. All combinations are allowed except x86_64 guest on i386 host. For i386 guests and hosts, both pae and non-pae paging modes are supported.

SMP hosts and UP guests are supported. At the moment only Intel hardware is supported, but AMD virtualization support is being worked on.

Performance currently is non-stellar due to the naive implementation of the mmu virtualization, which throws away most of the shadow page table entries every context switch. We plan to address this in two ways:

- cache shadow page tables across tlb flushes
- wait until AMD and Intel release processors with nested page tables

Currently a virtual desktop is responsive but consumes a lot of CPU. Under Windows I tried playing pinball and watching a few flash movies; with a recent CPU one can hardly feel the virtualization. Linux/X is slower, probably due to X being in a separate process.

In addition to the driver, you need a slightly modified qemu to provide I/O device emulation and the BIOS.

Caveats (akpm: might no longer be true):

- The Windows install currently bluescreens due to a problem with the virtual APIC. We are working on a fix. A temporary workaround is to use an existing image or install through qemu
- Windows 64-bit does not work. That's also true for qemu, so it's probably a problem with the device model.

[bero@arklinux.org: build fix]
[simon.kagstrom@bth.se: build fix, other fixes]
[uril@qumranet.com: KVM: Expose interrupt bitmap]
[akpm@osdl.org: i386 build fix]
[mingo@elte.hu: i386 fixes]
[rdreier@cisco.com: add log levels to all printks]
[randy.dunlap@oracle.com: Fix sparse NULL and C99 struct init warnings]
[anthony@codemonkey.ws: KVM: AMD SVM: 32-bit host support]
Signed-off-by: Yaniv Kamay <yaniv@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Cc: Simon Kagstrom <simon.kagstrom@bth.se>
Cc: Bernhard Rosenkraenzer <bero@arklinux.org>
Signed-off-by: Uri Lublin <uril@qumranet.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/kvm/x86_emulate.h')
-rw-r--r-- drivers/kvm/x86_emulate.h 185
1 files changed, 185 insertions, 0 deletions
diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h
new file mode 100644
index 000000000000..658b58de30fc
--- /dev/null
+++ b/drivers/kvm/x86_emulate.h
@@ -0,0 +1,185 @@
1/******************************************************************************
2 * x86_emulate.h
3 *
4 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
5 *
6 * Copyright (c) 2005 Keir Fraser
7 *
8 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
9 */
10
11#ifndef __X86_EMULATE_H__
12#define __X86_EMULATE_H__
13
14struct x86_emulate_ctxt; /* Defined further down; declared early so the x86_emulate_ops callbacks can take a pointer to it. */
15
16/*
17 * x86_emulate_ops:
18 *
19 * These operations represent the instruction emulator's interface to memory.
20 * There are two categories of operation: those that act on ordinary memory
21 * regions (*_std), and those that act on memory regions known to require
22 * special treatment or emulation (*_emulated).
23 *
24 * The emulator assumes that an instruction accesses only one 'emulated memory'
25 * location, that this location is the given linear faulting address (cr2), and
26 * that this is one of the instruction's data operands. Instruction fetches and
27 * stack operations are assumed never to access emulated memory. The emulator
28 * automatically deduces which operand of a string-move operation is accessing
29 * emulated memory, and assumes that the other operand accesses normal memory.
30 *
31 * NOTES:
32 * 1. The emulator isn't very smart about emulated vs. standard memory.
33 * 'Emulated memory' access addresses should be checked for sanity.
34 * 'Normal memory' accesses may fault, and the caller must arrange to
35 * detect and handle reentrancy into the emulator via recursive faults.
36 * Accesses may be unaligned and may cross page boundaries.
37 * 2. If the access fails (cannot emulate, or a standard access faults) then
38 * it is up to the memop to propagate the fault to the guest VM via
39 * some out-of-band mechanism, unknown to the emulator. The memop signals
40 * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will
41 * then immediately bail.
42 * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
43 * cmpxchg8b_emulated need support 8-byte accesses.
44 * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
45 */
46/* Return codes for the memory-access callbacks. Access completed successfully: continue emulation as normal. */
47#define X86EMUL_CONTINUE 0
48/* Access is unhandleable: bail from emulation and return error to caller. */
49#define X86EMUL_UNHANDLEABLE 1
50/* Terminate emulation but return success to the caller. The three codes below all have the value 2, so they cannot be told apart by value. */
51#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
52#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */
53#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */
54struct x86_emulate_ops { /* Memory-access callbacks used by the emulator; each returns an X86EMUL_* code. */
55 /*
56 * read_std: Read bytes of standard (non-emulated/special) memory.
57 * Used for instruction fetch, stack operations, and others.
58 * @addr: [IN ] Linear address from which to read.
59 * @val: [OUT] Value read from memory, zero-extended to 'unsigned long'.
60 * @bytes: [IN ] Number of bytes to read from memory.
61 */
62 int (*read_std)(unsigned long addr,
63 unsigned long *val,
64 unsigned int bytes, struct x86_emulate_ctxt * ctxt);
65
66 /*
67 * write_std: Write bytes of standard (non-emulated/special) memory.
68 * Used for stack operations, and others.
69 * @addr: [IN ] Linear address to which to write.
70 * @val: [IN ] Value to write to memory (low-order bytes used as
71 * required).
72 * @bytes: [IN ] Number of bytes to write to memory.
73 */
74 int (*write_std)(unsigned long addr,
75 unsigned long val,
76 unsigned int bytes, struct x86_emulate_ctxt * ctxt);
77
78 /*
79 * read_emulated: Read bytes from emulated/special memory area.
80 * @addr: [IN ] Linear address from which to read.
81 * @val: [OUT] Value read from memory, zero-extended to 'unsigned long'.
82 * @bytes: [IN ] Number of bytes to read from memory.
83 */
84 int (*read_emulated) (unsigned long addr,
85 unsigned long *val,
86 unsigned int bytes,
87 struct x86_emulate_ctxt * ctxt);
88
89 /*
90 * write_emulated: Write bytes to emulated/special memory area.
91 * @addr: [IN ] Linear address to which to write.
92 * @val: [IN ] Value to write to memory (low-order bytes used as
93 * required).
94 * @bytes: [IN ] Number of bytes to write to memory.
95 */
96 int (*write_emulated) (unsigned long addr,
97 unsigned long val,
98 unsigned int bytes,
99 struct x86_emulate_ctxt * ctxt);
100
101 /*
102 * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
103 * emulated/special memory area.
104 * @addr: [IN ] Linear address to access.
105 * @old: [IN ] Value expected to be current at @addr.
106 * @new: [IN ] Value to write to @addr.
107 * @bytes: [IN ] Number of bytes to access using CMPXCHG.
108 */
109 int (*cmpxchg_emulated) (unsigned long addr,
110 unsigned long old,
111 unsigned long new,
112 unsigned int bytes,
113 struct x86_emulate_ctxt * ctxt);
114
115 /*
116 * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
117 * emulated/special memory area.
118 * @addr: [IN ] Linear address to access.
119 * @old_lo, @old_hi: [IN ] Value expected to be current at @addr, passed as two words (low, high).
120 * @new_lo, @new_hi: [IN ] Value to write to @addr, passed as two words (low, high).
121 * NOTES:
122 * 1. This function is only ever called when emulating a real CMPXCHG8B.
123 * 2. This function is *never* called on x86/64 systems.
124 * 3. Not defining this function (i.e., specifying NULL) is equivalent
125 * to defining a function that always returns X86EMUL_UNHANDLEABLE.
126 */
127 int (*cmpxchg8b_emulated) (unsigned long addr,
128 unsigned long old_lo,
129 unsigned long old_hi,
130 unsigned long new_lo,
131 unsigned long new_hi,
132 struct x86_emulate_ctxt * ctxt);
133};
134
135struct cpu_user_regs; /* NOTE(review): not referenced anywhere else in this header, while struct kvm_vcpu (used below) has no forward declaration here - confirm which was intended. */
136
137struct x86_emulate_ctxt { /* Per-emulation state handed to x86_emulate_memop() and to every x86_emulate_ops callback. */
138 /* Register state before/after emulation (presumably reached through the vcpu - confirm). */
139 struct kvm_vcpu *vcpu;
140
141 /* eflags: flags register value. cr2: linear faulting address (if emulating a page-faulting instruction). */
142 unsigned long eflags;
143 unsigned long cr2;
144
145 /* Emulated execution mode, represented by an X86EMUL_MODE value. */
146 int mode;
147
148 unsigned long cs_base; /* Segment base addresses, one per segment register (note gs precedes fs). */
149 unsigned long ds_base;
150 unsigned long es_base;
151 unsigned long ss_base;
152 unsigned long gs_base;
153 unsigned long fs_base;
154};
155
156/* Execution mode, passed to the emulator in x86_emulate_ctxt.mode. */
157#define X86EMUL_MODE_REAL 0 /* Real mode. */
158#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
159#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
160#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
161
162/* Host execution mode. X86EMUL_MODE_HOST is left undefined when neither __i386__ nor __x86_64__ is defined. */
163#if defined(__i386__)
164#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
165#elif defined(__x86_64__)
166#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
167#endif
168
169/*
170 * x86_emulate_memop: Emulate an instruction that faulted attempting to
171 * read/write a 'special' memory area.
172 * Returns -1 on failure, 0 on success.
173 */
174int x86_emulate_memop(struct x86_emulate_ctxt *ctxt, /* emulation context: vcpu, eflags, cr2, mode, segment bases */
175 struct x86_emulate_ops *ops); /* memory-access callbacks used while emulating */
176
177/*
178 * Given the 'reg' portion of a ModRM byte, and a register block, return a
179 * pointer into the block that addresses the relevant register.
180 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
181 */
182void *decode_register(u8 modrm_reg, unsigned long *regs, /* NOTE(review): u8 is not defined by this header; the includer must provide it first. */
183 int highbyte_regs);
184
185#endif /* __X86_EMULATE_H__ */