author    Avi Kivity <avi@qumranet.com>    2006-12-10 05:21:36 -0500
committer Linus Torvalds <torvalds@woody.osdl.org>    2006-12-10 12:57:22 -0500
commit    6aa8b732ca01c3d7a54e93f4d701b8aabbe60fb7 (patch)
tree      23fcbe6f4918cacdae26d513a2bd13e91d8b4c38 /drivers/kvm/svm.c
parent    f5f1a24a2caa299bb7d294aee92d7dd3410d9ed7 (diff)
[PATCH] kvm: userspace interface
web site: http://kvm.sourceforge.net

mailing list: kvm-devel@lists.sourceforge.net
  (http://lists.sourceforge.net/lists/listinfo/kvm-devel)

The following patchset adds a driver for Intel's hardware virtualization extensions to the x86 architecture. The driver adds a character device (/dev/kvm) that exposes the virtualization capabilities to userspace. Using this driver, a process can run a virtual machine (a "guest") in a fully virtualized PC containing its own virtual hard disks, network adapters, and display.

Using this driver, one can start multiple virtual machines on a host.

Each virtual machine is a process on the host; a virtual cpu is a thread in that process. kill(1), nice(1), top(1) work as expected.

In effect, the driver adds a third execution mode to the existing two: we now have kernel mode, user mode, and guest mode. Guest mode has its own address space mapping guest physical memory (which is accessible to user mode by mmap()ing /dev/kvm). Guest mode has no access to any I/O devices; any such access is intercepted and directed to user mode for emulation.

The driver supports i386 and x86_64 hosts and guests. All combinations are allowed except x86_64 guest on i386 host. For i386 guests and hosts, both pae and non-pae paging modes are supported.

SMP hosts and UP guests are supported. At the moment only Intel hardware is supported, but AMD virtualization support is being worked on.

Performance currently is non-stellar due to the naive implementation of the mmu virtualization, which throws away most of the shadow page table entries every context switch. We plan to address this in two ways:

- cache shadow page tables across tlb flushes
- wait until AMD and Intel release processors with nested page tables

Currently a virtual desktop is responsive but consumes a lot of CPU. Under Windows I tried playing pinball and watching a few flash movies; with a recent CPU one can hardly feel the virtualization. Linux/X is slower, probably due to X being in a separate process.

In addition to the driver, you need a slightly modified qemu to provide I/O device emulation and the BIOS.

Caveats (akpm: might no longer be true):

- The Windows install currently bluescreens due to a problem with the virtual APIC. We are working on a fix. A temporary workaround is to use an existing image or install through qemu
- Windows 64-bit does not work. That's also true for qemu, so it's probably a problem with the device model.

[bero@arklinux.org: build fix]
[simon.kagstrom@bth.se: build fix, other fixes]
[uril@qumranet.com: KVM: Expose interrupt bitmap]
[akpm@osdl.org: i386 build fix]
[mingo@elte.hu: i386 fixes]
[rdreier@cisco.com: add log levels to all printks]
[randy.dunlap@oracle.com: Fix sparse NULL and C99 struct init warnings]
[anthony@codemonkey.ws: KVM: AMD SVM: 32-bit host support]

Signed-off-by: Yaniv Kamay <yaniv@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Cc: Simon Kagstrom <simon.kagstrom@bth.se>
Cc: Bernhard Rosenkraenzer <bero@arklinux.org>
Signed-off-by: Uri Lublin <uril@qumranet.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
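To make the character-device interface described above concrete, here is a minimal userspace sketch that opens /dev/kvm, creates a vcpu, and enters guest mode. It uses the ioctl names of the later, documented KVM API (KVM_CREATE_VM, KVM_CREATE_VCPU, KVM_GET_VCPU_MMAP_SIZE, KVM_RUN); treat those names, and the omission of guest memory and register setup, as assumptions for illustration rather than the exact ioctl set added by this patch.

/*
 * Minimal sketch of driving /dev/kvm from user mode. Ioctl names follow
 * the later KVM API and are assumptions relative to this initial patch;
 * error handling, guest memory setup and register setup are omitted.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);              /* capability device */
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);           /* the VM is an fd */
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);        /* one vcpu = one thread */

	/* the kernel shares a struct kvm_run with userspace via mmap() */
	int run_size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
	struct kvm_run *run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu, 0);

	/* each KVM_RUN stays in guest mode until an exit (I/O, HLT, ...)
	 * that must be emulated in user mode, as described above */
	ioctl(vcpu, KVM_RUN, 0);
	printf("exit reason: %u\n", run->exit_reason);

	close(vcpu);
	close(vm);
	close(kvm);
	return 0;
}

In this model, forking another such process simply yields a second, fully independent virtual machine, which is why kill(1), nice(1) and top(1) work on guests as on any other process.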
Diffstat (limited to 'drivers/kvm/svm.c')
-rw-r--r--  drivers/kvm/svm.c  1677
1 files changed, 1677 insertions, 0 deletions
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
new file mode 100644
index 000000000000..a33a89c68138
--- /dev/null
+++ b/drivers/kvm/svm.c
@@ -0,0 +1,1677 @@
1/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * AMD SVM support
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
7 *
8 * Authors:
9 * Yaniv Kamay <yaniv@qumranet.com>
10 * Avi Kivity <avi@qumranet.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory.
14 *
15 */
16
17#include <linux/module.h>
18#include <linux/vmalloc.h>
19#include <linux/highmem.h>
20#include <asm/desc.h>
21
22#include "kvm_svm.h"
23#include "x86_emulate.h"
24
25MODULE_AUTHOR("Qumranet");
26MODULE_LICENSE("GPL");
27
28#define IOPM_ALLOC_ORDER 2
29#define MSRPM_ALLOC_ORDER 1
30
31#define DB_VECTOR 1
32#define UD_VECTOR 6
33#define GP_VECTOR 13
34
35#define DR7_GD_MASK (1 << 13)
36#define DR6_BD_MASK (1 << 13)
37#define CR4_DE_MASK (1UL << 3)
38
39#define SEG_TYPE_LDT 2
40#define SEG_TYPE_BUSY_TSS16 3
41
42#define KVM_EFER_LMA (1 << 10)
43#define KVM_EFER_LME (1 << 8)
44
45unsigned long iopm_base;
46unsigned long msrpm_base;
47
48struct kvm_ldttss_desc {
49 u16 limit0;
50 u16 base0;
51 unsigned base1 : 8, type : 5, dpl : 2, p : 1;
52 unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
53 u32 base3;
54 u32 zero1;
55} __attribute__((packed));
56
57struct svm_cpu_data {
58 int cpu;
59
60 uint64_t asid_generation;
61 uint32_t max_asid;
62 uint32_t next_asid;
63 struct kvm_ldttss_desc *tss_desc;
64
65 struct page *save_area;
66};
67
68static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
69
70struct svm_init_data {
71 int cpu;
72 int r;
73};
74
75static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
76
77#define NUM_MSR_MAPS (sizeof(msrpm_ranges) / sizeof(*msrpm_ranges))
78#define MSRS_RANGE_SIZE 2048
79#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
80
81#define MAX_INST_SIZE 15
82
83static unsigned get_addr_size(struct kvm_vcpu *vcpu)
84{
85 struct vmcb_save_area *sa = &vcpu->svm->vmcb->save;
86 u16 cs_attrib;
87
88 if (!(sa->cr0 & CR0_PE_MASK) || (sa->rflags & X86_EFLAGS_VM))
89 return 2;
90
91 cs_attrib = sa->cs.attrib;
92
93 return (cs_attrib & SVM_SELECTOR_L_MASK) ? 8 :
94 (cs_attrib & SVM_SELECTOR_DB_MASK) ? 4 : 2;
95}
96
97static inline u8 pop_irq(struct kvm_vcpu *vcpu)
98{
99 int word_index = __ffs(vcpu->irq_summary);
100 int bit_index = __ffs(vcpu->irq_pending[word_index]);
101 int irq = word_index * BITS_PER_LONG + bit_index;
102
103 clear_bit(bit_index, &vcpu->irq_pending[word_index]);
104 if (!vcpu->irq_pending[word_index])
105 clear_bit(word_index, &vcpu->irq_summary);
106 return irq;
107}
108
109static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
110{
111 set_bit(irq, vcpu->irq_pending);
112 set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
113}
114
115static inline void clgi(void)
116{
117 asm volatile (SVM_CLGI);
118}
119
120static inline void stgi(void)
121{
122 asm volatile (SVM_STGI);
123}
124
125static inline void invlpga(unsigned long addr, u32 asid)
126{
127 asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid));
128}
129
130static inline unsigned long kvm_read_cr2(void)
131{
132 unsigned long cr2;
133
134 asm volatile ("mov %%cr2, %0" : "=r" (cr2));
135 return cr2;
136}
137
138static inline void kvm_write_cr2(unsigned long val)
139{
140 asm volatile ("mov %0, %%cr2" :: "r" (val));
141}
142
143static inline unsigned long read_dr6(void)
144{
145 unsigned long dr6;
146
147 asm volatile ("mov %%dr6, %0" : "=r" (dr6));
148 return dr6;
149}
150
151static inline void write_dr6(unsigned long val)
152{
153 asm volatile ("mov %0, %%dr6" :: "r" (val));
154}
155
156static inline unsigned long read_dr7(void)
157{
158 unsigned long dr7;
159
160 asm volatile ("mov %%dr7, %0" : "=r" (dr7));
161 return dr7;
162}
163
164static inline void write_dr7(unsigned long val)
165{
166 asm volatile ("mov %0, %%dr7" :: "r" (val));
167}
168
169static inline int svm_is_long_mode(struct kvm_vcpu *vcpu)
170{
171 return vcpu->svm->vmcb->save.efer & KVM_EFER_LMA;
172}
173
174static inline void force_new_asid(struct kvm_vcpu *vcpu)
175{
176 vcpu->svm->asid_generation--;
177}
178
179static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
180{
181 force_new_asid(vcpu);
182}
183
184static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
185{
186 if (!(efer & KVM_EFER_LMA))
187 efer &= ~KVM_EFER_LME;
188
189 vcpu->svm->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
190 vcpu->shadow_efer = efer;
191}
192
193static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
194{
195 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
196 SVM_EVTINJ_VALID_ERR |
197 SVM_EVTINJ_TYPE_EXEPT |
198 GP_VECTOR;
199 vcpu->svm->vmcb->control.event_inj_err = error_code;
200}
201
202static void inject_ud(struct kvm_vcpu *vcpu)
203{
204 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
205 SVM_EVTINJ_TYPE_EXEPT |
206 UD_VECTOR;
207}
208
209static void inject_db(struct kvm_vcpu *vcpu)
210{
211 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
212 SVM_EVTINJ_TYPE_EXEPT |
213 DB_VECTOR;
214}
215
216static int is_page_fault(uint32_t info)
217{
218 info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
219 return info == (PF_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT);
220}
221
222static int is_external_interrupt(u32 info)
223{
224 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
225 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
226}
227
228static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
229{
230 if (!vcpu->svm->next_rip) {
231 printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
232 return;
233 }
234 if (vcpu->svm->next_rip - vcpu->svm->vmcb->save.rip > 15) {
235 printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
236 __FUNCTION__,
237 vcpu->svm->vmcb->save.rip,
238 vcpu->svm->next_rip);
239 }
240
241 vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip;
242 vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
243}
244
245static int has_svm(void)
246{
247 uint32_t eax, ebx, ecx, edx;
248
249 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
250 printk(KERN_INFO "has_svm: not amd\n");
251 return 0;
252 }
253
254 cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
255 if (eax < SVM_CPUID_FUNC) {
256 printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
257 return 0;
258 }
259
260 cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
261 if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
262 printk(KERN_DEBUG "has_svm: svm not available\n");
263 return 0;
264 }
265 return 1;
266}
267
268static void svm_hardware_disable(void *garbage)
269{
270 struct svm_cpu_data *svm_data
271 = per_cpu(svm_data, raw_smp_processor_id());
272
273 if (svm_data) {
274 uint64_t efer;
275
276 wrmsrl(MSR_VM_HSAVE_PA, 0);
277 rdmsrl(MSR_EFER, efer);
278 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
279 per_cpu(svm_data, raw_smp_processor_id()) = 0;
280 __free_page(svm_data->save_area);
281 kfree(svm_data);
282 }
283}
284
285static void svm_hardware_enable(void *garbage)
286{
287
288 struct svm_cpu_data *svm_data;
289 uint64_t efer;
290#ifdef __x86_64__
291 struct desc_ptr gdt_descr;
292#else
293 struct Xgt_desc_struct gdt_descr;
294#endif
295 struct desc_struct *gdt;
296 int me = raw_smp_processor_id();
297
298 if (!has_svm()) {
299 printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
300 return;
301 }
302 svm_data = per_cpu(svm_data, me);
303
304 if (!svm_data) {
305 printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
306 me);
307 return;
308 }
309
310 svm_data->asid_generation = 1;
311 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
312 svm_data->next_asid = svm_data->max_asid + 1;
313
314 asm volatile ( "sgdt %0" : "=m"(gdt_descr) );
315 gdt = (struct desc_struct *)gdt_descr.address;
316 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
317
318 rdmsrl(MSR_EFER, efer);
319 wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
320
321 wrmsrl(MSR_VM_HSAVE_PA,
322 page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
323}
324
325static int svm_cpu_init(int cpu)
326{
327 struct svm_cpu_data *svm_data;
328 int r;
329
330 svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
331 if (!svm_data)
332 return -ENOMEM;
333 svm_data->cpu = cpu;
334 svm_data->save_area = alloc_page(GFP_KERNEL);
335 r = -ENOMEM;
336 if (!svm_data->save_area)
337 goto err_1;
338
339 per_cpu(svm_data, cpu) = svm_data;
340
341 return 0;
342
343err_1:
344 kfree(svm_data);
345 return r;
346
347}
348
349static int set_msr_interception(u32 *msrpm, unsigned msr,
350 int read, int write)
351{
352 int i;
353
354 for (i = 0; i < NUM_MSR_MAPS; i++) {
355 if (msr >= msrpm_ranges[i] &&
356 msr < msrpm_ranges[i] + MSRS_IN_RANGE) {
357 u32 msr_offset = (i * MSRS_IN_RANGE + msr -
358 msrpm_ranges[i]) * 2;
359
360 u32 *base = msrpm + (msr_offset / 32);
361 u32 msr_shift = msr_offset % 32;
362 u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1);
363 *base = (*base & ~(0x3 << msr_shift)) |
364 (mask << msr_shift);
365 return 1;
366 }
367 }
368 printk(KERN_DEBUG "%s: not found 0x%x\n", __FUNCTION__, msr);
369 return 0;
370}
371
372static __init int svm_hardware_setup(void)
373{
374 int cpu;
375 struct page *iopm_pages;
376 struct page *msrpm_pages;
377 void *msrpm_va;
378 int r;
379
380
381 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
382
383 if (!iopm_pages)
384 return -ENOMEM;
385 memset(page_address(iopm_pages), 0xff,
386 PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
387 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
388
389
390 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
391
392 r = -ENOMEM;
393 if (!msrpm_pages)
394 goto err_1;
395
396 msrpm_va = page_address(msrpm_pages);
397 memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
398 msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT;
399
400#ifdef __x86_64__
401 set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1);
402 set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1);
403 set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1);
404 set_msr_interception(msrpm_va, MSR_STAR, 1, 1);
405 set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1);
406 set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1);
407 set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1);
408#endif
409 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1);
410 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1);
411 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1);
412
413 for_each_online_cpu(cpu) {
414 r = svm_cpu_init(cpu);
415 if (r)
416 goto err_2;
417 }
418 return 0;
419
420err_2:
421 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
422 msrpm_base = 0;
423err_1:
424 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
425 iopm_base = 0;
426 return r;
427}
428
429static __exit void svm_hardware_unsetup(void)
430{
431 __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER);
432 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
433 iopm_base = msrpm_base = 0;
434}
435
436static void init_seg(struct vmcb_seg *seg)
437{
438 seg->selector = 0;
439 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
440 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
441 seg->limit = 0xffff;
442 seg->base = 0;
443}
444
445static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
446{
447 seg->selector = 0;
448 seg->attrib = SVM_SELECTOR_P_MASK | type;
449 seg->limit = 0xffff;
450 seg->base = 0;
451}
452
453static int svm_vcpu_setup(struct kvm_vcpu *vcpu)
454{
455 return 0;
456}
457
458static void init_vmcb(struct vmcb *vmcb)
459{
460 struct vmcb_control_area *control = &vmcb->control;
461 struct vmcb_save_area *save = &vmcb->save;
462 u64 tsc;
463
464 control->intercept_cr_read = INTERCEPT_CR0_MASK |
465 INTERCEPT_CR3_MASK |
466 INTERCEPT_CR4_MASK;
467
468 control->intercept_cr_write = INTERCEPT_CR0_MASK |
469 INTERCEPT_CR3_MASK |
470 INTERCEPT_CR4_MASK;
471
472 control->intercept_dr_read = INTERCEPT_DR0_MASK |
473 INTERCEPT_DR1_MASK |
474 INTERCEPT_DR2_MASK |
475 INTERCEPT_DR3_MASK;
476
477 control->intercept_dr_write = INTERCEPT_DR0_MASK |
478 INTERCEPT_DR1_MASK |
479 INTERCEPT_DR2_MASK |
480 INTERCEPT_DR3_MASK |
481 INTERCEPT_DR5_MASK |
482 INTERCEPT_DR7_MASK;
483
484 control->intercept_exceptions = 1 << PF_VECTOR;
485
486
487 control->intercept = (1ULL << INTERCEPT_INTR) |
488 (1ULL << INTERCEPT_NMI) |
489 /*
490 * selective cr0 intercept bug?
491 * 0: 0f 22 d8 mov %eax,%cr3
492 * 3: 0f 20 c0 mov %cr0,%eax
493 * 6: 0d 00 00 00 80 or $0x80000000,%eax
494 * b: 0f 22 c0 mov %eax,%cr0
495 * set cr3 ->interception
496 * get cr0 ->interception
497 * set cr0 -> no interception
498 */
499 /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */
500 (1ULL << INTERCEPT_CPUID) |
501 (1ULL << INTERCEPT_HLT) |
502 (1ULL << INTERCEPT_INVLPG) |
503 (1ULL << INTERCEPT_INVLPGA) |
504 (1ULL << INTERCEPT_IOIO_PROT) |
505 (1ULL << INTERCEPT_MSR_PROT) |
506 (1ULL << INTERCEPT_TASK_SWITCH) |
507 (1ULL << INTERCEPT_VMRUN) |
508 (1ULL << INTERCEPT_VMMCALL) |
509 (1ULL << INTERCEPT_VMLOAD) |
510 (1ULL << INTERCEPT_VMSAVE) |
511 (1ULL << INTERCEPT_STGI) |
512 (1ULL << INTERCEPT_CLGI) |
513 (1ULL << INTERCEPT_SKINIT);
514
515 control->iopm_base_pa = iopm_base;
516 control->msrpm_base_pa = msrpm_base;
517 rdtscll(tsc);
518 control->tsc_offset = -tsc;
519 control->int_ctl = V_INTR_MASKING_MASK;
520
521 init_seg(&save->es);
522 init_seg(&save->ss);
523 init_seg(&save->ds);
524 init_seg(&save->fs);
525 init_seg(&save->gs);
526
527 save->cs.selector = 0xf000;
528 /* Executable/Readable Code Segment */
529 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
530 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
531 save->cs.limit = 0xffff;
532 save->cs.base = 0xffff0000;
533
534 save->gdtr.limit = 0xffff;
535 save->idtr.limit = 0xffff;
536
537 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
538 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
539
540 save->efer = MSR_EFER_SVME_MASK;
541
542 save->dr6 = 0xffff0ff0;
543 save->dr7 = 0x400;
544 save->rflags = 2;
545 save->rip = 0x0000fff0;
546
547 /*
548 * cr0 value at cpu reset should be 0x60000010; we enable the cpu
549 * cache by default. The orderly way is to enable the cache in the BIOS.
550 */
551 save->cr0 = 0x00000010 | CR0_PG_MASK;
552 save->cr4 = CR4_PAE_MASK;
553 /* rdx = ?? */
554}
555
556static int svm_create_vcpu(struct kvm_vcpu *vcpu)
557{
558 struct page *page;
559 int r;
560
561 r = -ENOMEM;
562 vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
563 if (!vcpu->svm)
564 goto out1;
565 page = alloc_page(GFP_KERNEL);
566 if (!page)
567 goto out2;
568
569 vcpu->svm->vmcb = page_address(page);
570 memset(vcpu->svm->vmcb, 0, PAGE_SIZE);
571 vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
572 vcpu->svm->cr0 = 0x00000010;
573 vcpu->svm->asid_generation = 0;
574 memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
575 init_vmcb(vcpu->svm->vmcb);
576
577 return 0;
578
579out2:
580 kfree(vcpu->svm);
581out1:
582 return r;
583}
584
585static void svm_free_vcpu(struct kvm_vcpu *vcpu)
586{
587 if (!vcpu->svm)
588 return;
589 if (vcpu->svm->vmcb)
590 __free_page(pfn_to_page(vcpu->svm->vmcb_pa >> PAGE_SHIFT));
591 kfree(vcpu->svm);
592}
593
594static struct kvm_vcpu *svm_vcpu_load(struct kvm_vcpu *vcpu)
595{
596 get_cpu();
597 return vcpu;
598}
599
600static void svm_vcpu_put(struct kvm_vcpu *vcpu)
601{
602 put_cpu();
603}
604
605static void svm_cache_regs(struct kvm_vcpu *vcpu)
606{
607 vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax;
608 vcpu->regs[VCPU_REGS_RSP] = vcpu->svm->vmcb->save.rsp;
609 vcpu->rip = vcpu->svm->vmcb->save.rip;
610}
611
612static void svm_decache_regs(struct kvm_vcpu *vcpu)
613{
614 vcpu->svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
615 vcpu->svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
616 vcpu->svm->vmcb->save.rip = vcpu->rip;
617}
618
619static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
620{
621 return vcpu->svm->vmcb->save.rflags;
622}
623
624static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
625{
626 vcpu->svm->vmcb->save.rflags = rflags;
627}
628
629static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
630{
631 struct vmcb_save_area *save = &vcpu->svm->vmcb->save;
632
633 switch (seg) {
634 case VCPU_SREG_CS: return &save->cs;
635 case VCPU_SREG_DS: return &save->ds;
636 case VCPU_SREG_ES: return &save->es;
637 case VCPU_SREG_FS: return &save->fs;
638 case VCPU_SREG_GS: return &save->gs;
639 case VCPU_SREG_SS: return &save->ss;
640 case VCPU_SREG_TR: return &save->tr;
641 case VCPU_SREG_LDTR: return &save->ldtr;
642 }
643 BUG();
644 return 0;
645}
646
647static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
648{
649 struct vmcb_seg *s = svm_seg(vcpu, seg);
650
651 return s->base;
652}
653
654static void svm_get_segment(struct kvm_vcpu *vcpu,
655 struct kvm_segment *var, int seg)
656{
657 struct vmcb_seg *s = svm_seg(vcpu, seg);
658
659 var->base = s->base;
660 var->limit = s->limit;
661 var->selector = s->selector;
662 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
663 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
664 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
665 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
666 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
667 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
668 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
669 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
670 var->unusable = !var->present;
671}
672
673static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
674{
675 struct vmcb_seg *s = svm_seg(vcpu, VCPU_SREG_CS);
676
677 *db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
678 *l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
679}
680
681static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
682{
683 dt->limit = vcpu->svm->vmcb->save.ldtr.limit;
684 dt->base = vcpu->svm->vmcb->save.ldtr.base;
685}
686
687static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
688{
689 vcpu->svm->vmcb->save.ldtr.limit = dt->limit;
690 vcpu->svm->vmcb->save.ldtr.base = dt->base ;
691}
692
693static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
694{
695 dt->limit = vcpu->svm->vmcb->save.gdtr.limit;
696 dt->base = vcpu->svm->vmcb->save.gdtr.base;
697}
698
699static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
700{
701 vcpu->svm->vmcb->save.gdtr.limit = dt->limit;
702 vcpu->svm->vmcb->save.gdtr.base = dt->base ;
703}
704
705static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
706{
707#ifdef __x86_64__
708 if (vcpu->shadow_efer & KVM_EFER_LME) {
709 if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) {
710 vcpu->shadow_efer |= KVM_EFER_LMA;
711 vcpu->svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME;
712 }
713
714 if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK) ) {
715 vcpu->shadow_efer &= ~KVM_EFER_LMA;
716 vcpu->svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME);
717 }
718 }
719#endif
720 vcpu->svm->cr0 = cr0;
721 vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK;
722 vcpu->cr0 = cr0;
723}
724
725static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
726{
727 vcpu->cr4 = cr4;
728 vcpu->svm->vmcb->save.cr4 = cr4 | CR4_PAE_MASK;
729}
730
731static void svm_set_segment(struct kvm_vcpu *vcpu,
732 struct kvm_segment *var, int seg)
733{
734 struct vmcb_seg *s = svm_seg(vcpu, seg);
735
736 s->base = var->base;
737 s->limit = var->limit;
738 s->selector = var->selector;
739 if (var->unusable)
740 s->attrib = 0;
741 else {
742 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
743 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
744 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
745 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
746 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
747 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
748 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
749 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
750 }
751 if (seg == VCPU_SREG_CS)
752 vcpu->svm->vmcb->save.cpl
753 = (vcpu->svm->vmcb->save.cs.attrib
754 >> SVM_SELECTOR_DPL_SHIFT) & 3;
755
756}
757
758/* FIXME:
759
760 vcpu->svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
761 vcpu->svm->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
762
763*/
764
765static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
766{
767 return -EOPNOTSUPP;
768}
769
770static void load_host_msrs(struct kvm_vcpu *vcpu)
771{
772 int i;
773
774 for ( i = 0; i < NR_HOST_SAVE_MSRS; i++)
775 wrmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]);
776}
777
778static void save_host_msrs(struct kvm_vcpu *vcpu)
779{
780 int i;
781
782 for ( i = 0; i < NR_HOST_SAVE_MSRS; i++)
783 rdmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]);
784}
785
786static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data)
787{
788 if (svm_data->next_asid > svm_data->max_asid) {
789 ++svm_data->asid_generation;
790 svm_data->next_asid = 1;
791 vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
792 }
793
794 vcpu->cpu = svm_data->cpu;
795 vcpu->svm->asid_generation = svm_data->asid_generation;
796 vcpu->svm->vmcb->control.asid = svm_data->next_asid++;
797}
798
799static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address)
800{
801 invlpga(address, vcpu->svm->vmcb->control.asid); // is needed?
802}
803
804static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
805{
806 return vcpu->svm->db_regs[dr];
807}
808
809static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
810 int *exception)
811{
812 *exception = 0;
813
814 if (vcpu->svm->vmcb->save.dr7 & DR7_GD_MASK) {
815 vcpu->svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
816 vcpu->svm->vmcb->save.dr6 |= DR6_BD_MASK;
817 *exception = DB_VECTOR;
818 return;
819 }
820
821 switch (dr) {
822 case 0 ... 3:
823 vcpu->svm->db_regs[dr] = value;
824 return;
825 case 4 ... 5:
826 if (vcpu->cr4 & CR4_DE_MASK) {
827 *exception = UD_VECTOR;
828 return;
829 }
830 case 7: {
831 if (value & ~((1ULL << 32) - 1)) {
832 *exception = GP_VECTOR;
833 return;
834 }
835 vcpu->svm->vmcb->save.dr7 = value;
836 return;
837 }
838 default:
839 printk(KERN_DEBUG "%s: unexpected dr %u\n",
840 __FUNCTION__, dr);
841 *exception = UD_VECTOR;
842 return;
843 }
844}
845
846static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
847{
848 u32 exit_int_info = vcpu->svm->vmcb->control.exit_int_info;
849 u64 fault_address;
850 u32 error_code;
851 enum emulation_result er;
852
853 if (is_external_interrupt(exit_int_info))
854 push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
855
856 spin_lock(&vcpu->kvm->lock);
857
858 fault_address = vcpu->svm->vmcb->control.exit_info_2;
859 error_code = vcpu->svm->vmcb->control.exit_info_1;
860 if (!vcpu->mmu.page_fault(vcpu, fault_address, error_code)) {
861 spin_unlock(&vcpu->kvm->lock);
862 return 1;
863 }
864 er = emulate_instruction(vcpu, kvm_run, fault_address, error_code);
865 spin_unlock(&vcpu->kvm->lock);
866
867 switch (er) {
868 case EMULATE_DONE:
869 return 1;
870 case EMULATE_DO_MMIO:
871 ++kvm_stat.mmio_exits;
872 kvm_run->exit_reason = KVM_EXIT_MMIO;
873 return 0;
874 case EMULATE_FAIL:
875 vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__);
876 break;
877 default:
878 BUG();
879 }
880
881 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
882 return 0;
883}
884
885static int io_get_override(struct kvm_vcpu *vcpu,
886 struct vmcb_seg **seg,
887 int *addr_override)
888{
889 u8 inst[MAX_INST_SIZE];
890 unsigned ins_length;
891 gva_t rip;
892 int i;
893
894 rip = vcpu->svm->vmcb->save.rip;
895 ins_length = vcpu->svm->next_rip - rip;
896 rip += vcpu->svm->vmcb->save.cs.base;
897
898 if (ins_length > MAX_INST_SIZE)
899 printk(KERN_DEBUG
900 "%s: inst length err, cs base 0x%llx rip 0x%llx "
901 "next rip 0x%llx ins_length %u\n",
902 __FUNCTION__,
903 vcpu->svm->vmcb->save.cs.base,
904 vcpu->svm->vmcb->save.rip,
905 vcpu->svm->vmcb->control.exit_info_2,
906 ins_length);
907
908 if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length)
909 /* #PF */
910 return 0;
911
912 *addr_override = 0;
913 *seg = 0;
914 for (i = 0; i < ins_length; i++)
915 switch (inst[i]) {
916 case 0xf0:
917 case 0xf2:
918 case 0xf3:
919 case 0x66:
920 continue;
921 case 0x67:
922 *addr_override = 1;
923 continue;
924 case 0x2e:
925 *seg = &vcpu->svm->vmcb->save.cs;
926 continue;
927 case 0x36:
928 *seg = &vcpu->svm->vmcb->save.ss;
929 continue;
930 case 0x3e:
931 *seg = &vcpu->svm->vmcb->save.ds;
932 continue;
933 case 0x26:
934 *seg = &vcpu->svm->vmcb->save.es;
935 continue;
936 case 0x64:
937 *seg = &vcpu->svm->vmcb->save.fs;
938 continue;
939 case 0x65:
940 *seg = &vcpu->svm->vmcb->save.gs;
941 continue;
942 default:
943 return 1;
944 }
945 printk(KERN_DEBUG "%s: unexpected\n", __FUNCTION__);
946 return 0;
947}
948
949static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address)
950{
951 unsigned long addr_mask;
952 unsigned long *reg;
953 struct vmcb_seg *seg;
954 int addr_override;
955 struct vmcb_save_area *save_area = &vcpu->svm->vmcb->save;
956 u16 cs_attrib = save_area->cs.attrib;
957 unsigned addr_size = get_addr_size(vcpu);
958
959 if (!io_get_override(vcpu, &seg, &addr_override))
960 return 0;
961
962 if (addr_override)
963 addr_size = (addr_size == 2) ? 4: (addr_size >> 1);
964
965 if (ins) {
966 reg = &vcpu->regs[VCPU_REGS_RDI];
967 seg = &vcpu->svm->vmcb->save.es;
968 } else {
969 reg = &vcpu->regs[VCPU_REGS_RSI];
970 seg = (seg) ? seg : &vcpu->svm->vmcb->save.ds;
971 }
972
973 addr_mask = ~0ULL >> (64 - (addr_size * 8));
974
975 if ((cs_attrib & SVM_SELECTOR_L_MASK) &&
976 !(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_VM)) {
977 *address = (*reg & addr_mask);
978 return addr_mask;
979 }
980
981 if (!(seg->attrib & SVM_SELECTOR_P_MASK)) {
982 svm_inject_gp(vcpu, 0);
983 return 0;
984 }
985
986 *address = (*reg & addr_mask) + seg->base;
987 return addr_mask;
988}
989
990static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
991{
992 u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug?
993 int _in = io_info & SVM_IOIO_TYPE_MASK;
994
995 ++kvm_stat.io_exits;
996
997 vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2;
998
999 kvm_run->exit_reason = KVM_EXIT_IO;
1000 kvm_run->io.port = io_info >> 16;
1001 kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1002 kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT);
1003 kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0;
1004 kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0;
1005
1006 if (kvm_run->io.string) {
1007 unsigned addr_mask;
1008
1009 addr_mask = io_adress(vcpu, _in, &kvm_run->io.address);
1010 if (!addr_mask) {
1011 printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__);
1012 return 1;
1013 }
1014
1015 if (kvm_run->io.rep) {
1016 kvm_run->io.count = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
1017 kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags
1018 & X86_EFLAGS_DF) != 0;
1019 }
1020 } else {
1021 kvm_run->io.value = vcpu->svm->vmcb->save.rax;
1022 }
1023 return 0;
1024}
1025
1026
1027static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1028{
1029 return 1;
1030}
1031
1032static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1033{
1034 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
1035 skip_emulated_instruction(vcpu);
1036 if (vcpu->irq_summary && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF))
1037 return 1;
1038
1039 kvm_run->exit_reason = KVM_EXIT_HLT;
1040 return 0;
1041}
1042
1043static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1044{
1045 inject_ud(vcpu);
1046 return 1;
1047}
1048
1049static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1050{
1051 printk(KERN_DEBUG "%s: task switch is unsupported\n", __FUNCTION__);
1052 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
1053 return 0;
1054}
1055
1056static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1057{
1058 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2;
1059 kvm_run->exit_reason = KVM_EXIT_CPUID;
1060 return 0;
1061}
1062
1063static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1064{
1065 if (emulate_instruction(vcpu, 0, 0, 0) != EMULATE_DONE)
1066 printk(KERN_ERR "%s: failed\n", __FUNCTION__);
1067 return 1;
1068}
1069
1070static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1071{
1072 switch (ecx) {
1073 case MSR_IA32_MC0_CTL:
1074 case MSR_IA32_MCG_STATUS:
1075 case MSR_IA32_MCG_CAP:
1076 case MSR_IA32_MC0_MISC:
1077 case MSR_IA32_MC0_MISC+4:
1078 case MSR_IA32_MC0_MISC+8:
1079 case MSR_IA32_MC0_MISC+12:
1080 case MSR_IA32_MC0_MISC+16:
1081 case MSR_IA32_UCODE_REV:
1082 /* MTRR registers */
1083 case 0xfe:
1084 case 0x200 ... 0x2ff:
1085 *data = 0;
1086 break;
1087 case MSR_IA32_TIME_STAMP_COUNTER: {
1088 u64 tsc;
1089
1090 rdtscll(tsc);
1091 *data = vcpu->svm->vmcb->control.tsc_offset + tsc;
1092 break;
1093 }
1094 case MSR_EFER:
1095 *data = vcpu->shadow_efer;
1096 break;
1097 case MSR_IA32_APICBASE:
1098 *data = vcpu->apic_base;
1099 break;
1100#ifdef __x86_64__
1101 case MSR_STAR:
1102 *data = vcpu->svm->vmcb->save.star;
1103 break;
1104 case MSR_LSTAR:
1105 *data = vcpu->svm->vmcb->save.lstar;
1106 break;
1107 case MSR_CSTAR:
1108 *data = vcpu->svm->vmcb->save.cstar;
1109 break;
1110 case MSR_KERNEL_GS_BASE:
1111 *data = vcpu->svm->vmcb->save.kernel_gs_base;
1112 break;
1113 case MSR_SYSCALL_MASK:
1114 *data = vcpu->svm->vmcb->save.sfmask;
1115 break;
1116#endif
1117 case MSR_IA32_SYSENTER_CS:
1118 *data = vcpu->svm->vmcb->save.sysenter_cs;
1119 break;
1120 case MSR_IA32_SYSENTER_EIP:
1121 *data = vcpu->svm->vmcb->save.sysenter_eip;
1122 break;
1123 case MSR_IA32_SYSENTER_ESP:
1124 *data = vcpu->svm->vmcb->save.sysenter_esp;
1125 break;
1126 default:
1127 printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", ecx);
1128 return 1;
1129 }
1130 return 0;
1131}
1132
1133static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1134{
1135 u32 ecx = vcpu->regs[VCPU_REGS_RCX];
1136 u64 data;
1137
1138 if (svm_get_msr(vcpu, ecx, &data))
1139 svm_inject_gp(vcpu, 0);
1140 else {
1141 vcpu->svm->vmcb->save.rax = data & 0xffffffff;
1142 vcpu->regs[VCPU_REGS_RDX] = data >> 32;
1143 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2;
1144 skip_emulated_instruction(vcpu);
1145 }
1146 return 1;
1147}
1148
1149static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
1150{
1151 switch (ecx) {
1152#ifdef __x86_64__
1153 case MSR_EFER:
1154 set_efer(vcpu, data);
1155 break;
1156#endif
1157 case MSR_IA32_MC0_STATUS:
1158 printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n"
1159 , __FUNCTION__, data);
1160 break;
1161 case MSR_IA32_TIME_STAMP_COUNTER: {
1162 u64 tsc;
1163
1164 rdtscll(tsc);
1165 vcpu->svm->vmcb->control.tsc_offset = data - tsc;
1166 break;
1167 }
1168 case MSR_IA32_UCODE_REV:
1169 case MSR_IA32_UCODE_WRITE:
1170 case 0x200 ... 0x2ff: /* MTRRs */
1171 break;
1172 case MSR_IA32_APICBASE:
1173 vcpu->apic_base = data;
1174 break;
1175#ifdef __x86_64__
1176 case MSR_STAR:
1177 vcpu->svm->vmcb->save.star = data;
1178 break;
1179 case MSR_LSTAR:
1180 vcpu->svm->vmcb->save.lstar = data;
1181 break;
1182 case MSR_CSTAR:
1183 vcpu->svm->vmcb->save.cstar = data;
1184 break;
1185 case MSR_KERNEL_GS_BASE:
1186 vcpu->svm->vmcb->save.kernel_gs_base = data;
1187 break;
1188 case MSR_SYSCALL_MASK:
1189 vcpu->svm->vmcb->save.sfmask = data;
1190 break;
1191#endif
1192 case MSR_IA32_SYSENTER_CS:
1193 vcpu->svm->vmcb->save.sysenter_cs = data;
1194 break;
1195 case MSR_IA32_SYSENTER_EIP:
1196 vcpu->svm->vmcb->save.sysenter_eip = data;
1197 break;
1198 case MSR_IA32_SYSENTER_ESP:
1199 vcpu->svm->vmcb->save.sysenter_esp = data;
1200 break;
1201 default:
1202 printk(KERN_ERR "kvm: unhandled wrmsr: %x\n", ecx);
1203 return 1;
1204 }
1205 return 0;
1206}
1207
1208static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1209{
1210 u32 ecx = vcpu->regs[VCPU_REGS_RCX];
1211 u64 data = (vcpu->svm->vmcb->save.rax & -1u)
1212 | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32);
1213 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2;
1214 if (svm_set_msr(vcpu, ecx, data))
1215 svm_inject_gp(vcpu, 0);
1216 else
1217 skip_emulated_instruction(vcpu);
1218 return 1;
1219}
1220
1221static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1222{
1223 if (vcpu->svm->vmcb->control.exit_info_1)
1224 return wrmsr_interception(vcpu, kvm_run);
1225 else
1226 return rdmsr_interception(vcpu, kvm_run);
1227}
1228
1229static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
1230 struct kvm_run *kvm_run) = {
1231 [SVM_EXIT_READ_CR0] = emulate_on_interception,
1232 [SVM_EXIT_READ_CR3] = emulate_on_interception,
1233 [SVM_EXIT_READ_CR4] = emulate_on_interception,
1234 /* for now: */
1235 [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
1236 [SVM_EXIT_WRITE_CR3] = emulate_on_interception,
1237 [SVM_EXIT_WRITE_CR4] = emulate_on_interception,
1238 [SVM_EXIT_READ_DR0] = emulate_on_interception,
1239 [SVM_EXIT_READ_DR1] = emulate_on_interception,
1240 [SVM_EXIT_READ_DR2] = emulate_on_interception,
1241 [SVM_EXIT_READ_DR3] = emulate_on_interception,
1242 [SVM_EXIT_WRITE_DR0] = emulate_on_interception,
1243 [SVM_EXIT_WRITE_DR1] = emulate_on_interception,
1244 [SVM_EXIT_WRITE_DR2] = emulate_on_interception,
1245 [SVM_EXIT_WRITE_DR3] = emulate_on_interception,
1246 [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
1247 [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
1248 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
1249 [SVM_EXIT_INTR] = nop_on_interception,
1250 [SVM_EXIT_NMI] = nop_on_interception,
1251 [SVM_EXIT_SMI] = nop_on_interception,
1252 [SVM_EXIT_INIT] = nop_on_interception,
1253 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */
1254 [SVM_EXIT_CPUID] = cpuid_interception,
1255 [SVM_EXIT_HLT] = halt_interception,
1256 [SVM_EXIT_INVLPG] = emulate_on_interception,
1257 [SVM_EXIT_INVLPGA] = invalid_op_interception,
1258 [SVM_EXIT_IOIO] = io_interception,
1259 [SVM_EXIT_MSR] = msr_interception,
1260 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
1261 [SVM_EXIT_VMRUN] = invalid_op_interception,
1262 [SVM_EXIT_VMMCALL] = invalid_op_interception,
1263 [SVM_EXIT_VMLOAD] = invalid_op_interception,
1264 [SVM_EXIT_VMSAVE] = invalid_op_interception,
1265 [SVM_EXIT_STGI] = invalid_op_interception,
1266 [SVM_EXIT_CLGI] = invalid_op_interception,
1267 [SVM_EXIT_SKINIT] = invalid_op_interception,
1268};
1269
1270
1271static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1272{
1273 u32 exit_code = vcpu->svm->vmcb->control.exit_code;
1274
1275 kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT;
1276
1277 if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) &&
1278 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR)
1279 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
1280 "exit_code 0x%x\n",
1281 __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info,
1282 exit_code);
1283
1284 if (exit_code >= sizeof(svm_exit_handlers) / sizeof(*svm_exit_handlers)
1285 || svm_exit_handlers[exit_code] == 0) {
1286 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
1287 printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n",
1288 __FUNCTION__,
1289 exit_code,
1290 vcpu->svm->vmcb->save.rip,
1291 vcpu->cr0,
1292 vcpu->svm->vmcb->save.rflags);
1293 return 0;
1294 }
1295
1296 return svm_exit_handlers[exit_code](vcpu, kvm_run);
1297}
1298
1299static void reload_tss(struct kvm_vcpu *vcpu)
1300{
1301 int cpu = raw_smp_processor_id();
1302
1303 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
1304 svm_data->tss_desc->type = 9; //available 32/64-bit TSS
1305 load_TR_desc();
1306}
1307
1308static void pre_svm_run(struct kvm_vcpu *vcpu)
1309{
1310 int cpu = raw_smp_processor_id();
1311
1312 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
1313
1314 vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
1315 if (vcpu->cpu != cpu ||
1316 vcpu->svm->asid_generation != svm_data->asid_generation)
1317 new_asid(vcpu, svm_data);
1318}
1319
1320
1321static inline void kvm_try_inject_irq(struct kvm_vcpu *vcpu)
1322{
1323 struct vmcb_control_area *control;
1324
1325 if (!vcpu->irq_summary)
1326 return;
1327
1328 control = &vcpu->svm->vmcb->control;
1329
1330 control->int_vector = pop_irq(vcpu);
1331 control->int_ctl &= ~V_INTR_PRIO_MASK;
1332 control->int_ctl |= V_IRQ_MASK |
1333 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
1334}
1335
1336static void kvm_reput_irq(struct kvm_vcpu *vcpu)
1337{
1338 struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
1339
1340 if (control->int_ctl & V_IRQ_MASK) {
1341 control->int_ctl &= ~V_IRQ_MASK;
1342 push_irq(vcpu, control->int_vector);
1343 }
1344}
1345
1346static void save_db_regs(unsigned long *db_regs)
1347{
1348#ifdef __x86_64__
1349 asm ("mov %%dr0, %%rax \n\t"
1350 "mov %%rax, %[dr0] \n\t"
1351 "mov %%dr1, %%rax \n\t"
1352 "mov %%rax, %[dr1] \n\t"
1353 "mov %%dr2, %%rax \n\t"
1354 "mov %%rax, %[dr2] \n\t"
1355 "mov %%dr3, %%rax \n\t"
1356 "mov %%rax, %[dr3] \n\t"
1357 : [dr0] "=m"(db_regs[0]),
1358 [dr1] "=m"(db_regs[1]),
1359 [dr2] "=m"(db_regs[2]),
1360 [dr3] "=m"(db_regs[3])
1361 : : "rax");
1362#else
1363 asm ("mov %%dr0, %%eax \n\t"
1364 "mov %%eax, %[dr0] \n\t"
1365 "mov %%dr1, %%eax \n\t"
1366 "mov %%eax, %[dr1] \n\t"
1367 "mov %%dr2, %%eax \n\t"
1368 "mov %%eax, %[dr2] \n\t"
1369 "mov %%dr3, %%eax \n\t"
1370 "mov %%eax, %[dr3] \n\t"
1371 : [dr0] "=m"(db_regs[0]),
1372 [dr1] "=m"(db_regs[1]),
1373 [dr2] "=m"(db_regs[2]),
1374 [dr3] "=m"(db_regs[3])
1375 : : "eax");
1376#endif
1377}
1378
1379static void load_db_regs(unsigned long *db_regs)
1380{
1381 asm volatile ("mov %[dr0], %%dr0 \n\t"
1382 "mov %[dr1], %%dr1 \n\t"
1383 "mov %[dr2], %%dr2 \n\t"
1384 "mov %[dr3], %%dr3 \n\t"
1385 :
1386 : [dr0] "r"(db_regs[0]),
1387 [dr1] "r"(db_regs[1]),
1388 [dr2] "r"(db_regs[2]),
1389 [dr3] "r"(db_regs[3])
1390#ifdef __x86_64__
1391 : "rax");
1392#else
1393 : "eax");
1394#endif
1395}
1396
1397static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1398{
1399 u16 fs_selector;
1400 u16 gs_selector;
1401 u16 ldt_selector;
1402
1403again:
1404 kvm_try_inject_irq(vcpu);
1405
1406 clgi();
1407
1408 pre_svm_run(vcpu);
1409
1410 save_host_msrs(vcpu);
1411 fs_selector = read_fs();
1412 gs_selector = read_gs();
1413 ldt_selector = read_ldt();
1414 vcpu->svm->host_cr2 = kvm_read_cr2();
1415 vcpu->svm->host_dr6 = read_dr6();
1416 vcpu->svm->host_dr7 = read_dr7();
1417 vcpu->svm->vmcb->save.cr2 = vcpu->cr2;
1418
1419 if (vcpu->svm->vmcb->save.dr7 & 0xff) {
1420 write_dr7(0);
1421 save_db_regs(vcpu->svm->host_db_regs);
1422 load_db_regs(vcpu->svm->db_regs);
1423 }
1424 asm volatile (
1425#ifdef __x86_64__
1426 "push %%rbx; push %%rcx; push %%rdx;"
1427 "push %%rsi; push %%rdi; push %%rbp;"
1428 "push %%r8; push %%r9; push %%r10; push %%r11;"
1429 "push %%r12; push %%r13; push %%r14; push %%r15;"
1430#else
1431 "push %%ebx; push %%ecx; push %%edx;"
1432 "push %%esi; push %%edi; push %%ebp;"
1433#endif
1434
1435#ifdef __x86_64__
1436 "mov %c[rbx](%[vcpu]), %%rbx \n\t"
1437 "mov %c[rcx](%[vcpu]), %%rcx \n\t"
1438 "mov %c[rdx](%[vcpu]), %%rdx \n\t"
1439 "mov %c[rsi](%[vcpu]), %%rsi \n\t"
1440 "mov %c[rdi](%[vcpu]), %%rdi \n\t"
1441 "mov %c[rbp](%[vcpu]), %%rbp \n\t"
1442 "mov %c[r8](%[vcpu]), %%r8 \n\t"
1443 "mov %c[r9](%[vcpu]), %%r9 \n\t"
1444 "mov %c[r10](%[vcpu]), %%r10 \n\t"
1445 "mov %c[r11](%[vcpu]), %%r11 \n\t"
1446 "mov %c[r12](%[vcpu]), %%r12 \n\t"
1447 "mov %c[r13](%[vcpu]), %%r13 \n\t"
1448 "mov %c[r14](%[vcpu]), %%r14 \n\t"
1449 "mov %c[r15](%[vcpu]), %%r15 \n\t"
1450#else
1451 "mov %c[rbx](%[vcpu]), %%ebx \n\t"
1452 "mov %c[rcx](%[vcpu]), %%ecx \n\t"
1453 "mov %c[rdx](%[vcpu]), %%edx \n\t"
1454 "mov %c[rsi](%[vcpu]), %%esi \n\t"
1455 "mov %c[rdi](%[vcpu]), %%edi \n\t"
1456 "mov %c[rbp](%[vcpu]), %%ebp \n\t"
1457#endif
1458
1459#ifdef __x86_64__
1460 /* Enter guest mode */
1461 "push %%rax \n\t"
1462 "mov %c[svm](%[vcpu]), %%rax \n\t"
1463 "mov %c[vmcb](%%rax), %%rax \n\t"
1464 SVM_VMLOAD "\n\t"
1465 SVM_VMRUN "\n\t"
1466 SVM_VMSAVE "\n\t"
1467 "pop %%rax \n\t"
1468#else
1469 /* Enter guest mode */
1470 "push %%eax \n\t"
1471 "mov %c[svm](%[vcpu]), %%eax \n\t"
1472 "mov %c[vmcb](%%eax), %%eax \n\t"
1473 SVM_VMLOAD "\n\t"
1474 SVM_VMRUN "\n\t"
1475 SVM_VMSAVE "\n\t"
1476 "pop %%eax \n\t"
1477#endif
1478
1479 /* Save guest registers, load host registers */
1480#ifdef __x86_64__
1481 "mov %%rbx, %c[rbx](%[vcpu]) \n\t"
1482 "mov %%rcx, %c[rcx](%[vcpu]) \n\t"
1483 "mov %%rdx, %c[rdx](%[vcpu]) \n\t"
1484 "mov %%rsi, %c[rsi](%[vcpu]) \n\t"
1485 "mov %%rdi, %c[rdi](%[vcpu]) \n\t"
1486 "mov %%rbp, %c[rbp](%[vcpu]) \n\t"
1487 "mov %%r8, %c[r8](%[vcpu]) \n\t"
1488 "mov %%r9, %c[r9](%[vcpu]) \n\t"
1489 "mov %%r10, %c[r10](%[vcpu]) \n\t"
1490 "mov %%r11, %c[r11](%[vcpu]) \n\t"
1491 "mov %%r12, %c[r12](%[vcpu]) \n\t"
1492 "mov %%r13, %c[r13](%[vcpu]) \n\t"
1493 "mov %%r14, %c[r14](%[vcpu]) \n\t"
1494 "mov %%r15, %c[r15](%[vcpu]) \n\t"
1495
1496 "pop %%r15; pop %%r14; pop %%r13; pop %%r12;"
1497 "pop %%r11; pop %%r10; pop %%r9; pop %%r8;"
1498 "pop %%rbp; pop %%rdi; pop %%rsi;"
1499 "pop %%rdx; pop %%rcx; pop %%rbx; \n\t"
1500#else
1501 "mov %%ebx, %c[rbx](%[vcpu]) \n\t"
1502 "mov %%ecx, %c[rcx](%[vcpu]) \n\t"
1503 "mov %%edx, %c[rdx](%[vcpu]) \n\t"
1504 "mov %%esi, %c[rsi](%[vcpu]) \n\t"
1505 "mov %%edi, %c[rdi](%[vcpu]) \n\t"
1506 "mov %%ebp, %c[rbp](%[vcpu]) \n\t"
1507
1508 "pop %%ebp; pop %%edi; pop %%esi;"
1509 "pop %%edx; pop %%ecx; pop %%ebx; \n\t"
1510#endif
1511 :
1512 : [vcpu]"a"(vcpu),
1513 [svm]"i"(offsetof(struct kvm_vcpu, svm)),
1514 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
1515 [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
1516 [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
1517 [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
1518 [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])),
1519 [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])),
1520 [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP]))
1521#ifdef __x86_64__
1522 ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])),
1523 [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])),
1524 [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])),
1525 [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])),
1526 [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])),
1527 [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])),
1528 [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])),
1529 [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15]))
1530#endif
1531 : "cc", "memory" );
1532
1533 if ((vcpu->svm->vmcb->save.dr7 & 0xff))
1534 load_db_regs(vcpu->svm->host_db_regs);
1535
1536 vcpu->cr2 = vcpu->svm->vmcb->save.cr2;
1537
1538 write_dr6(vcpu->svm->host_dr6);
1539 write_dr7(vcpu->svm->host_dr7);
1540 kvm_write_cr2(vcpu->svm->host_cr2);
1541
1542 load_fs(fs_selector);
1543 load_gs(gs_selector);
1544 load_ldt(ldt_selector);
1545 load_host_msrs(vcpu);
1546
1547 reload_tss(vcpu);
1548
1549 stgi();
1550
1551 kvm_reput_irq(vcpu);
1552
1553 vcpu->svm->next_rip = 0;
1554
1555 if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
1556 kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
1557 kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code;
1558 return 0;
1559 }
1560
1561 if (handle_exit(vcpu, kvm_run)) {
1562 if (signal_pending(current)) {
1563 ++kvm_stat.signal_exits;
1564 return -EINTR;
1565 }
1566 kvm_resched(vcpu);
1567 goto again;
1568 }
1569 return 0;
1570}
1571
1572static void svm_flush_tlb(struct kvm_vcpu *vcpu)
1573{
1574 force_new_asid(vcpu);
1575}
1576
1577static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
1578{
1579 vcpu->svm->vmcb->save.cr3 = root;
1580 force_new_asid(vcpu);
1581}
1582
1583static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
1584 unsigned long addr,
1585 uint32_t err_code)
1586{
1587 uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info;
1588
1589 ++kvm_stat.pf_guest;
1590
1591 if (is_page_fault(exit_int_info)) {
1592
1593 vcpu->svm->vmcb->control.event_inj_err = 0;
1594 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
1595 SVM_EVTINJ_VALID_ERR |
1596 SVM_EVTINJ_TYPE_EXEPT |
1597 DF_VECTOR;
1598 return;
1599 }
1600 vcpu->cr2 = addr;
1601 vcpu->svm->vmcb->save.cr2 = addr;
1602 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
1603 SVM_EVTINJ_VALID_ERR |
1604 SVM_EVTINJ_TYPE_EXEPT |
1605 PF_VECTOR;
1606 vcpu->svm->vmcb->control.event_inj_err = err_code;
1607}
1608
1609
1610static int is_disabled(void)
1611{
1612 return 0;
1613}
1614
1615static struct kvm_arch_ops svm_arch_ops = {
1616 .cpu_has_kvm_support = has_svm,
1617 .disabled_by_bios = is_disabled,
1618 .hardware_setup = svm_hardware_setup,
1619 .hardware_unsetup = svm_hardware_unsetup,
1620 .hardware_enable = svm_hardware_enable,
1621 .hardware_disable = svm_hardware_disable,
1622
1623 .vcpu_create = svm_create_vcpu,
1624 .vcpu_free = svm_free_vcpu,
1625
1626 .vcpu_load = svm_vcpu_load,
1627 .vcpu_put = svm_vcpu_put,
1628
1629 .set_guest_debug = svm_guest_debug,
1630 .get_msr = svm_get_msr,
1631 .set_msr = svm_set_msr,
1632 .get_segment_base = svm_get_segment_base,
1633 .get_segment = svm_get_segment,
1634 .set_segment = svm_set_segment,
1635 .is_long_mode = svm_is_long_mode,
1636 .get_cs_db_l_bits = svm_get_cs_db_l_bits,
1637 .set_cr0 = svm_set_cr0,
1638 .set_cr0_no_modeswitch = svm_set_cr0,
1639 .set_cr3 = svm_set_cr3,
1640 .set_cr4 = svm_set_cr4,
1641 .set_efer = svm_set_efer,
1642 .get_idt = svm_get_idt,
1643 .set_idt = svm_set_idt,
1644 .get_gdt = svm_get_gdt,
1645 .set_gdt = svm_set_gdt,
1646 .get_dr = svm_get_dr,
1647 .set_dr = svm_set_dr,
1648 .cache_regs = svm_cache_regs,
1649 .decache_regs = svm_decache_regs,
1650 .get_rflags = svm_get_rflags,
1651 .set_rflags = svm_set_rflags,
1652
1653 .invlpg = svm_invlpg,
1654 .tlb_flush = svm_flush_tlb,
1655 .inject_page_fault = svm_inject_page_fault,
1656
1657 .inject_gp = svm_inject_gp,
1658
1659 .run = svm_vcpu_run,
1660 .skip_emulated_instruction = skip_emulated_instruction,
1661 .vcpu_setup = svm_vcpu_setup,
1662};
1663
1664static int __init svm_init(void)
1665{
1666 kvm_emulator_want_group7_invlpg();
1667 kvm_init_arch(&svm_arch_ops, THIS_MODULE);
1668 return 0;
1669}
1670
1671static void __exit svm_exit(void)
1672{
1673 kvm_exit_arch();
1674}
1675
1676module_init(svm_init)
1677module_exit(svm_exit)