Diffstat (limited to 'drivers/kvm/vmx.c')
-rw-r--r--  drivers/kvm/vmx.c  652
1 file changed, 412 insertions, 240 deletions
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index c1ac106ace8c..80628f69916d 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -17,28 +17,35 @@
 
 #include "kvm.h"
 #include "vmx.h"
+#include "segment_descriptor.h"
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
+
 #include <asm/io.h>
 #include <asm/desc.h>
 
-#include "segment_descriptor.h"
-
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+static int init_rmode_tss(struct kvm *kvm);
+
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 
+static struct page *vmx_io_bitmap_a;
+static struct page *vmx_io_bitmap_b;
+
 #ifdef CONFIG_X86_64
 #define HOST_IS_64 1
 #else
 #define HOST_IS_64 0
 #endif
+#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE)
 
 static struct vmcs_descriptor {
 	int size;
@@ -82,18 +89,17 @@ static const u32 vmx_msr_index[] = {
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
-#ifdef CONFIG_X86_64
-static unsigned msr_offset_kernel_gs_base;
-#define NR_64BIT_MSRS 4
-/*
- * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt
- * mechanism (cpu bug AA24)
- */
-#define NR_BAD_MSRS 2
-#else
-#define NR_64BIT_MSRS 0
-#define NR_BAD_MSRS 0
-#endif
+static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr)
+{
+	return (u64)msr.data & EFER_SAVE_RESTORE_BITS;
+}
+
+static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu)
+{
+	int efer_offset = vcpu->msr_offset_efer;
+	return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) !=
+		msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]);
+}
 
 static inline int is_page_fault(u32 intr_info)
 {
@@ -115,13 +121,23 @@ static inline int is_external_interrupt(u32 intr_info)
 		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
-static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr)
+static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr)
 {
 	int i;
 
 	for (i = 0; i < vcpu->nmsrs; ++i)
 		if (vcpu->guest_msrs[i].index == msr)
-			return &vcpu->guest_msrs[i];
+			return i;
+	return -1;
+}
+
+static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr)
+{
+	int i;
+
+	i = __find_msr_index(vcpu, msr);
+	if (i >= 0)
+		return &vcpu->guest_msrs[i];
 	return NULL;
 }
 
@@ -147,6 +163,7 @@ static void __vcpu_clear(void *arg)
 		vmcs_clear(vcpu->vmcs);
 	if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
 		per_cpu(current_vmcs, cpu) = NULL;
+	rdtscll(vcpu->host_tsc);
 }
 
 static void vcpu_clear(struct kvm_vcpu *vcpu)
@@ -234,6 +251,127 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
 	vmcs_writel(field, vmcs_readl(field) | mask);
 }
 
+static void update_exception_bitmap(struct kvm_vcpu *vcpu)
+{
+	u32 eb;
+
+	eb = 1u << PF_VECTOR;
+	if (!vcpu->fpu_active)
+		eb |= 1u << NM_VECTOR;
+	if (vcpu->guest_debug.enabled)
+		eb |= 1u << 1;
+	if (vcpu->rmode.active)
+		eb = ~0;
+	vmcs_write32(EXCEPTION_BITMAP, eb);
+}
+
+static void reload_tss(void)
+{
+#ifndef CONFIG_X86_64
+
+	/*
+	 * VT restores TR but not its size. Useless.
+	 */
+	struct descriptor_table gdt;
+	struct segment_descriptor *descs;
+
+	get_gdt(&gdt);
+	descs = (void *)gdt.base;
+	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
+	load_TR_desc();
+#endif
+}
+
+static void load_transition_efer(struct kvm_vcpu *vcpu)
+{
+	u64 trans_efer;
+	int efer_offset = vcpu->msr_offset_efer;
+
+	trans_efer = vcpu->host_msrs[efer_offset].data;
+	trans_efer &= ~EFER_SAVE_RESTORE_BITS;
+	trans_efer |= msr_efer_save_restore_bits(
+				vcpu->guest_msrs[efer_offset]);
+	wrmsrl(MSR_EFER, trans_efer);
+	vcpu->stat.efer_reload++;
+}
+
+static void vmx_save_host_state(struct kvm_vcpu *vcpu)
+{
+	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+
+	if (hs->loaded)
+		return;
+
+	hs->loaded = 1;
+	/*
+	 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
+	 * allow segment selectors with cpl > 0 or ti == 1.
+	 */
+	hs->ldt_sel = read_ldt();
+	hs->fs_gs_ldt_reload_needed = hs->ldt_sel;
+	hs->fs_sel = read_fs();
+	if (!(hs->fs_sel & 7))
+		vmcs_write16(HOST_FS_SELECTOR, hs->fs_sel);
+	else {
+		vmcs_write16(HOST_FS_SELECTOR, 0);
+		hs->fs_gs_ldt_reload_needed = 1;
+	}
+	hs->gs_sel = read_gs();
+	if (!(hs->gs_sel & 7))
+		vmcs_write16(HOST_GS_SELECTOR, hs->gs_sel);
+	else {
+		vmcs_write16(HOST_GS_SELECTOR, 0);
+		hs->fs_gs_ldt_reload_needed = 1;
+	}
+
+#ifdef CONFIG_X86_64
+	vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
+	vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
+#else
+	vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel));
+	vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel));
+#endif
+
+#ifdef CONFIG_X86_64
+	if (is_long_mode(vcpu)) {
+		save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1);
+	}
+#endif
+	load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	if (msr_efer_need_save_restore(vcpu))
+		load_transition_efer(vcpu);
+}
+
+static void vmx_load_host_state(struct kvm_vcpu *vcpu)
+{
+	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+
+	if (!hs->loaded)
+		return;
+
+	hs->loaded = 0;
+	if (hs->fs_gs_ldt_reload_needed) {
+		load_ldt(hs->ldt_sel);
+		load_fs(hs->fs_sel);
+		/*
+		 * If we have to reload gs, we must take care to
+		 * preserve our gs base.
+		 */
+		local_irq_disable();
+		load_gs(hs->gs_sel);
+#ifdef CONFIG_X86_64
+		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
+#endif
+		local_irq_enable();
+
+		reload_tss();
+	}
+	save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+	if (msr_efer_need_save_restore(vcpu))
+		load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -242,6 +380,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
 {
 	u64 phys_addr = __pa(vcpu->vmcs);
 	int cpu;
+	u64 tsc_this, delta;
 
 	cpu = get_cpu();
 
@@ -275,15 +414,43 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+
+		/*
+		 * Make sure the time stamp counter is monotonous.
+		 */
+		rdtscll(tsc_this);
+		delta = vcpu->host_tsc - tsc_this;
+		vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
 	}
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	vmx_load_host_state(vcpu);
 	kvm_put_guest_fpu(vcpu);
 	put_cpu();
 }
 
+static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->fpu_active)
+		return;
+	vcpu->fpu_active = 1;
+	vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+	if (vcpu->cr0 & CR0_TS_MASK)
+		vmcs_set_bits(GUEST_CR0, CR0_TS_MASK);
+	update_exception_bitmap(vcpu);
+}
+
+static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
+{
+	if (!vcpu->fpu_active)
+		return;
+	vcpu->fpu_active = 0;
+	vmcs_set_bits(GUEST_CR0, CR0_TS_MASK);
+	update_exception_bitmap(vcpu);
+}
+
 static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)
 {
 	vcpu_clear(vcpu);
@@ -332,41 +499,61 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
 }
 
 /*
+ * Swap MSR entry in host/guest MSR entry array.
+ */
+void move_msr_up(struct kvm_vcpu *vcpu, int from, int to)
+{
+	struct vmx_msr_entry tmp;
+	tmp = vcpu->guest_msrs[to];
+	vcpu->guest_msrs[to] = vcpu->guest_msrs[from];
+	vcpu->guest_msrs[from] = tmp;
+	tmp = vcpu->host_msrs[to];
+	vcpu->host_msrs[to] = vcpu->host_msrs[from];
+	vcpu->host_msrs[from] = tmp;
+}
+
+/*
  * Set up the vmcs to automatically save and restore system
  * msrs. Don't touch the 64-bit msrs if the guest is in legacy
  * mode, as fiddling with msrs is very expensive.
  */
 static void setup_msrs(struct kvm_vcpu *vcpu)
 {
-	int nr_skip, nr_good_msrs;
-
-	if (is_long_mode(vcpu))
-		nr_skip = NR_BAD_MSRS;
-	else
-		nr_skip = NR_64BIT_MSRS;
-	nr_good_msrs = vcpu->nmsrs - nr_skip;
+	int save_nmsrs;
 
-	/*
-	 * MSR_K6_STAR is only needed on long mode guests, and only
-	 * if efer.sce is enabled.
-	 */
-	if (find_msr_entry(vcpu, MSR_K6_STAR)) {
-		--nr_good_msrs;
+	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
-		if (is_long_mode(vcpu) && (vcpu->shadow_efer & EFER_SCE))
-			++nr_good_msrs;
-#endif
+	if (is_long_mode(vcpu)) {
+		int index;
+
+		index = __find_msr_index(vcpu, MSR_SYSCALL_MASK);
+		if (index >= 0)
+			move_msr_up(vcpu, index, save_nmsrs++);
+		index = __find_msr_index(vcpu, MSR_LSTAR);
+		if (index >= 0)
+			move_msr_up(vcpu, index, save_nmsrs++);
+		index = __find_msr_index(vcpu, MSR_CSTAR);
+		if (index >= 0)
+			move_msr_up(vcpu, index, save_nmsrs++);
+		index = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
+		if (index >= 0)
+			move_msr_up(vcpu, index, save_nmsrs++);
+		/*
+		 * MSR_K6_STAR is only needed on long mode guests, and only
+		 * if efer.sce is enabled.
+		 */
+		index = __find_msr_index(vcpu, MSR_K6_STAR);
+		if ((index >= 0) && (vcpu->shadow_efer & EFER_SCE))
+			move_msr_up(vcpu, index, save_nmsrs++);
 	}
+#endif
+	vcpu->save_nmsrs = save_nmsrs;
 
-	vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + nr_skip));
-	vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + nr_skip));
-	vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->host_msrs + nr_skip));
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
+#ifdef CONFIG_X86_64
+	vcpu->msr_offset_kernel_gs_base =
+		__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
+#endif
+	vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
 }
 
 /*
@@ -394,23 +581,6 @@ static void guest_write_tsc(u64 guest_tsc)
 	vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
 }
 
-static void reload_tss(void)
-{
-#ifndef CONFIG_X86_64
-
-	/*
-	 * VT restores TR but not its size. Useless.
-	 */
-	struct descriptor_table gdt;
-	struct segment_descriptor *descs;
-
-	get_gdt(&gdt);
-	descs = (void *)gdt.base;
-	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
-	load_TR_desc();
-#endif
-}
-
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -470,10 +640,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
 	struct vmx_msr_entry *msr;
+	int ret = 0;
+
 	switch (msr_index) {
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
-		return kvm_set_msr_common(vcpu, msr_index, data);
+		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		if (vcpu->vmx_host_state.loaded)
+			load_transition_efer(vcpu);
+		break;
 	case MSR_FS_BASE:
 		vmcs_writel(GUEST_FS_BASE, data);
 		break;
@@ -497,14 +672,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		msr = find_msr_entry(vcpu, msr_index);
 		if (msr) {
 			msr->data = data;
+			if (vcpu->vmx_host_state.loaded)
+				load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
 			break;
 		}
-		return kvm_set_msr_common(vcpu, msr_index, data);
-		msr->data = data;
-		break;
+		ret = kvm_set_msr_common(vcpu, msr_index, data);
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -530,10 +705,8 @@ static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu)
 static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
 {
 	unsigned long dr7 = 0x400;
-	u32 exception_bitmap;
 	int old_singlestep;
 
-	exception_bitmap = vmcs_read32(EXCEPTION_BITMAP);
 	old_singlestep = vcpu->guest_debug.singlestep;
 
 	vcpu->guest_debug.enabled = dbg->enabled;
@@ -549,13 +722,9 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
 			dr7 |= 0 << (i*4+16); /* execution breakpoint */
 		}
 
-		exception_bitmap |= (1u << 1); /* Trap debug exceptions */
-
 		vcpu->guest_debug.singlestep = dbg->singlestep;
-	} else {
-		exception_bitmap &= ~(1u << 1); /* Ignore debug exceptions */
+	} else
 		vcpu->guest_debug.singlestep = 0;
-	}
 
 	if (old_singlestep && !vcpu->guest_debug.singlestep) {
 		unsigned long flags;
@@ -565,7 +734,7 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
 		vmcs_writel(GUEST_RFLAGS, flags);
 	}
 
-	vmcs_write32(EXCEPTION_BITMAP, exception_bitmap);
+	update_exception_bitmap(vcpu);
 	vmcs_writel(GUEST_DR7, dr7);
 
 	return 0;
@@ -679,14 +848,6 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
-static void update_exception_bitmap(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->rmode.active)
-		vmcs_write32(EXCEPTION_BITMAP, ~0);
-	else
-		vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR);
-}
-
 static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -793,6 +954,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
 	fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
 	fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
+
+	init_rmode_tss(vcpu->kvm);
 }
 
 #ifdef CONFIG_X86_64
@@ -837,6 +1000,8 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
+	vmx_fpu_deactivate(vcpu);
+
 	if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
 		enter_pmode(vcpu);
 
@@ -852,26 +1017,20 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	}
 #endif
 
-	if (!(cr0 & CR0_TS_MASK)) {
-		vcpu->fpu_active = 1;
-		vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK);
-	}
-
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0,
 		    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
 	vcpu->cr0 = cr0;
+
+	if (!(cr0 & CR0_TS_MASK) || !(cr0 & CR0_PE_MASK))
+		vmx_fpu_activate(vcpu);
 }
 
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	vmcs_writel(GUEST_CR3, cr3);
-
-	if (!(vcpu->cr0 & CR0_TS_MASK)) {
-		vcpu->fpu_active = 0;
-		vmcs_set_bits(GUEST_CR0, CR0_TS_MASK);
-		vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
-	}
+	if (vcpu->cr0 & CR0_PE_MASK)
+		vmx_fpu_deactivate(vcpu);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -937,23 +1096,11 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	var->unusable = (ar >> 16) & 1;
 }
 
-static void vmx_set_segment(struct kvm_vcpu *vcpu,
-			    struct kvm_segment *var, int seg)
+static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
-	vmcs_writel(sf->base, var->base);
-	vmcs_write32(sf->limit, var->limit);
-	vmcs_write16(sf->selector, var->selector);
-	if (vcpu->rmode.active && var->s) {
-		/*
-		 * Hack real-mode segments into vm86 compatibility.
-		 */
-		if (var->base == 0xffff0000 && var->selector == 0xf000)
-			vmcs_writel(sf->base, 0xf0000);
-		ar = 0xf3;
-	} else if (var->unusable)
+	if (var->unusable)
 		ar = 1 << 16;
 	else {
 		ar = var->type & 15;
@@ -967,6 +1114,35 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	}
 	if (ar == 0) /* a 0 value means unusable */
 		ar = AR_UNUSABLE_MASK;
+
+	return ar;
+}
+
+static void vmx_set_segment(struct kvm_vcpu *vcpu,
+			struct kvm_segment *var, int seg)
+{
+	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	u32 ar;
+
+	if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
+		vcpu->rmode.tr.selector = var->selector;
+		vcpu->rmode.tr.base = var->base;
+		vcpu->rmode.tr.limit = var->limit;
+		vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
+		return;
+	}
+	vmcs_writel(sf->base, var->base);
+	vmcs_write32(sf->limit, var->limit);
+	vmcs_write16(sf->selector, var->selector);
+	if (vcpu->rmode.active && var->s) {
+		/*
+		 * Hack real-mode segments into vm86 compatibility.
+		 */
+		if (var->base == 0xffff0000 && var->selector == 0xf000)
+			vmcs_writel(sf->base, 0xf0000);
+		ar = 0xf3;
+	} else
+		ar = vmx_segment_access_rights(var);
 	vmcs_write32(sf->ar_bytes, ar);
 }
 
@@ -1018,16 +1194,16 @@ static int init_rmode_tss(struct kvm* kvm)
 	}
 
 	page = kmap_atomic(p1, KM_USER0);
-	memset(page, 0, PAGE_SIZE);
+	clear_page(page);
 	*(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
 	kunmap_atomic(page, KM_USER0);
 
 	page = kmap_atomic(p2, KM_USER0);
-	memset(page, 0, PAGE_SIZE);
+	clear_page(page);
 	kunmap_atomic(page, KM_USER0);
 
 	page = kmap_atomic(p3, KM_USER0);
-	memset(page, 0, PAGE_SIZE);
+	clear_page(page);
 	*(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0;
 	kunmap_atomic(page, KM_USER0);
 
@@ -1066,7 +1242,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 	struct descriptor_table dt;
 	int i;
 	int ret = 0;
-	extern asmlinkage void kvm_vmx_return(void);
+	unsigned long kvm_vmx_return;
 
 	if (!init_rmode_tss(vcpu->kvm)) {
 		ret = -ENOMEM;
@@ -1076,9 +1252,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 	memset(vcpu->regs, 0, sizeof(vcpu->regs));
 	vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	vcpu->cr8 = 0;
-	vcpu->apic_base = 0xfee00000 |
-			/*for vcpu 0*/ MSR_IA32_APICBASE_BSP |
-			MSR_IA32_APICBASE_ENABLE;
+	vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+	if (vcpu == &vcpu->kvm->vcpus[0])
+		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
 
 	fx_init(vcpu);
 
@@ -1129,8 +1305,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 	vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
 	/* I/O */
-	vmcs_write64(IO_BITMAP_A, 0);
-	vmcs_write64(IO_BITMAP_B, 0);
+	vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
+	vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
 
 	guest_write_tsc(0);
 
@@ -1150,12 +1326,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 			CPU_BASED_HLT_EXITING /* 20.6.2 */
 			| CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */
 			| CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */
-			| CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */
+			| CPU_BASED_ACTIVATE_IO_BITMAP /* 20.6.2 */
 			| CPU_BASED_MOV_DR_EXITING
 			| CPU_BASED_USE_TSC_OFFSETING /* 21.3 */
 			);
 
-	vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR);
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
 	vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
@@ -1185,8 +1360,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 	get_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */
 
-
-	vmcs_writel(HOST_RIP, (unsigned long)kvm_vmx_return); /* 22.2.5 */
+	asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
+	vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
 	rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
@@ -1210,10 +1388,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->host_msrs[j].reserved = 0;
 		vcpu->host_msrs[j].data = data;
 		vcpu->guest_msrs[j] = vcpu->host_msrs[j];
-#ifdef CONFIG_X86_64
-		if (index == MSR_KERNEL_GS_BASE)
-			msr_offset_kernel_gs_base = j;
-#endif
 		++vcpu->nmsrs;
 	}
 
@@ -1241,6 +1415,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
 	vmx_set_efer(vcpu, 0);
 #endif
+	vmx_fpu_activate(vcpu);
+	update_exception_bitmap(vcpu);
 
 	return 0;
 
@@ -1365,7 +1541,11 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	if (!vcpu->rmode.active)
 		return 0;
 
-	if (vec == GP_VECTOR && err_code == 0)
+	/*
+	 * Instruction with address size override prefix opcode 0x67
+	 * Cause the #SS fault with 0 error code in VM86 mode.
+	 */
+	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
 		if (emulate_instruction(vcpu, NULL, 0, 0) == EMULATE_DONE)
 			return 1;
 	return 0;
@@ -1400,10 +1580,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	if (is_no_device(intr_info)) {
-		vcpu->fpu_active = 1;
-		vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
-		if (!(vcpu->cr0 & CR0_TS_MASK))
-			vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+		vmx_fpu_activate(vcpu);
 		return 1;
 	}
 
@@ -1445,8 +1622,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (vcpu->rmode.active &&
 	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
-								error_code))
+								error_code)) {
+		if (vcpu->halt_request) {
+			vcpu->halt_request = 0;
+			return kvm_emulate_halt(vcpu);
+		}
 		return 1;
+	}
 
 	if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) {
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
@@ -1595,11 +1777,10 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		break;
 	case 2: /* clts */
 		vcpu_load_rsp_rip(vcpu);
-		vcpu->fpu_active = 1;
-		vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
-		vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+		vmx_fpu_deactivate(vcpu);
 		vcpu->cr0 &= ~CR0_TS_MASK;
 		vmcs_writel(CR0_READ_SHADOW, vcpu->cr0);
+		vmx_fpu_activate(vcpu);
 		skip_emulated_instruction(vcpu);
 		return 1;
 	case 1: /*mov from cr*/
@@ -1734,12 +1915,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
-		return 1;
-
-	kvm_run->exit_reason = KVM_EXIT_HLT;
-	++vcpu->stat.halt_exits;
-	return 0;
+	return kvm_emulate_halt(vcpu);
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -1770,7 +1946,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 };
 
 static const int kvm_vmx_max_exit_handlers =
-	sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers);
+	ARRAY_SIZE(kvm_vmx_exit_handlers);
 
 /*
  * The guest has exited. See if we can fix it or if we need userspace
@@ -1810,61 +1986,44 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
 }
 
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+{
+}
+
 static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	u8 fail;
-	u16 fs_sel, gs_sel, ldt_sel;
-	int fs_gs_ldt_reload_needed;
 	int r;
 
-again:
-	/*
-	 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
-	 * allow segment selectors with cpl > 0 or ti == 1.
-	 */
-	fs_sel = read_fs();
-	gs_sel = read_gs();
-	ldt_sel = read_ldt();
-	fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel;
-	if (!fs_gs_ldt_reload_needed) {
-		vmcs_write16(HOST_FS_SELECTOR, fs_sel);
-		vmcs_write16(HOST_GS_SELECTOR, gs_sel);
-	} else {
-		vmcs_write16(HOST_FS_SELECTOR, 0);
-		vmcs_write16(HOST_GS_SELECTOR, 0);
-	}
-
-#ifdef CONFIG_X86_64
-	vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
-	vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
-#else
-	vmcs_writel(HOST_FS_BASE, segment_base(fs_sel));
-	vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
-#endif
+preempted:
+	if (vcpu->guest_debug.enabled)
+		kvm_guest_debug_pre(vcpu);
 
+again:
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
-	if (vcpu->guest_debug.enabled)
-		kvm_guest_debug_pre(vcpu);
-
+	vmx_save_host_state(vcpu);
 	kvm_load_guest_fpu(vcpu);
 
+	r = kvm_mmu_reload(vcpu);
+	if (unlikely(r))
+		goto out;
+
 	/*
 	 * Loading guest fpu may have cleared host cr0.ts
 	 */
 	vmcs_writel(HOST_CR0, read_cr0());
 
-#ifdef CONFIG_X86_64
-	if (is_long_mode(vcpu)) {
-		save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1);
-		load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
-	}
-#endif
+	local_irq_disable();
+
+	vcpu->guest_mode = 1;
+	if (vcpu->requests)
+		if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
+			vmx_flush_tlb(vcpu);
 
 	asm (
 		/* Store host registers */
-		"pushf \n\t"
 #ifdef CONFIG_X86_64
 		"push %%rax; push %%rbx; push %%rdx;"
 		"push %%rsi; push %%rdi; push %%rbp;"
@@ -1909,12 +2068,11 @@ again:
 		"mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */
 #endif
 		/* Enter guest mode */
-		"jne launched \n\t"
+		"jne .Llaunched \n\t"
 		ASM_VMX_VMLAUNCH "\n\t"
-		"jmp kvm_vmx_return \n\t"
-		"launched: " ASM_VMX_VMRESUME "\n\t"
-		".globl kvm_vmx_return \n\t"
-		"kvm_vmx_return: "
+		"jmp .Lkvm_vmx_return \n\t"
+		".Llaunched: " ASM_VMX_VMRESUME "\n\t"
+		".Lkvm_vmx_return: "
 		/* Save guest registers, load host registers, keep flags */
 #ifdef CONFIG_X86_64
 		"xchg %3, (%%rsp) \n\t"
@@ -1957,7 +2115,6 @@ again:
 		"pop %%ecx; popa \n\t"
 #endif
 		"setbe %0 \n\t"
-		"popf \n\t"
 	      : "=q" (fail)
 	      : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP),
 		"c"(vcpu),
@@ -1981,84 +2138,61 @@ again:
 		[cr2]"i"(offsetof(struct kvm_vcpu, cr2))
 	      : "cc", "memory" );
 
-	/*
-	 * Reload segment selectors ASAP. (it's needed for a functional
-	 * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64
-	 * relies on having 0 in %gs for the CPU PDA to work.)
-	 */
-	if (fs_gs_ldt_reload_needed) {
-		load_ldt(ldt_sel);
-		load_fs(fs_sel);
-		/*
-		 * If we have to reload gs, we must take care to
-		 * preserve our gs base.
-		 */
-		local_irq_disable();
-		load_gs(gs_sel);
-#ifdef CONFIG_X86_64
-		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
-#endif
-		local_irq_enable();
+	vcpu->guest_mode = 0;
+	local_irq_enable();
 
-		reload_tss();
-	}
 	++vcpu->stat.exits;
 
-#ifdef CONFIG_X86_64
-	if (is_long_mode(vcpu)) {
-		save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
-		load_msrs(vcpu->host_msrs, NR_BAD_MSRS);
-	}
-#endif
-
 	vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 
-	if (fail) {
+	if (unlikely(fail)) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason
 			= vmcs_read32(VM_INSTRUCTION_ERROR);
 		r = 0;
-	} else {
-		/*
-		 * Profile KVM exit RIPs:
-		 */
-		if (unlikely(prof_on == KVM_PROFILING))
-			profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
-
-		vcpu->launched = 1;
-		r = kvm_handle_exit(kvm_run, vcpu);
-		if (r > 0) {
-			/* Give scheduler a change to reschedule. */
-			if (signal_pending(current)) {
-				++vcpu->stat.signal_exits;
-				post_kvm_run_save(vcpu, kvm_run);
-				kvm_run->exit_reason = KVM_EXIT_INTR;
-				return -EINTR;
-			}
-
-			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-				++vcpu->stat.request_irq_exits;
-				post_kvm_run_save(vcpu, kvm_run);
-				kvm_run->exit_reason = KVM_EXIT_INTR;
-				return -EINTR;
-			}
-
-			kvm_resched(vcpu);
+		goto out;
+	}
+	/*
+	 * Profile KVM exit RIPs:
+	 */
+	if (unlikely(prof_on == KVM_PROFILING))
+		profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
+
+	vcpu->launched = 1;
+	r = kvm_handle_exit(kvm_run, vcpu);
+	if (r > 0) {
+		/* Give scheduler a change to reschedule. */
+		if (signal_pending(current)) {
+			r = -EINTR;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
+			++vcpu->stat.signal_exits;
+			goto out;
+		}
+
+		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+			r = -EINTR;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
+			++vcpu->stat.request_irq_exits;
+			goto out;
+		}
+		if (!need_resched()) {
+			++vcpu->stat.light_exits;
 			goto again;
 		}
 	}
 
+out:
+	if (r > 0) {
+		kvm_resched(vcpu);
+		goto preempted;
+	}
+
 	post_kvm_run_save(vcpu, kvm_run);
 	return r;
 }
 
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
-{
-	vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3));
-}
-
 static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
 				  unsigned long addr,
 				  u32 err_code)
@@ -2122,7 +2256,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 	vmcs_clear(vmcs);
 	vcpu->vmcs = vmcs;
 	vcpu->launched = 0;
-	vcpu->fpu_active = 1;
 
 	return 0;
 
@@ -2188,11 +2321,50 @@ static struct kvm_arch_ops vmx_arch_ops = {
 
 static int __init vmx_init(void)
 {
-	return kvm_init_arch(&vmx_arch_ops, THIS_MODULE);
+	void *iova;
+	int r;
+
+	vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	if (!vmx_io_bitmap_a)
+		return -ENOMEM;
+
+	vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	if (!vmx_io_bitmap_b) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Allow direct access to the PC debug port (it is often used for I/O
+	 * delays, but the vmexits simply slow things down).
+	 */
+	iova = kmap(vmx_io_bitmap_a);
+	memset(iova, 0xff, PAGE_SIZE);
+	clear_bit(0x80, iova);
+	kunmap(vmx_io_bitmap_a);
+
+	iova = kmap(vmx_io_bitmap_b);
+	memset(iova, 0xff, PAGE_SIZE);
+	kunmap(vmx_io_bitmap_b);
+
+	r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE);
+	if (r)
+		goto out1;
+
+	return 0;
+
+out1:
+	__free_page(vmx_io_bitmap_b);
+out:
+	__free_page(vmx_io_bitmap_a);
+	return r;
 }
 
 static void __exit vmx_exit(void)
 {
+	__free_page(vmx_io_bitmap_b);
+	__free_page(vmx_io_bitmap_a);
+
 	kvm_exit_arch();
 }
 