Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--  arch/x86/kvm/vmx.c | 893
1 file changed, 637 insertions(+), 256 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 10ce6ee4c491..2643b430d83a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -26,10 +26,14 @@
26#include <linux/highmem.h> 26#include <linux/highmem.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/moduleparam.h> 28#include <linux/moduleparam.h>
29#include "kvm_cache_regs.h"
30#include "x86.h"
29 31
30#include <asm/io.h> 32#include <asm/io.h>
31#include <asm/desc.h> 33#include <asm/desc.h>
32 34
35#define __ex(x) __kvm_handle_fault_on_reboot(x)
36
33MODULE_AUTHOR("Qumranet"); 37MODULE_AUTHOR("Qumranet");
34MODULE_LICENSE("GPL"); 38MODULE_LICENSE("GPL");
35 39
@@ -45,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0);
45static int enable_ept = 1; 49static int enable_ept = 1;
46module_param(enable_ept, bool, 0); 50module_param(enable_ept, bool, 0);
47 51
52static int emulate_invalid_guest_state = 0;
53module_param(emulate_invalid_guest_state, bool, 0);
54
48struct vmcs { 55struct vmcs {
49 u32 revision_id; 56 u32 revision_id;
50 u32 abort; 57 u32 abort;
@@ -53,6 +60,8 @@ struct vmcs {
53 60
54struct vcpu_vmx { 61struct vcpu_vmx {
55 struct kvm_vcpu vcpu; 62 struct kvm_vcpu vcpu;
63 struct list_head local_vcpus_link;
64 unsigned long host_rsp;
56 int launched; 65 int launched;
57 u8 fail; 66 u8 fail;
58 u32 idt_vectoring_info; 67 u32 idt_vectoring_info;
@@ -80,6 +89,7 @@ struct vcpu_vmx {
80 } irq; 89 } irq;
81 } rmode; 90 } rmode;
82 int vpid; 91 int vpid;
92 bool emulation_required;
83}; 93};
84 94
85static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 95static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -88,9 +98,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
88} 98}
89 99
90static int init_rmode(struct kvm *kvm); 100static int init_rmode(struct kvm *kvm);
101static u64 construct_eptp(unsigned long root_hpa);
91 102
92static DEFINE_PER_CPU(struct vmcs *, vmxarea); 103static DEFINE_PER_CPU(struct vmcs *, vmxarea);
93static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 104static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
105static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
94 106
95static struct page *vmx_io_bitmap_a; 107static struct page *vmx_io_bitmap_a;
96static struct page *vmx_io_bitmap_b; 108static struct page *vmx_io_bitmap_b;
@@ -260,6 +272,11 @@ static inline int cpu_has_vmx_vpid(void)
260 SECONDARY_EXEC_ENABLE_VPID); 272 SECONDARY_EXEC_ENABLE_VPID);
261} 273}
262 274
275static inline int cpu_has_virtual_nmis(void)
276{
277 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
278}
279
263static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) 280static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
264{ 281{
265 int i; 282 int i;
@@ -278,7 +295,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
278 u64 gva; 295 u64 gva;
279 } operand = { vpid, 0, gva }; 296 } operand = { vpid, 0, gva };
280 297
281 asm volatile (ASM_VMX_INVVPID 298 asm volatile (__ex(ASM_VMX_INVVPID)
282 /* CF==1 or ZF==1 --> rc = -1 */ 299 /* CF==1 or ZF==1 --> rc = -1 */
283 "; ja 1f ; ud2 ; 1:" 300 "; ja 1f ; ud2 ; 1:"
284 : : "a"(&operand), "c"(ext) : "cc", "memory"); 301 : : "a"(&operand), "c"(ext) : "cc", "memory");
@@ -290,7 +307,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
290 u64 eptp, gpa; 307 u64 eptp, gpa;
291 } operand = {eptp, gpa}; 308 } operand = {eptp, gpa};
292 309
293 asm volatile (ASM_VMX_INVEPT 310 asm volatile (__ex(ASM_VMX_INVEPT)
294 /* CF==1 or ZF==1 --> rc = -1 */ 311 /* CF==1 or ZF==1 --> rc = -1 */
295 "; ja 1f ; ud2 ; 1:\n" 312 "; ja 1f ; ud2 ; 1:\n"
296 : : "a" (&operand), "c" (ext) : "cc", "memory"); 313 : : "a" (&operand), "c" (ext) : "cc", "memory");
@@ -311,7 +328,7 @@ static void vmcs_clear(struct vmcs *vmcs)
311 u64 phys_addr = __pa(vmcs); 328 u64 phys_addr = __pa(vmcs);
312 u8 error; 329 u8 error;
313 330
314 asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0" 331 asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
315 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) 332 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
316 : "cc", "memory"); 333 : "cc", "memory");
317 if (error) 334 if (error)
@@ -329,6 +346,9 @@ static void __vcpu_clear(void *arg)
329 if (per_cpu(current_vmcs, cpu) == vmx->vmcs) 346 if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
330 per_cpu(current_vmcs, cpu) = NULL; 347 per_cpu(current_vmcs, cpu) = NULL;
331 rdtscll(vmx->vcpu.arch.host_tsc); 348 rdtscll(vmx->vcpu.arch.host_tsc);
349 list_del(&vmx->local_vcpus_link);
350 vmx->vcpu.cpu = -1;
351 vmx->launched = 0;
332} 352}
333 353
334static void vcpu_clear(struct vcpu_vmx *vmx) 354static void vcpu_clear(struct vcpu_vmx *vmx)
@@ -336,7 +356,6 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
336 if (vmx->vcpu.cpu == -1) 356 if (vmx->vcpu.cpu == -1)
337 return; 357 return;
338 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); 358 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
339 vmx->launched = 0;
340} 359}
341 360
342static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) 361static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
@@ -378,7 +397,7 @@ static unsigned long vmcs_readl(unsigned long field)
378{ 397{
379 unsigned long value; 398 unsigned long value;
380 399
381 asm volatile (ASM_VMX_VMREAD_RDX_RAX 400 asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
382 : "=a"(value) : "d"(field) : "cc"); 401 : "=a"(value) : "d"(field) : "cc");
383 return value; 402 return value;
384} 403}
@@ -413,7 +432,7 @@ static void vmcs_writel(unsigned long field, unsigned long value)
413{ 432{
414 u8 error; 433 u8 error;
415 434
416 asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" 435 asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
417 : "=q"(error) : "a"(value), "d"(field) : "cc"); 436 : "=q"(error) : "a"(value), "d"(field) : "cc");
418 if (unlikely(error)) 437 if (unlikely(error))
419 vmwrite_error(field, value); 438 vmwrite_error(field, value);
@@ -431,10 +450,8 @@ static void vmcs_write32(unsigned long field, u32 value)
431 450
432static void vmcs_write64(unsigned long field, u64 value) 451static void vmcs_write64(unsigned long field, u64 value)
433{ 452{
434#ifdef CONFIG_X86_64
435 vmcs_writel(field, value);
436#else
437 vmcs_writel(field, value); 453 vmcs_writel(field, value);
454#ifndef CONFIG_X86_64
438 asm volatile (""); 455 asm volatile ("");
439 vmcs_writel(field+1, value >> 32); 456 vmcs_writel(field+1, value >> 32);
440#endif 457#endif
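
The rewritten vmcs_write64() above drops the redundant #ifdef branch: the low 32 bits are always written, and on a 32-bit host a second write to field+1 stores the high dword (64-bit VMCS fields expose their upper half at the adjacent field encoding); the empty asm statement between the two writes appears intended as a compiler barrier. A minimal user-space sketch of the same split, with write32() standing in for vmcs_writel() (not the real accessor):

#include <stdint.h>
#include <stdio.h>

/* stand-in for vmcs_writel(); in the kernel this is a VMWRITE */
static void write32(unsigned long field, uint32_t value)
{
	printf("field %#lx <- %#x\n", field, value);
}

/* on a 32-bit host a 64-bit VMCS field is written as two halves;
 * field + 1 is the encoding of the field's high dword */
static void write64_split(unsigned long field, uint64_t value)
{
	write32(field, (uint32_t)value);
	write32(field + 1, (uint32_t)(value >> 32));
}

int main(void)
{
	write64_split(0x2000, 0x1234567890abcdefULL); /* e.g. IO_BITMAP_A */
	return 0;
}
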
@@ -458,7 +475,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
458 if (!vcpu->fpu_active) 475 if (!vcpu->fpu_active)
459 eb |= 1u << NM_VECTOR; 476 eb |= 1u << NM_VECTOR;
460 if (vcpu->guest_debug.enabled) 477 if (vcpu->guest_debug.enabled)
461 eb |= 1u << 1; 478 eb |= 1u << DB_VECTOR;
462 if (vcpu->arch.rmode.active) 479 if (vcpu->arch.rmode.active)
463 eb = ~0; 480 eb = ~0;
464 if (vm_need_ept()) 481 if (vm_need_ept())
@@ -474,7 +491,7 @@ static void reload_tss(void)
474 struct descriptor_table gdt; 491 struct descriptor_table gdt;
475 struct desc_struct *descs; 492 struct desc_struct *descs;
476 493
477 get_gdt(&gdt); 494 kvm_get_gdt(&gdt);
478 descs = (void *)gdt.base; 495 descs = (void *)gdt.base;
479 descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ 496 descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
480 load_TR_desc(); 497 load_TR_desc();
@@ -530,9 +547,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
530 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not 547 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
531 * allow segment selectors with cpl > 0 or ti == 1. 548 * allow segment selectors with cpl > 0 or ti == 1.
532 */ 549 */
533 vmx->host_state.ldt_sel = read_ldt(); 550 vmx->host_state.ldt_sel = kvm_read_ldt();
534 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; 551 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
535 vmx->host_state.fs_sel = read_fs(); 552 vmx->host_state.fs_sel = kvm_read_fs();
536 if (!(vmx->host_state.fs_sel & 7)) { 553 if (!(vmx->host_state.fs_sel & 7)) {
537 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); 554 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
538 vmx->host_state.fs_reload_needed = 0; 555 vmx->host_state.fs_reload_needed = 0;
@@ -540,7 +557,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
540 vmcs_write16(HOST_FS_SELECTOR, 0); 557 vmcs_write16(HOST_FS_SELECTOR, 0);
541 vmx->host_state.fs_reload_needed = 1; 558 vmx->host_state.fs_reload_needed = 1;
542 } 559 }
543 vmx->host_state.gs_sel = read_gs(); 560 vmx->host_state.gs_sel = kvm_read_gs();
544 if (!(vmx->host_state.gs_sel & 7)) 561 if (!(vmx->host_state.gs_sel & 7))
545 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); 562 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
546 else { 563 else {
@@ -576,15 +593,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
576 ++vmx->vcpu.stat.host_state_reload; 593 ++vmx->vcpu.stat.host_state_reload;
577 vmx->host_state.loaded = 0; 594 vmx->host_state.loaded = 0;
578 if (vmx->host_state.fs_reload_needed) 595 if (vmx->host_state.fs_reload_needed)
579 load_fs(vmx->host_state.fs_sel); 596 kvm_load_fs(vmx->host_state.fs_sel);
580 if (vmx->host_state.gs_ldt_reload_needed) { 597 if (vmx->host_state.gs_ldt_reload_needed) {
581 load_ldt(vmx->host_state.ldt_sel); 598 kvm_load_ldt(vmx->host_state.ldt_sel);
582 /* 599 /*
583 * If we have to reload gs, we must take care to 600 * If we have to reload gs, we must take care to
584 * preserve our gs base. 601 * preserve our gs base.
585 */ 602 */
586 local_irq_save(flags); 603 local_irq_save(flags);
587 load_gs(vmx->host_state.gs_sel); 604 kvm_load_gs(vmx->host_state.gs_sel);
588#ifdef CONFIG_X86_64 605#ifdef CONFIG_X86_64
589 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); 606 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
590#endif 607#endif
@@ -617,13 +634,17 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
617 vcpu_clear(vmx); 634 vcpu_clear(vmx);
618 kvm_migrate_timers(vcpu); 635 kvm_migrate_timers(vcpu);
619 vpid_sync_vcpu_all(vmx); 636 vpid_sync_vcpu_all(vmx);
637 local_irq_disable();
638 list_add(&vmx->local_vcpus_link,
639 &per_cpu(vcpus_on_cpu, cpu));
640 local_irq_enable();
620 } 641 }
621 642
622 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { 643 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
623 u8 error; 644 u8 error;
624 645
625 per_cpu(current_vmcs, cpu) = vmx->vmcs; 646 per_cpu(current_vmcs, cpu) = vmx->vmcs;
626 asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" 647 asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
627 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) 648 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
628 : "cc"); 649 : "cc");
629 if (error) 650 if (error)
@@ -640,8 +661,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
640 * Linux uses per-cpu TSS and GDT, so set these when switching 661 * Linux uses per-cpu TSS and GDT, so set these when switching
641 * processors. 662 * processors.
642 */ 663 */
643 vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */ 664 vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
644 get_gdt(&dt); 665 kvm_get_gdt(&dt);
645 vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ 666 vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */
646 667
647 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); 668 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
@@ -684,11 +705,6 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
684 update_exception_bitmap(vcpu); 705 update_exception_bitmap(vcpu);
685} 706}
686 707
687static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)
688{
689 vcpu_clear(to_vmx(vcpu));
690}
691
692static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 708static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
693{ 709{
694 return vmcs_readl(GUEST_RFLAGS); 710 return vmcs_readl(GUEST_RFLAGS);
@@ -706,9 +722,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
706 unsigned long rip; 722 unsigned long rip;
707 u32 interruptibility; 723 u32 interruptibility;
708 724
709 rip = vmcs_readl(GUEST_RIP); 725 rip = kvm_rip_read(vcpu);
710 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 726 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
711 vmcs_writel(GUEST_RIP, rip); 727 kvm_rip_write(vcpu, rip);
712 728
713 /* 729 /*
714 * We emulated an instruction, so temporary interrupt blocking 730 * We emulated an instruction, so temporary interrupt blocking
@@ -724,19 +740,35 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
724static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 740static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
725 bool has_error_code, u32 error_code) 741 bool has_error_code, u32 error_code)
726{ 742{
743 struct vcpu_vmx *vmx = to_vmx(vcpu);
744
745 if (has_error_code)
746 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
747
748 if (vcpu->arch.rmode.active) {
749 vmx->rmode.irq.pending = true;
750 vmx->rmode.irq.vector = nr;
751 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
752 if (nr == BP_VECTOR)
753 vmx->rmode.irq.rip++;
754 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
755 nr | INTR_TYPE_SOFT_INTR
756 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
757 | INTR_INFO_VALID_MASK);
758 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
759 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
760 return;
761 }
762
727 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 763 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
728 nr | INTR_TYPE_EXCEPTION 764 nr | INTR_TYPE_EXCEPTION
729 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) 765 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
730 | INTR_INFO_VALID_MASK); 766 | INTR_INFO_VALID_MASK);
731 if (has_error_code)
732 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
733} 767}
734 768
735static bool vmx_exception_injected(struct kvm_vcpu *vcpu) 769static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
736{ 770{
737 struct vcpu_vmx *vmx = to_vmx(vcpu); 771 return false;
738
739 return !(vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
740} 772}
741 773
742/* 774/*
@@ -913,6 +945,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
913 case MSR_IA32_TIME_STAMP_COUNTER: 945 case MSR_IA32_TIME_STAMP_COUNTER:
914 guest_write_tsc(data); 946 guest_write_tsc(data);
915 break; 947 break;
948 case MSR_P6_PERFCTR0:
949 case MSR_P6_PERFCTR1:
950 case MSR_P6_EVNTSEL0:
951 case MSR_P6_EVNTSEL1:
952 /*
953 * Just discard all writes to the performance counters; this
954 * should keep both older linux and windows 64-bit guests
955 * happy
956 */
957 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
958
959 break;
916 default: 960 default:
917 vmx_load_host_state(vmx); 961 vmx_load_host_state(vmx);
918 msr = find_msr_entry(vmx, msr_index); 962 msr = find_msr_entry(vmx, msr_index);
@@ -926,24 +970,19 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
926 return ret; 970 return ret;
927} 971}
928 972
929/* 973static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
930 * Sync the rsp and rip registers into the vcpu structure. This allows
931 * registers to be accessed by indexing vcpu->arch.regs.
932 */
933static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu)
934{
935 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
936 vcpu->arch.rip = vmcs_readl(GUEST_RIP);
937}
938
939/*
940 * Syncs rsp and rip back into the vmcs. Should be called after possible
941 * modification.
942 */
943static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu)
944{ 974{
945 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); 975 __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
946 vmcs_writel(GUEST_RIP, vcpu->arch.rip); 976 switch (reg) {
977 case VCPU_REGS_RSP:
978 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
979 break;
980 case VCPU_REGS_RIP:
981 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
982 break;
983 default:
984 break;
985 }
947} 986}
948 987
949static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 988static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
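
With kvm_cache_regs.h, rsp/rip are no longer synced wholesale around every exit: kvm_register_read() consults a per-vcpu availability bitmap and only calls back into vmx_cache_reg() (above) the first time a register is needed after an exit. A self-contained sketch of that lazy-caching pattern; read_from_vmcs() and register_read() here are stand-ins, not the real KVM helpers:

#include <stdio.h>

enum reg { REG_RSP, REG_RIP, NR_REGS };

struct vcpu {
	unsigned long regs[NR_REGS];
	unsigned long regs_avail;	/* bitmask: which regs are up to date */
};

/* stand-in for a vmcs_readl() of GUEST_RSP / GUEST_RIP */
static unsigned long read_from_vmcs(enum reg r)
{
	return r == REG_RSP ? 0x8000 : 0xfff0;
}

/* analogous to vmx_cache_reg(): pull one register out of the VMCS */
static void cache_reg(struct vcpu *v, enum reg r)
{
	v->regs[r] = read_from_vmcs(r);
	v->regs_avail |= 1ul << r;
}

/* analogous to kvm_register_read(): lazy, cached access */
static unsigned long register_read(struct vcpu *v, enum reg r)
{
	if (!(v->regs_avail & (1ul << r)))
		cache_reg(v, r);
	return v->regs[r];
}

int main(void)
{
	struct vcpu v = { .regs_avail = 0 };	/* nothing cached after an exit */

	printf("rip = %#lx\n", register_read(&v, REG_RIP)); /* first read fills cache */
	printf("rip = %#lx\n", register_read(&v, REG_RIP)); /* served from the cache */
	return 0;
}
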
@@ -986,17 +1025,9 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
986 1025
987static int vmx_get_irq(struct kvm_vcpu *vcpu) 1026static int vmx_get_irq(struct kvm_vcpu *vcpu)
988{ 1027{
989 struct vcpu_vmx *vmx = to_vmx(vcpu); 1028 if (!vcpu->arch.interrupt.pending)
990 u32 idtv_info_field; 1029 return -1;
991 1030 return vcpu->arch.interrupt.nr;
992 idtv_info_field = vmx->idt_vectoring_info;
993 if (idtv_info_field & INTR_INFO_VALID_MASK) {
994 if (is_external_interrupt(idtv_info_field))
995 return idtv_info_field & VECTORING_INFO_VECTOR_MASK;
996 else
997 printk(KERN_DEBUG "pending exception: not handled yet\n");
998 }
999 return -1;
1000} 1031}
1001 1032
1002static __init int cpu_has_kvm_support(void) 1033static __init int cpu_has_kvm_support(void)
@@ -1010,9 +1041,9 @@ static __init int vmx_disabled_by_bios(void)
1010 u64 msr; 1041 u64 msr;
1011 1042
1012 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 1043 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1013 return (msr & (MSR_IA32_FEATURE_CONTROL_LOCKED | 1044 return (msr & (FEATURE_CONTROL_LOCKED |
1014 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) 1045 FEATURE_CONTROL_VMXON_ENABLED))
1015 == MSR_IA32_FEATURE_CONTROL_LOCKED; 1046 == FEATURE_CONTROL_LOCKED;
1016 /* locked but not enabled */ 1047 /* locked but not enabled */
1017} 1048}
1018 1049
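
vmx_disabled_by_bios() and hardware_enable() both key off two bits of MSR_IA32_FEATURE_CONTROL, now spelled FEATURE_CONTROL_LOCKED and FEATURE_CONTROL_VMXON_ENABLED: locked without the enable bit means the BIOS has disabled VMX; unlocked means KVM may set and lock both bits itself. A small sketch of that combined decision; treat the exact bit positions as illustrative (they follow the SDM's lock / VMXON-outside-SMX layout):

#include <stdint.h>
#include <stdio.h>

#define FC_LOCKED        (1ull << 0)	/* lock bit; MSR is read-only once set */
#define FC_VMXON_ENABLED (1ull << 2)	/* VMXON allowed outside SMX */

/* mirrors vmx_disabled_by_bios() + hardware_enable():
 * returns 0 if VMX is usable (enabling and locking it if needed),
 * -1 if the BIOS locked the MSR with VMX disabled */
static int check_feature_control(uint64_t *msr)
{
	if ((*msr & (FC_LOCKED | FC_VMXON_ENABLED)) == FC_LOCKED)
		return -1;			/* locked but not enabled */
	if ((*msr & (FC_LOCKED | FC_VMXON_ENABLED)) !=
	    (FC_LOCKED | FC_VMXON_ENABLED))
		*msr |= FC_LOCKED | FC_VMXON_ENABLED;	/* enable and lock */
	return 0;
}

int main(void)
{
	uint64_t bios_off = FC_LOCKED, untouched = 0;

	printf("locked, vmx off: %d\n", check_feature_control(&bios_off));  /* -1 */
	printf("unlocked:        %d\n", check_feature_control(&untouched)); /*  0 */
	return 0;
}
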
@@ -1022,23 +1053,36 @@ static void hardware_enable(void *garbage)
1022 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 1053 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
1023 u64 old; 1054 u64 old;
1024 1055
1056 INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
1025 rdmsrl(MSR_IA32_FEATURE_CONTROL, old); 1057 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
1026 if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | 1058 if ((old & (FEATURE_CONTROL_LOCKED |
1027 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) 1059 FEATURE_CONTROL_VMXON_ENABLED))
1028 != (MSR_IA32_FEATURE_CONTROL_LOCKED | 1060 != (FEATURE_CONTROL_LOCKED |
1029 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) 1061 FEATURE_CONTROL_VMXON_ENABLED))
1030 /* enable and lock */ 1062 /* enable and lock */
1031 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 1063 wrmsrl(MSR_IA32_FEATURE_CONTROL, old |
1032 MSR_IA32_FEATURE_CONTROL_LOCKED | 1064 FEATURE_CONTROL_LOCKED |
1033 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); 1065 FEATURE_CONTROL_VMXON_ENABLED);
1034 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ 1066 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
1035 asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) 1067 asm volatile (ASM_VMX_VMXON_RAX
1068 : : "a"(&phys_addr), "m"(phys_addr)
1036 : "memory", "cc"); 1069 : "memory", "cc");
1037} 1070}
1038 1071
1072static void vmclear_local_vcpus(void)
1073{
1074 int cpu = raw_smp_processor_id();
1075 struct vcpu_vmx *vmx, *n;
1076
1077 list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu),
1078 local_vcpus_link)
1079 __vcpu_clear(vmx);
1080}
1081
1039static void hardware_disable(void *garbage) 1082static void hardware_disable(void *garbage)
1040{ 1083{
1041 asm volatile (ASM_VMX_VMXOFF : : : "cc"); 1084 vmclear_local_vcpus();
1085 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
1042 write_cr4(read_cr4() & ~X86_CR4_VMXE); 1086 write_cr4(read_cr4() & ~X86_CR4_VMXE);
1043} 1087}
1044 1088
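
hardware_disable() now walks a per-CPU list of loaded VMCSs and VMCLEARs each one before issuing VMXOFF, so the processor is not left holding live VMCS data across reboot or kexec; __vcpu_clear() does the matching bookkeeping (removes the vcpu from the list, sets cpu = -1, launched = 0). A toy model of that per-CPU registry, using a hand-rolled singly linked list instead of the kernel's list_head (all names below are stand-ins):

#include <stdio.h>

struct toy_vcpu {
	int id;
	int cpu;			/* -1 once cleared, like vmx->vcpu.cpu */
	struct toy_vcpu *next;
};

static struct toy_vcpu *vcpus_on_this_cpu;	/* one list per CPU in the real code */

static void vcpu_load(struct toy_vcpu *v, int cpu)
{
	v->cpu = cpu;
	v->next = vcpus_on_this_cpu;	/* list_add(&vmx->local_vcpus_link, ...) */
	vcpus_on_this_cpu = v;
}

/* analogous to vmclear_local_vcpus(): clear everything before "VMXOFF" */
static void clear_local_vcpus(void)
{
	struct toy_vcpu *v, *n;

	for (v = vcpus_on_this_cpu; v; v = n) {
		n = v->next;
		printf("VMCLEAR vcpu %d\n", v->id);	/* __vcpu_clear(vmx) */
		v->cpu = -1;
	}
	vcpus_on_this_cpu = NULL;
}

int main(void)
{
	struct toy_vcpu a = { .id = 0 }, b = { .id = 1 };

	vcpu_load(&a, 0);
	vcpu_load(&b, 0);
	clear_local_vcpus();		/* what hardware_disable() now does first */
	return 0;
}
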
@@ -1072,7 +1116,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1072 u32 _vmentry_control = 0; 1116 u32 _vmentry_control = 0;
1073 1117
1074 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; 1118 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
1075 opt = 0; 1119 opt = PIN_BASED_VIRTUAL_NMIS;
1076 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, 1120 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
1077 &_pin_based_exec_control) < 0) 1121 &_pin_based_exec_control) < 0)
1078 return -EIO; 1122 return -EIO;
@@ -1086,7 +1130,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1086 CPU_BASED_CR3_STORE_EXITING | 1130 CPU_BASED_CR3_STORE_EXITING |
1087 CPU_BASED_USE_IO_BITMAPS | 1131 CPU_BASED_USE_IO_BITMAPS |
1088 CPU_BASED_MOV_DR_EXITING | 1132 CPU_BASED_MOV_DR_EXITING |
1089 CPU_BASED_USE_TSC_OFFSETING; 1133 CPU_BASED_USE_TSC_OFFSETING |
1134 CPU_BASED_INVLPG_EXITING;
1090 opt = CPU_BASED_TPR_SHADOW | 1135 opt = CPU_BASED_TPR_SHADOW |
1091 CPU_BASED_USE_MSR_BITMAPS | 1136 CPU_BASED_USE_MSR_BITMAPS |
1092 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 1137 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -1115,9 +1160,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1115 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; 1160 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
1116#endif 1161#endif
1117 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { 1162 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
1118 /* CR3 accesses don't need to cause VM Exits when EPT enabled */ 1163 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
1164 enabled */
1119 min &= ~(CPU_BASED_CR3_LOAD_EXITING | 1165 min &= ~(CPU_BASED_CR3_LOAD_EXITING |
1120 CPU_BASED_CR3_STORE_EXITING); 1166 CPU_BASED_CR3_STORE_EXITING |
1167 CPU_BASED_INVLPG_EXITING);
1121 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, 1168 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
1122 &_cpu_based_exec_control) < 0) 1169 &_cpu_based_exec_control) < 0)
1123 return -EIO; 1170 return -EIO;
@@ -1254,7 +1301,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
1254static void enter_pmode(struct kvm_vcpu *vcpu) 1301static void enter_pmode(struct kvm_vcpu *vcpu)
1255{ 1302{
1256 unsigned long flags; 1303 unsigned long flags;
1304 struct vcpu_vmx *vmx = to_vmx(vcpu);
1257 1305
1306 vmx->emulation_required = 1;
1258 vcpu->arch.rmode.active = 0; 1307 vcpu->arch.rmode.active = 0;
1259 1308
1260 vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); 1309 vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
@@ -1271,6 +1320,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1271 1320
1272 update_exception_bitmap(vcpu); 1321 update_exception_bitmap(vcpu);
1273 1322
1323 if (emulate_invalid_guest_state)
1324 return;
1325
1274 fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es); 1326 fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
1275 fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); 1327 fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
1276 fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); 1328 fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
@@ -1311,7 +1363,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
1311static void enter_rmode(struct kvm_vcpu *vcpu) 1363static void enter_rmode(struct kvm_vcpu *vcpu)
1312{ 1364{
1313 unsigned long flags; 1365 unsigned long flags;
1366 struct vcpu_vmx *vmx = to_vmx(vcpu);
1314 1367
1368 vmx->emulation_required = 1;
1315 vcpu->arch.rmode.active = 1; 1369 vcpu->arch.rmode.active = 1;
1316 1370
1317 vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); 1371 vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
@@ -1333,6 +1387,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1333 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); 1387 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
1334 update_exception_bitmap(vcpu); 1388 update_exception_bitmap(vcpu);
1335 1389
1390 if (emulate_invalid_guest_state)
1391 goto continue_rmode;
1392
1336 vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); 1393 vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
1337 vmcs_write32(GUEST_SS_LIMIT, 0xffff); 1394 vmcs_write32(GUEST_SS_LIMIT, 0xffff);
1338 vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); 1395 vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
@@ -1348,6 +1405,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1348 fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); 1405 fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
1349 fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); 1406 fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
1350 1407
1408continue_rmode:
1351 kvm_mmu_reset_context(vcpu); 1409 kvm_mmu_reset_context(vcpu);
1352 init_rmode(vcpu->kvm); 1410 init_rmode(vcpu->kvm);
1353} 1411}
@@ -1389,6 +1447,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
1389static void vmx_flush_tlb(struct kvm_vcpu *vcpu) 1447static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1390{ 1448{
1391 vpid_sync_vcpu_all(to_vmx(vcpu)); 1449 vpid_sync_vcpu_all(to_vmx(vcpu));
1450 if (vm_need_ept())
1451 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
1392} 1452}
1393 1453
1394static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1454static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
@@ -1420,7 +1480,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1420 if (!(cr0 & X86_CR0_PG)) { 1480 if (!(cr0 & X86_CR0_PG)) {
1421 /* From paging/starting to nonpaging */ 1481 /* From paging/starting to nonpaging */
1422 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1482 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1423 vmcs_config.cpu_based_exec_ctrl | 1483 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
1424 (CPU_BASED_CR3_LOAD_EXITING | 1484 (CPU_BASED_CR3_LOAD_EXITING |
1425 CPU_BASED_CR3_STORE_EXITING)); 1485 CPU_BASED_CR3_STORE_EXITING));
1426 vcpu->arch.cr0 = cr0; 1486 vcpu->arch.cr0 = cr0;
@@ -1430,7 +1490,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1430 } else if (!is_paging(vcpu)) { 1490 } else if (!is_paging(vcpu)) {
1431 /* From nonpaging to paging */ 1491 /* From nonpaging to paging */
1432 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1492 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1433 vmcs_config.cpu_based_exec_ctrl & 1493 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
1434 ~(CPU_BASED_CR3_LOAD_EXITING | 1494 ~(CPU_BASED_CR3_LOAD_EXITING |
1435 CPU_BASED_CR3_STORE_EXITING)); 1495 CPU_BASED_CR3_STORE_EXITING));
1436 vcpu->arch.cr0 = cr0; 1496 vcpu->arch.cr0 = cr0;
@@ -1679,6 +1739,186 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
1679 vmcs_writel(GUEST_GDTR_BASE, dt->base); 1739 vmcs_writel(GUEST_GDTR_BASE, dt->base);
1680} 1740}
1681 1741
1742static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
1743{
1744 struct kvm_segment var;
1745 u32 ar;
1746
1747 vmx_get_segment(vcpu, &var, seg);
1748 ar = vmx_segment_access_rights(&var);
1749
1750 if (var.base != (var.selector << 4))
1751 return false;
1752 if (var.limit != 0xffff)
1753 return false;
1754 if (ar != 0xf3)
1755 return false;
1756
1757 return true;
1758}
1759
1760static bool code_segment_valid(struct kvm_vcpu *vcpu)
1761{
1762 struct kvm_segment cs;
1763 unsigned int cs_rpl;
1764
1765 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
1766 cs_rpl = cs.selector & SELECTOR_RPL_MASK;
1767
1768 if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
1769 return false;
1770 if (!cs.s)
1771 return false;
1772 if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) {
1773 if (cs.dpl > cs_rpl)
1774 return false;
1775 } else if (cs.type & AR_TYPE_CODE_MASK) {
1776 if (cs.dpl != cs_rpl)
1777 return false;
1778 }
1779 if (!cs.present)
1780 return false;
1781
1782 /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
1783 return true;
1784}
1785
1786static bool stack_segment_valid(struct kvm_vcpu *vcpu)
1787{
1788 struct kvm_segment ss;
1789 unsigned int ss_rpl;
1790
1791 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
1792 ss_rpl = ss.selector & SELECTOR_RPL_MASK;
1793
1794 if ((ss.type != 3) || (ss.type != 7))
1795 return false;
1796 if (!ss.s)
1797 return false;
1798 if (ss.dpl != ss_rpl) /* DPL != RPL */
1799 return false;
1800 if (!ss.present)
1801 return false;
1802
1803 return true;
1804}
1805
1806static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
1807{
1808 struct kvm_segment var;
1809 unsigned int rpl;
1810
1811 vmx_get_segment(vcpu, &var, seg);
1812 rpl = var.selector & SELECTOR_RPL_MASK;
1813
1814 if (!var.s)
1815 return false;
1816 if (!var.present)
1817 return false;
1818 if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) {
1819 if (var.dpl < rpl) /* DPL < RPL */
1820 return false;
1821 }
1822
1823 /* TODO: Add other members to kvm_segment_field to allow checking for other access
1824 * rights flags
1825 */
1826 return true;
1827}
1828
1829static bool tr_valid(struct kvm_vcpu *vcpu)
1830{
1831 struct kvm_segment tr;
1832
1833 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
1834
1835 if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */
1836 return false;
1837 if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */
1838 return false;
1839 if (!tr.present)
1840 return false;
1841
1842 return true;
1843}
1844
1845static bool ldtr_valid(struct kvm_vcpu *vcpu)
1846{
1847 struct kvm_segment ldtr;
1848
1849 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
1850
1851 if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */
1852 return false;
1853 if (ldtr.type != 2)
1854 return false;
1855 if (!ldtr.present)
1856 return false;
1857
1858 return true;
1859}
1860
1861static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
1862{
1863 struct kvm_segment cs, ss;
1864
1865 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
1866 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
1867
1868 return ((cs.selector & SELECTOR_RPL_MASK) ==
1869 (ss.selector & SELECTOR_RPL_MASK));
1870}
1871
1872/*
1873 * Check if guest state is valid. Returns true if valid, false if
1874 * not.
1875 * We assume that registers are always usable
1876 */
1877static bool guest_state_valid(struct kvm_vcpu *vcpu)
1878{
1879 /* real mode guest state checks */
1880 if (!(vcpu->arch.cr0 & X86_CR0_PE)) {
1881 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
1882 return false;
1883 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
1884 return false;
1885 if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
1886 return false;
1887 if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
1888 return false;
1889 if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
1890 return false;
1891 if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
1892 return false;
1893 } else {
1894 /* protected mode guest state checks */
1895 if (!cs_ss_rpl_check(vcpu))
1896 return false;
1897 if (!code_segment_valid(vcpu))
1898 return false;
1899 if (!stack_segment_valid(vcpu))
1900 return false;
1901 if (!data_segment_valid(vcpu, VCPU_SREG_DS))
1902 return false;
1903 if (!data_segment_valid(vcpu, VCPU_SREG_ES))
1904 return false;
1905 if (!data_segment_valid(vcpu, VCPU_SREG_FS))
1906 return false;
1907 if (!data_segment_valid(vcpu, VCPU_SREG_GS))
1908 return false;
1909 if (!tr_valid(vcpu))
1910 return false;
1911 if (!ldtr_valid(vcpu))
1912 return false;
1913 }
1914 /* TODO:
1915 * - Add checks on RIP
1916 * - Add checks on RFLAGS
1917 */
1918
1919 return true;
1920}
1921
1682static int init_rmode_tss(struct kvm *kvm) 1922static int init_rmode_tss(struct kvm *kvm)
1683{ 1923{
1684 gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; 1924 gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
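
The new guest_state_valid() encodes a subset of the VM-entry guest-state checks that VT enforces: with CR0.PE clear every segment must look like a vm86 segment (base == selector << 4, 64 KiB limit, access rights 0xf3), and in protected mode the usual CS/SS/DS/TR/LDTR descriptor rules apply. When emulate_invalid_guest_state is set, KVM emulates until these checks pass instead of forcing fake vm86 segments on the guest. A standalone version of the real-mode check, mirroring rmode_segment_valid() (struct seg here is a stand-in for struct kvm_segment):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	uint64_t base;
	uint32_t limit;
	uint16_t selector;
	uint32_t ar;		/* access rights, 0xf3 for a vm86-style segment */
};

/* the same three conditions rmode_segment_valid() tests */
static bool rmode_seg_ok(const struct seg *s)
{
	return s->base == ((uint64_t)s->selector << 4) &&
	       s->limit == 0xffff &&
	       s->ar == 0xf3;
}

int main(void)
{
	struct seg good = { .selector = 0x1234, .base = 0x12340,
			    .limit = 0xffff, .ar = 0xf3 };
	struct seg bad  = { .selector = 0x1234, .base = 0x20000,
			    .limit = 0xffff, .ar = 0xf3 };

	printf("good: %d, bad: %d\n", rmode_seg_ok(&good), rmode_seg_ok(&bad));
	return 0;
}
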
@@ -1690,7 +1930,8 @@ static int init_rmode_tss(struct kvm *kvm)
1690 if (r < 0) 1930 if (r < 0)
1691 goto out; 1931 goto out;
1692 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; 1932 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
1693 r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16)); 1933 r = kvm_write_guest_page(kvm, fn++, &data,
1934 TSS_IOPB_BASE_OFFSET, sizeof(u16));
1694 if (r < 0) 1935 if (r < 0)
1695 goto out; 1936 goto out;
1696 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); 1937 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
@@ -1753,7 +1994,7 @@ static void seg_setup(int seg)
1753 vmcs_write16(sf->selector, 0); 1994 vmcs_write16(sf->selector, 0);
1754 vmcs_writel(sf->base, 0); 1995 vmcs_writel(sf->base, 0);
1755 vmcs_write32(sf->limit, 0xffff); 1996 vmcs_write32(sf->limit, 0xffff);
1756 vmcs_write32(sf->ar_bytes, 0x93); 1997 vmcs_write32(sf->ar_bytes, 0xf3);
1757} 1998}
1758 1999
1759static int alloc_apic_access_page(struct kvm *kvm) 2000static int alloc_apic_access_page(struct kvm *kvm)
@@ -1772,9 +2013,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
1772 if (r) 2013 if (r)
1773 goto out; 2014 goto out;
1774 2015
1775 down_read(&current->mm->mmap_sem);
1776 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); 2016 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
1777 up_read(&current->mm->mmap_sem);
1778out: 2017out:
1779 up_write(&kvm->slots_lock); 2018 up_write(&kvm->slots_lock);
1780 return r; 2019 return r;
@@ -1796,10 +2035,8 @@ static int alloc_identity_pagetable(struct kvm *kvm)
1796 if (r) 2035 if (r)
1797 goto out; 2036 goto out;
1798 2037
1799 down_read(&current->mm->mmap_sem);
1800 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, 2038 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
1801 VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); 2039 VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
1802 up_read(&current->mm->mmap_sem);
1803out: 2040out:
1804 up_write(&kvm->slots_lock); 2041 up_write(&kvm->slots_lock);
1805 return r; 2042 return r;
@@ -1821,7 +2058,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
1821 spin_unlock(&vmx_vpid_lock); 2058 spin_unlock(&vmx_vpid_lock);
1822} 2059}
1823 2060
1824void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) 2061static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
1825{ 2062{
1826 void *va; 2063 void *va;
1827 2064
@@ -1881,7 +2118,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1881 } 2118 }
1882 if (!vm_need_ept()) 2119 if (!vm_need_ept())
1883 exec_control |= CPU_BASED_CR3_STORE_EXITING | 2120 exec_control |= CPU_BASED_CR3_STORE_EXITING |
1884 CPU_BASED_CR3_LOAD_EXITING; 2121 CPU_BASED_CR3_LOAD_EXITING |
2122 CPU_BASED_INVLPG_EXITING;
1885 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); 2123 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
1886 2124
1887 if (cpu_has_secondary_exec_ctrls()) { 2125 if (cpu_has_secondary_exec_ctrls()) {
@@ -1907,8 +2145,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1907 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 2145 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
1908 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2146 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
1909 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2147 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
1910 vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */ 2148 vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */
1911 vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */ 2149 vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */
1912 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2150 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
1913#ifdef CONFIG_X86_64 2151#ifdef CONFIG_X86_64
1914 rdmsrl(MSR_FS_BASE, a); 2152 rdmsrl(MSR_FS_BASE, a);
@@ -1922,7 +2160,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1922 2160
1923 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ 2161 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
1924 2162
1925 get_idt(&dt); 2163 kvm_get_idt(&dt);
1926 vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ 2164 vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */
1927 2165
1928 asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); 2166 asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
@@ -1983,6 +2221,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1983 u64 msr; 2221 u64 msr;
1984 int ret; 2222 int ret;
1985 2223
2224 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
1986 down_read(&vcpu->kvm->slots_lock); 2225 down_read(&vcpu->kvm->slots_lock);
1987 if (!init_rmode(vmx->vcpu.kvm)) { 2226 if (!init_rmode(vmx->vcpu.kvm)) {
1988 ret = -ENOMEM; 2227 ret = -ENOMEM;
@@ -2000,6 +2239,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2000 2239
2001 fx_init(&vmx->vcpu); 2240 fx_init(&vmx->vcpu);
2002 2241
2242 seg_setup(VCPU_SREG_CS);
2003 /* 2243 /*
2004 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode 2244 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
2005 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. 2245 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
@@ -2011,8 +2251,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2011 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); 2251 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
2012 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); 2252 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
2013 } 2253 }
2014 vmcs_write32(GUEST_CS_LIMIT, 0xffff);
2015 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
2016 2254
2017 seg_setup(VCPU_SREG_DS); 2255 seg_setup(VCPU_SREG_DS);
2018 seg_setup(VCPU_SREG_ES); 2256 seg_setup(VCPU_SREG_ES);
@@ -2036,10 +2274,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2036 2274
2037 vmcs_writel(GUEST_RFLAGS, 0x02); 2275 vmcs_writel(GUEST_RFLAGS, 0x02);
2038 if (vmx->vcpu.vcpu_id == 0) 2276 if (vmx->vcpu.vcpu_id == 0)
2039 vmcs_writel(GUEST_RIP, 0xfff0); 2277 kvm_rip_write(vcpu, 0xfff0);
2040 else 2278 else
2041 vmcs_writel(GUEST_RIP, 0); 2279 kvm_rip_write(vcpu, 0);
2042 vmcs_writel(GUEST_RSP, 0); 2280 kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
2043 2281
2044 /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */ 2282 /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
2045 vmcs_writel(GUEST_DR7, 0x400); 2283 vmcs_writel(GUEST_DR7, 0x400);
@@ -2089,6 +2327,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2089 2327
2090 ret = 0; 2328 ret = 0;
2091 2329
2330 /* HACK: Don't enable emulation on guest boot/reset */
2331 vmx->emulation_required = 0;
2332
2092out: 2333out:
2093 up_read(&vcpu->kvm->slots_lock); 2334 up_read(&vcpu->kvm->slots_lock);
2094 return ret; 2335 return ret;
@@ -2100,20 +2341,27 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2100 2341
2101 KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); 2342 KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
2102 2343
2344 ++vcpu->stat.irq_injections;
2103 if (vcpu->arch.rmode.active) { 2345 if (vcpu->arch.rmode.active) {
2104 vmx->rmode.irq.pending = true; 2346 vmx->rmode.irq.pending = true;
2105 vmx->rmode.irq.vector = irq; 2347 vmx->rmode.irq.vector = irq;
2106 vmx->rmode.irq.rip = vmcs_readl(GUEST_RIP); 2348 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
2107 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2349 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2108 irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); 2350 irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
2109 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); 2351 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
2110 vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip - 1); 2352 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
2111 return; 2353 return;
2112 } 2354 }
2113 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2355 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2114 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 2356 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
2115} 2357}
2116 2358
2359static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2360{
2361 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2362 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
2363}
2364
2117static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) 2365static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2118{ 2366{
2119 int word_index = __ffs(vcpu->arch.irq_summary); 2367 int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2123,7 +2371,7 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2123 clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); 2371 clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
2124 if (!vcpu->arch.irq_pending[word_index]) 2372 if (!vcpu->arch.irq_pending[word_index])
2125 clear_bit(word_index, &vcpu->arch.irq_summary); 2373 clear_bit(word_index, &vcpu->arch.irq_summary);
2126 vmx_inject_irq(vcpu, irq); 2374 kvm_queue_interrupt(vcpu, irq);
2127} 2375}
2128 2376
2129 2377
@@ -2137,13 +2385,12 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
2137 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); 2385 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
2138 2386
2139 if (vcpu->arch.interrupt_window_open && 2387 if (vcpu->arch.interrupt_window_open &&
2140 vcpu->arch.irq_summary && 2388 vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
2141 !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
2142 /*
2143 * If interrupts enabled, and not blocked by sti or mov ss. Good.
2144 */
2145 kvm_do_inject_irq(vcpu); 2389 kvm_do_inject_irq(vcpu);
2146 2390
2391 if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
2392 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
2393
2147 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2394 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2148 if (!vcpu->arch.interrupt_window_open && 2395 if (!vcpu->arch.interrupt_window_open &&
2149 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) 2396 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
@@ -2194,9 +2441,6 @@ static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
2194static int handle_rmode_exception(struct kvm_vcpu *vcpu, 2441static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2195 int vec, u32 err_code) 2442 int vec, u32 err_code)
2196{ 2443{
2197 if (!vcpu->arch.rmode.active)
2198 return 0;
2199
2200 /* 2444 /*
2201 * Instruction with address size override prefix opcode 0x67 2445 * Instruction with address size override prefix opcode 0x67
2202 * Cause the #SS fault with 0 error code in VM86 mode. 2446 * Cause the #SS fault with 0 error code in VM86 mode.
@@ -2204,6 +2448,25 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2204 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) 2448 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
2205 if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) 2449 if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE)
2206 return 1; 2450 return 1;
2451 /*
2452 * Forward all other exceptions that are valid in real mode.
2453 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
2454 * the required debugging infrastructure rework.
2455 */
2456 switch (vec) {
2457 case DE_VECTOR:
2458 case DB_VECTOR:
2459 case BP_VECTOR:
2460 case OF_VECTOR:
2461 case BR_VECTOR:
2462 case UD_VECTOR:
2463 case DF_VECTOR:
2464 case SS_VECTOR:
2465 case GP_VECTOR:
2466 case MF_VECTOR:
2467 kvm_queue_exception(vcpu, vec);
2468 return 1;
2469 }
2207 return 0; 2470 return 0;
2208} 2471}
2209 2472
@@ -2245,7 +2508,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2245 } 2508 }
2246 2509
2247 error_code = 0; 2510 error_code = 0;
2248 rip = vmcs_readl(GUEST_RIP); 2511 rip = kvm_rip_read(vcpu);
2249 if (intr_info & INTR_INFO_DELIVER_CODE_MASK) 2512 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
2250 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 2513 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
2251 if (is_page_fault(intr_info)) { 2514 if (is_page_fault(intr_info)) {
@@ -2255,6 +2518,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2255 cr2 = vmcs_readl(EXIT_QUALIFICATION); 2518 cr2 = vmcs_readl(EXIT_QUALIFICATION);
2256 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, 2519 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
2257 (u32)((u64)cr2 >> 32), handler); 2520 (u32)((u64)cr2 >> 32), handler);
2521 if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
2522 kvm_mmu_unprotect_page_virt(vcpu, cr2);
2258 return kvm_mmu_page_fault(vcpu, cr2, error_code); 2523 return kvm_mmu_page_fault(vcpu, cr2, error_code);
2259 } 2524 }
2260 2525
@@ -2341,27 +2606,25 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2341 reg = (exit_qualification >> 8) & 15; 2606 reg = (exit_qualification >> 8) & 15;
2342 switch ((exit_qualification >> 4) & 3) { 2607 switch ((exit_qualification >> 4) & 3) {
2343 case 0: /* mov to cr */ 2608 case 0: /* mov to cr */
2344 KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg], 2609 KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr,
2345 (u32)((u64)vcpu->arch.regs[reg] >> 32), handler); 2610 (u32)kvm_register_read(vcpu, reg),
2611 (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
2612 handler);
2346 switch (cr) { 2613 switch (cr) {
2347 case 0: 2614 case 0:
2348 vcpu_load_rsp_rip(vcpu); 2615 kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg));
2349 kvm_set_cr0(vcpu, vcpu->arch.regs[reg]);
2350 skip_emulated_instruction(vcpu); 2616 skip_emulated_instruction(vcpu);
2351 return 1; 2617 return 1;
2352 case 3: 2618 case 3:
2353 vcpu_load_rsp_rip(vcpu); 2619 kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg));
2354 kvm_set_cr3(vcpu, vcpu->arch.regs[reg]);
2355 skip_emulated_instruction(vcpu); 2620 skip_emulated_instruction(vcpu);
2356 return 1; 2621 return 1;
2357 case 4: 2622 case 4:
2358 vcpu_load_rsp_rip(vcpu); 2623 kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
2359 kvm_set_cr4(vcpu, vcpu->arch.regs[reg]);
2360 skip_emulated_instruction(vcpu); 2624 skip_emulated_instruction(vcpu);
2361 return 1; 2625 return 1;
2362 case 8: 2626 case 8:
2363 vcpu_load_rsp_rip(vcpu); 2627 kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg));
2364 kvm_set_cr8(vcpu, vcpu->arch.regs[reg]);
2365 skip_emulated_instruction(vcpu); 2628 skip_emulated_instruction(vcpu);
2366 if (irqchip_in_kernel(vcpu->kvm)) 2629 if (irqchip_in_kernel(vcpu->kvm))
2367 return 1; 2630 return 1;
@@ -2370,7 +2633,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2370 }; 2633 };
2371 break; 2634 break;
2372 case 2: /* clts */ 2635 case 2: /* clts */
2373 vcpu_load_rsp_rip(vcpu);
2374 vmx_fpu_deactivate(vcpu); 2636 vmx_fpu_deactivate(vcpu);
2375 vcpu->arch.cr0 &= ~X86_CR0_TS; 2637 vcpu->arch.cr0 &= ~X86_CR0_TS;
2376 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); 2638 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
@@ -2381,21 +2643,17 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2381 case 1: /*mov from cr*/ 2643 case 1: /*mov from cr*/
2382 switch (cr) { 2644 switch (cr) {
2383 case 3: 2645 case 3:
2384 vcpu_load_rsp_rip(vcpu); 2646 kvm_register_write(vcpu, reg, vcpu->arch.cr3);
2385 vcpu->arch.regs[reg] = vcpu->arch.cr3;
2386 vcpu_put_rsp_rip(vcpu);
2387 KVMTRACE_3D(CR_READ, vcpu, (u32)cr, 2647 KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
2388 (u32)vcpu->arch.regs[reg], 2648 (u32)kvm_register_read(vcpu, reg),
2389 (u32)((u64)vcpu->arch.regs[reg] >> 32), 2649 (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
2390 handler); 2650 handler);
2391 skip_emulated_instruction(vcpu); 2651 skip_emulated_instruction(vcpu);
2392 return 1; 2652 return 1;
2393 case 8: 2653 case 8:
2394 vcpu_load_rsp_rip(vcpu); 2654 kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu));
2395 vcpu->arch.regs[reg] = kvm_get_cr8(vcpu);
2396 vcpu_put_rsp_rip(vcpu);
2397 KVMTRACE_2D(CR_READ, vcpu, (u32)cr, 2655 KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
2398 (u32)vcpu->arch.regs[reg], handler); 2656 (u32)kvm_register_read(vcpu, reg), handler);
2399 skip_emulated_instruction(vcpu); 2657 skip_emulated_instruction(vcpu);
2400 return 1; 2658 return 1;
2401 } 2659 }
@@ -2427,7 +2685,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2427 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 2685 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2428 dr = exit_qualification & 7; 2686 dr = exit_qualification & 7;
2429 reg = (exit_qualification >> 8) & 15; 2687 reg = (exit_qualification >> 8) & 15;
2430 vcpu_load_rsp_rip(vcpu);
2431 if (exit_qualification & 16) { 2688 if (exit_qualification & 16) {
2432 /* mov from dr */ 2689 /* mov from dr */
2433 switch (dr) { 2690 switch (dr) {
@@ -2440,12 +2697,11 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2440 default: 2697 default:
2441 val = 0; 2698 val = 0;
2442 } 2699 }
2443 vcpu->arch.regs[reg] = val; 2700 kvm_register_write(vcpu, reg, val);
2444 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); 2701 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
2445 } else { 2702 } else {
2446 /* mov to dr */ 2703 /* mov to dr */
2447 } 2704 }
2448 vcpu_put_rsp_rip(vcpu);
2449 skip_emulated_instruction(vcpu); 2705 skip_emulated_instruction(vcpu);
2450 return 1; 2706 return 1;
2451} 2707}
@@ -2538,6 +2794,15 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2538 return 1; 2794 return 1;
2539} 2795}
2540 2796
2797static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2798{
2799 u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2800
2801 kvm_mmu_invlpg(vcpu, exit_qualification);
2802 skip_emulated_instruction(vcpu);
2803 return 1;
2804}
2805
2541static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2806static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2542{ 2807{
2543 skip_emulated_instruction(vcpu); 2808 skip_emulated_instruction(vcpu);
@@ -2554,8 +2819,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2554 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 2819 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2555 offset = exit_qualification & 0xffful; 2820 offset = exit_qualification & 0xffful;
2556 2821
2557 KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler);
2558
2559 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 2822 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2560 2823
2561 if (er != EMULATE_DONE) { 2824 if (er != EMULATE_DONE) {
@@ -2639,6 +2902,56 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2639 return 1; 2902 return 1;
2640} 2903}
2641 2904
2905static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2906{
2907 u32 cpu_based_vm_exec_control;
2908
2909 /* clear pending NMI */
2910 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2911 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
2912 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2913 ++vcpu->stat.nmi_window_exits;
2914
2915 return 1;
2916}
2917
2918static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
2919 struct kvm_run *kvm_run)
2920{
2921 struct vcpu_vmx *vmx = to_vmx(vcpu);
2922 int err;
2923
2924 preempt_enable();
2925 local_irq_enable();
2926
2927 while (!guest_state_valid(vcpu)) {
2928 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2929
2930 switch (err) {
2931 case EMULATE_DONE:
2932 break;
2933 case EMULATE_DO_MMIO:
2934 kvm_report_emulation_failure(vcpu, "mmio");
2935 /* TODO: Handle MMIO */
2936 return;
2937 default:
2938 kvm_report_emulation_failure(vcpu, "emulation failure");
2939 return;
2940 }
2941
2942 if (signal_pending(current))
2943 break;
2944 if (need_resched())
2945 schedule();
2946 }
2947
2948 local_irq_disable();
2949 preempt_disable();
2950
2951 /* Guest state should be valid now, no more emulation should be needed */
2952 vmx->emulation_required = 0;
2953}
2954
2642/* 2955/*
2643 * The exit handlers return 1 if the exit was handled fully and guest execution 2956 * The exit handlers return 1 if the exit was handled fully and guest execution
2644 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 2957 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2649,6 +2962,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
2649 [EXIT_REASON_EXCEPTION_NMI] = handle_exception, 2962 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
2650 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 2963 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
2651 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, 2964 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
2965 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
2652 [EXIT_REASON_IO_INSTRUCTION] = handle_io, 2966 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
2653 [EXIT_REASON_CR_ACCESS] = handle_cr, 2967 [EXIT_REASON_CR_ACCESS] = handle_cr,
2654 [EXIT_REASON_DR_ACCESS] = handle_dr, 2968 [EXIT_REASON_DR_ACCESS] = handle_dr,
@@ -2657,6 +2971,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
2657 [EXIT_REASON_MSR_WRITE] = handle_wrmsr, 2971 [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
2658 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, 2972 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
2659 [EXIT_REASON_HLT] = handle_halt, 2973 [EXIT_REASON_HLT] = handle_halt,
2974 [EXIT_REASON_INVLPG] = handle_invlpg,
2660 [EXIT_REASON_VMCALL] = handle_vmcall, 2975 [EXIT_REASON_VMCALL] = handle_vmcall,
2661 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 2976 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
2662 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 2977 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
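
kvm_vmx_exit_handlers[] is indexed directly by the VMX exit reason; this hunk just registers two more entries, for NMI-window and INVLPG exits. A minimal sketch of the same dispatch pattern, with made-up exit-reason numbers and handlers (not the architectural values):

#include <stdio.h>

enum { EXIT_HLT, EXIT_INVLPG, EXIT_NMI_WINDOW, NR_EXITS };

typedef int (*exit_handler_t)(void);

static int handle_hlt(void)        { printf("hlt\n");        return 1; }
static int handle_invlpg(void)     { printf("invlpg\n");     return 1; }
static int handle_nmi_window(void) { printf("nmi window\n"); return 1; }

/* same shape as kvm_vmx_exit_handlers[]: exit reason -> handler */
static exit_handler_t handlers[NR_EXITS] = {
	[EXIT_HLT]        = handle_hlt,
	[EXIT_INVLPG]     = handle_invlpg,
	[EXIT_NMI_WINDOW] = handle_nmi_window,
};

static int dispatch(unsigned reason)
{
	if (reason < NR_EXITS && handlers[reason])
		return handlers[reason]();	/* 1: resume the guest */
	return 0;				/* unknown exit: punt to userspace */
}

int main(void)
{
	dispatch(EXIT_INVLPG);
	dispatch(EXIT_NMI_WINDOW);
	return 0;
}
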
@@ -2678,8 +2993,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2678 struct vcpu_vmx *vmx = to_vmx(vcpu); 2993 struct vcpu_vmx *vmx = to_vmx(vcpu);
2679 u32 vectoring_info = vmx->idt_vectoring_info; 2994 u32 vectoring_info = vmx->idt_vectoring_info;
2680 2995
2681 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), 2996 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
2682 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); 2997 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
2683 2998
2684 /* Access CR3 don't cause VMExit in paging mode, so we need 2999 /* Access CR3 don't cause VMExit in paging mode, so we need
2685 * to sync with guest real CR3. */ 3000 * to sync with guest real CR3. */
@@ -2736,64 +3051,128 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
2736 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 3051 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2737} 3052}
2738 3053
2739static void vmx_intr_assist(struct kvm_vcpu *vcpu) 3054static void enable_nmi_window(struct kvm_vcpu *vcpu)
2740{ 3055{
2741 struct vcpu_vmx *vmx = to_vmx(vcpu); 3056 u32 cpu_based_vm_exec_control;
2742 u32 idtv_info_field, intr_info_field;
2743 int has_ext_irq, interrupt_window_open;
2744 int vector;
2745
2746 update_tpr_threshold(vcpu);
2747 3057
2748 has_ext_irq = kvm_cpu_has_interrupt(vcpu); 3058 if (!cpu_has_virtual_nmis())
2749 intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
2750 idtv_info_field = vmx->idt_vectoring_info;
2751 if (intr_info_field & INTR_INFO_VALID_MASK) {
2752 if (idtv_info_field & INTR_INFO_VALID_MASK) {
2753 /* TODO: fault when IDT_Vectoring */
2754 if (printk_ratelimit())
2755 printk(KERN_ERR "Fault when IDT_Vectoring\n");
2756 }
2757 if (has_ext_irq)
2758 enable_irq_window(vcpu);
2759 return; 3059 return;
2760 }
2761 if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
2762 if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
2763 == INTR_TYPE_EXT_INTR
2764 && vcpu->arch.rmode.active) {
2765 u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
2766 3060
2767 vmx_inject_irq(vcpu, vect); 3061 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2768 if (unlikely(has_ext_irq)) 3062 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
2769 enable_irq_window(vcpu); 3063 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2770 return; 3064}
2771 }
2772 3065
2773 KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); 3066static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
3067{
3068 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3069 return !(guest_intr & (GUEST_INTR_STATE_NMI |
3070 GUEST_INTR_STATE_MOV_SS |
3071 GUEST_INTR_STATE_STI));
3072}
2774 3073
2775 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); 3074static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
2776 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 3075{
2777 vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); 3076 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3077 return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
3078 GUEST_INTR_STATE_STI)) &&
3079 (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
3080}
2778 3081
2779 if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) 3082static void enable_intr_window(struct kvm_vcpu *vcpu)
2780 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 3083{
2781 vmcs_read32(IDT_VECTORING_ERROR_CODE)); 3084 if (vcpu->arch.nmi_pending)
2782 if (unlikely(has_ext_irq)) 3085 enable_nmi_window(vcpu);
3086 else if (kvm_cpu_has_interrupt(vcpu))
3087 enable_irq_window(vcpu);
3088}
3089
3090static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3091{
3092 u32 exit_intr_info;
3093 u32 idt_vectoring_info;
3094 bool unblock_nmi;
3095 u8 vector;
3096 int type;
3097 bool idtv_info_valid;
3098 u32 error;
3099
3100 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
3101 if (cpu_has_virtual_nmis()) {
3102 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
3103 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
3104 /*
3105 * SDM 3: 25.7.1.2
3106 * Re-set the "blocking by NMI" bit before VM entry if the vmexit was
3107 * caused by a fault during a guest IRET.
3108 */
3109 if (unblock_nmi && vector != DF_VECTOR)
3110 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3111 GUEST_INTR_STATE_NMI);
3112 }
3113
3114 idt_vectoring_info = vmx->idt_vectoring_info;
3115 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
3116 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
3117 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
3118 if (vmx->vcpu.arch.nmi_injected) {
3119 /*
3120 * SDM 3: 25.7.1.2
3121 * Clear the "blocking by NMI" bit before VM entry if an NMI delivery
3122 * faulted.
3123 */
3124 if (idtv_info_valid && type == INTR_TYPE_NMI_INTR)
3125 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
3126 GUEST_INTR_STATE_NMI);
3127 else
3128 vmx->vcpu.arch.nmi_injected = false;
3129 }
3130 kvm_clear_exception_queue(&vmx->vcpu);
3131 if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
3132 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
3133 error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
3134 kvm_queue_exception_e(&vmx->vcpu, vector, error);
3135 } else
3136 kvm_queue_exception(&vmx->vcpu, vector);
3137 vmx->idt_vectoring_info = 0;
3138 }
3139 kvm_clear_interrupt_queue(&vmx->vcpu);
3140 if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
3141 kvm_queue_interrupt(&vmx->vcpu, vector);
3142 vmx->idt_vectoring_info = 0;
3143 }
3144}
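vmx_complete_interrupts() re-queues any event whose delivery was interrupted by the exit, using the fields it decodes from IDT_VECTORING_INFO. An illustrative decode of a made-up sample value, using the same masks as the function above (the numeric field encodings are the architectural ones and are not spelled out in this hunk):

/* Sketch only -- the sample value is hypothetical, not patch code. */
static void decode_sample_vectoring_info(void)
{
	u32 info = 0x80000b0e;	/* valid | error code | hw exception | vector 14 (#PF) */

	u8 vector = info & VECTORING_INFO_VECTOR_MASK;		/* 14 */
	int type = info & VECTORING_INFO_TYPE_MASK;		/* INTR_TYPE_EXCEPTION */
	bool has_err = info & VECTORING_INFO_DELIVER_CODE_MASK;	/* true */
	bool valid = info & VECTORING_INFO_VALID_MASK;		/* true */

	/* With these fields, vmx_complete_interrupts() re-queues the page
	 * fault via kvm_queue_exception_e() together with the error code
	 * saved in IDT_VECTORING_ERROR_CODE, so the next VM entry retries
	 * the delivery. */
}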
3145
3146static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3147{
3148 update_tpr_threshold(vcpu);
3149
3150 if (cpu_has_virtual_nmis()) {
3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3152 if (vmx_nmi_enabled(vcpu)) {
3153 vcpu->arch.nmi_pending = false;
3154 vcpu->arch.nmi_injected = true;
3155 } else {
3156 enable_intr_window(vcpu);
3157 return;
3158 }
3159 }
3160 if (vcpu->arch.nmi_injected) {
3161 vmx_inject_nmi(vcpu);
3162 enable_intr_window(vcpu);
3163 return;
3164 }
3165 }
3166 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
3167 if (vmx_irq_enabled(vcpu))
3168 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
3169 else
2783 enable_irq_window(vcpu); 3170 enable_irq_window(vcpu);
2784 return;
2785 } 3171 }
2786 if (!has_ext_irq) 3172 if (vcpu->arch.interrupt.pending) {
2787 return; 3173 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
2788 interrupt_window_open = 3174 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
2789 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && 3175 }
2790 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
2791 if (interrupt_window_open) {
2792 vector = kvm_cpu_get_interrupt(vcpu);
2793 vmx_inject_irq(vcpu, vector);
2794 kvm_timer_intr_post(vcpu, vector);
2795 } else
2796 enable_irq_window(vcpu);
2797} 3176}
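The rewritten vmx_intr_assist() establishes a fixed priority between the two event sources. A condensed sketch of the ordering, mirroring the code above rather than adding a separate implementation:

/*
 * Per-entry injection order implemented above:
 *
 *   1. pending NMI, not yet taken:
 *        vmx_nmi_enabled() ? mark it injected : open a window and return
 *   2. NMI marked injected:
 *        vmx_inject_nmi(), open a window for anything still pending, return
 *   3. no IRQ queued but one pending at the interrupt controller:
 *        vmx_irq_enabled() ? kvm_queue_interrupt() : enable_irq_window()
 *   4. queued IRQ:
 *        vmx_inject_irq() followed by kvm_timer_intr_post()
 */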
2798 3177
2799/* 3178/*
@@ -2805,9 +3184,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2805static void fixup_rmode_irq(struct vcpu_vmx *vmx) 3184static void fixup_rmode_irq(struct vcpu_vmx *vmx)
2806{ 3185{
2807 vmx->rmode.irq.pending = 0; 3186 vmx->rmode.irq.pending = 0;
2808 if (vmcs_readl(GUEST_RIP) + 1 != vmx->rmode.irq.rip) 3187 if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip)
2809 return; 3188 return;
2810 vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip); 3189 kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip);
2811 if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { 3190 if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
2812 vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK; 3191 vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK;
2813 vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR; 3192 vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR;
@@ -2819,11 +3198,30 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
2819 | vmx->rmode.irq.vector; 3198 | vmx->rmode.irq.vector;
2820} 3199}
2821 3200
3201#ifdef CONFIG_X86_64
3202#define R "r"
3203#define Q "q"
3204#else
3205#define R "e"
3206#define Q "l"
3207#endif
3208
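The R and Q macros let the inline assembly below be written once for both word sizes: R selects the register-name prefix and Q the operand-size suffix, relying on ordinary C string-literal concatenation. For example:

/* On CONFIG_X86_64:                          On 32-bit x86:
 *   "push %%"R"dx"      -> "push %%rdx"        -> "push %%edx"
 *   "push"Q" (%%"R"sp)" -> "pushq (%%rsp)"     -> "pushl (%%esp)"
 */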
2822static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3209static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2823{ 3210{
2824 struct vcpu_vmx *vmx = to_vmx(vcpu); 3211 struct vcpu_vmx *vmx = to_vmx(vcpu);
2825 u32 intr_info; 3212 u32 intr_info;
2826 3213
3214 /* Handle invalid guest state instead of entering VMX */
3215 if (vmx->emulation_required && emulate_invalid_guest_state) {
3216 handle_invalid_guest_state(vcpu, kvm_run);
3217 return;
3218 }
3219
3220 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
3221 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
3222 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
3223 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
3224
2827 /* 3225 /*
2828 * Loading guest fpu may have cleared host cr0.ts 3226 * Loading guest fpu may have cleared host cr0.ts
2829 */ 3227 */
@@ -2831,26 +3229,25 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2831 3229
2832 asm( 3230 asm(
2833 /* Store host registers */ 3231 /* Store host registers */
2834#ifdef CONFIG_X86_64 3232 "push %%"R"dx; push %%"R"bp;"
2835 "push %%rdx; push %%rbp;" 3233 "push %%"R"cx \n\t"
2836 "push %%rcx \n\t" 3234 "cmp %%"R"sp, %c[host_rsp](%0) \n\t"
2837#else 3235 "je 1f \n\t"
2838 "push %%edx; push %%ebp;" 3236 "mov %%"R"sp, %c[host_rsp](%0) \n\t"
2839 "push %%ecx \n\t" 3237 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
2840#endif 3238 "1: \n\t"
2841 ASM_VMX_VMWRITE_RSP_RDX "\n\t"
2842 /* Check if vmlaunch or vmresume is needed */ 3239 /* Check if vmlaunch or vmresume is needed */
2843 "cmpl $0, %c[launched](%0) \n\t" 3240 "cmpl $0, %c[launched](%0) \n\t"
2844 /* Load guest registers. Don't clobber flags. */ 3241 /* Load guest registers. Don't clobber flags. */
3242 "mov %c[cr2](%0), %%"R"ax \n\t"
3243 "mov %%"R"ax, %%cr2 \n\t"
3244 "mov %c[rax](%0), %%"R"ax \n\t"
3245 "mov %c[rbx](%0), %%"R"bx \n\t"
3246 "mov %c[rdx](%0), %%"R"dx \n\t"
3247 "mov %c[rsi](%0), %%"R"si \n\t"
3248 "mov %c[rdi](%0), %%"R"di \n\t"
3249 "mov %c[rbp](%0), %%"R"bp \n\t"
2845#ifdef CONFIG_X86_64 3250#ifdef CONFIG_X86_64
2846 "mov %c[cr2](%0), %%rax \n\t"
2847 "mov %%rax, %%cr2 \n\t"
2848 "mov %c[rax](%0), %%rax \n\t"
2849 "mov %c[rbx](%0), %%rbx \n\t"
2850 "mov %c[rdx](%0), %%rdx \n\t"
2851 "mov %c[rsi](%0), %%rsi \n\t"
2852 "mov %c[rdi](%0), %%rdi \n\t"
2853 "mov %c[rbp](%0), %%rbp \n\t"
2854 "mov %c[r8](%0), %%r8 \n\t" 3251 "mov %c[r8](%0), %%r8 \n\t"
2855 "mov %c[r9](%0), %%r9 \n\t" 3252 "mov %c[r9](%0), %%r9 \n\t"
2856 "mov %c[r10](%0), %%r10 \n\t" 3253 "mov %c[r10](%0), %%r10 \n\t"
@@ -2859,34 +3256,25 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2859 "mov %c[r13](%0), %%r13 \n\t" 3256 "mov %c[r13](%0), %%r13 \n\t"
2860 "mov %c[r14](%0), %%r14 \n\t" 3257 "mov %c[r14](%0), %%r14 \n\t"
2861 "mov %c[r15](%0), %%r15 \n\t" 3258 "mov %c[r15](%0), %%r15 \n\t"
2862 "mov %c[rcx](%0), %%rcx \n\t" /* kills %0 (rcx) */
2863#else
2864 "mov %c[cr2](%0), %%eax \n\t"
2865 "mov %%eax, %%cr2 \n\t"
2866 "mov %c[rax](%0), %%eax \n\t"
2867 "mov %c[rbx](%0), %%ebx \n\t"
2868 "mov %c[rdx](%0), %%edx \n\t"
2869 "mov %c[rsi](%0), %%esi \n\t"
2870 "mov %c[rdi](%0), %%edi \n\t"
2871 "mov %c[rbp](%0), %%ebp \n\t"
2872 "mov %c[rcx](%0), %%ecx \n\t" /* kills %0 (ecx) */
2873#endif 3259#endif
3260 "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
3261
2874 /* Enter guest mode */ 3262 /* Enter guest mode */
2875 "jne .Llaunched \n\t" 3263 "jne .Llaunched \n\t"
2876 ASM_VMX_VMLAUNCH "\n\t" 3264 __ex(ASM_VMX_VMLAUNCH) "\n\t"
2877 "jmp .Lkvm_vmx_return \n\t" 3265 "jmp .Lkvm_vmx_return \n\t"
2878 ".Llaunched: " ASM_VMX_VMRESUME "\n\t" 3266 ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
2879 ".Lkvm_vmx_return: " 3267 ".Lkvm_vmx_return: "
2880 /* Save guest registers, load host registers, keep flags */ 3268 /* Save guest registers, load host registers, keep flags */
3269 "xchg %0, (%%"R"sp) \n\t"
3270 "mov %%"R"ax, %c[rax](%0) \n\t"
3271 "mov %%"R"bx, %c[rbx](%0) \n\t"
3272 "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t"
3273 "mov %%"R"dx, %c[rdx](%0) \n\t"
3274 "mov %%"R"si, %c[rsi](%0) \n\t"
3275 "mov %%"R"di, %c[rdi](%0) \n\t"
3276 "mov %%"R"bp, %c[rbp](%0) \n\t"
2881#ifdef CONFIG_X86_64 3277#ifdef CONFIG_X86_64
2882 "xchg %0, (%%rsp) \n\t"
2883 "mov %%rax, %c[rax](%0) \n\t"
2884 "mov %%rbx, %c[rbx](%0) \n\t"
2885 "pushq (%%rsp); popq %c[rcx](%0) \n\t"
2886 "mov %%rdx, %c[rdx](%0) \n\t"
2887 "mov %%rsi, %c[rsi](%0) \n\t"
2888 "mov %%rdi, %c[rdi](%0) \n\t"
2889 "mov %%rbp, %c[rbp](%0) \n\t"
2890 "mov %%r8, %c[r8](%0) \n\t" 3278 "mov %%r8, %c[r8](%0) \n\t"
2891 "mov %%r9, %c[r9](%0) \n\t" 3279 "mov %%r9, %c[r9](%0) \n\t"
2892 "mov %%r10, %c[r10](%0) \n\t" 3280 "mov %%r10, %c[r10](%0) \n\t"
@@ -2895,28 +3283,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2895 "mov %%r13, %c[r13](%0) \n\t" 3283 "mov %%r13, %c[r13](%0) \n\t"
2896 "mov %%r14, %c[r14](%0) \n\t" 3284 "mov %%r14, %c[r14](%0) \n\t"
2897 "mov %%r15, %c[r15](%0) \n\t" 3285 "mov %%r15, %c[r15](%0) \n\t"
2898 "mov %%cr2, %%rax \n\t"
2899 "mov %%rax, %c[cr2](%0) \n\t"
2900
2901 "pop %%rbp; pop %%rbp; pop %%rdx \n\t"
2902#else
2903 "xchg %0, (%%esp) \n\t"
2904 "mov %%eax, %c[rax](%0) \n\t"
2905 "mov %%ebx, %c[rbx](%0) \n\t"
2906 "pushl (%%esp); popl %c[rcx](%0) \n\t"
2907 "mov %%edx, %c[rdx](%0) \n\t"
2908 "mov %%esi, %c[rsi](%0) \n\t"
2909 "mov %%edi, %c[rdi](%0) \n\t"
2910 "mov %%ebp, %c[rbp](%0) \n\t"
2911 "mov %%cr2, %%eax \n\t"
2912 "mov %%eax, %c[cr2](%0) \n\t"
2913
2914 "pop %%ebp; pop %%ebp; pop %%edx \n\t"
2915#endif 3286#endif
3287 "mov %%cr2, %%"R"ax \n\t"
3288 "mov %%"R"ax, %c[cr2](%0) \n\t"
3289
3290 "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t"
2916 "setbe %c[fail](%0) \n\t" 3291 "setbe %c[fail](%0) \n\t"
2917 : : "c"(vmx), "d"((unsigned long)HOST_RSP), 3292 : : "c"(vmx), "d"((unsigned long)HOST_RSP),
2918 [launched]"i"(offsetof(struct vcpu_vmx, launched)), 3293 [launched]"i"(offsetof(struct vcpu_vmx, launched)),
2919 [fail]"i"(offsetof(struct vcpu_vmx, fail)), 3294 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
3295 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
2920 [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), 3296 [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
2921 [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), 3297 [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
2922 [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), 3298 [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
@@ -2936,20 +3312,22 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2936#endif 3312#endif
2937 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) 3313 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
2938 : "cc", "memory" 3314 : "cc", "memory"
3315 , R"bx", R"di", R"si"
2939#ifdef CONFIG_X86_64 3316#ifdef CONFIG_X86_64
2940 , "rbx", "rdi", "rsi"
2941 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" 3317 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2942#else
2943 , "ebx", "edi", "rsi"
2944#endif 3318#endif
2945 ); 3319 );
2946 3320
3321 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
3322 vcpu->arch.regs_dirty = 0;
3323
2947 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 3324 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
2948 if (vmx->rmode.irq.pending) 3325 if (vmx->rmode.irq.pending)
2949 fixup_rmode_irq(vmx); 3326 fixup_rmode_irq(vmx);
2950 3327
2951 vcpu->arch.interrupt_window_open = 3328 vcpu->arch.interrupt_window_open =
2952 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; 3329 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
3330 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
2953 3331
2954 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3332 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
2955 vmx->launched = 1; 3333 vmx->launched = 1;
@@ -2957,18 +3335,24 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2957 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3335 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
2958 3336
2959 /* We need to handle NMIs before interrupts are enabled */ 3337 /* We need to handle NMIs before interrupts are enabled */
2960 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ 3338 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
3339 (intr_info & INTR_INFO_VALID_MASK)) {
2961 KVMTRACE_0D(NMI, vcpu, handler); 3340 KVMTRACE_0D(NMI, vcpu, handler);
2962 asm("int $2"); 3341 asm("int $2");
2963 } 3342 }
3343
3344 vmx_complete_interrupts(vmx);
2964} 3345}
2965 3346
3347#undef R
3348#undef Q
3349
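vmx_vcpu_run() now keeps RSP and RIP in the generic register cache: GUEST_RSP/GUEST_RIP are written before entry only when marked dirty, and after the exit regs_avail is cleared for those two registers so the next kvm_rip_read() re-reads the VMCS lazily. A minimal sketch of that read path, assuming a vmx_cache_reg() callback along the lines of the .cache_reg hook installed further down (its exact body is not shown in this hunk):

/* Sketch only -- illustrates the lazy read path, not the patch's code. */
static void vmx_cache_reg_sketch(struct kvm_vcpu *vcpu, int reg)
{
	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
	switch (reg) {
	case VCPU_REGS_RSP:
		vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
		break;
	case VCPU_REGS_RIP:
		vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
		break;
	default:
		break;
	}
}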
2966static void vmx_free_vmcs(struct kvm_vcpu *vcpu) 3350static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
2967{ 3351{
2968 struct vcpu_vmx *vmx = to_vmx(vcpu); 3352 struct vcpu_vmx *vmx = to_vmx(vcpu);
2969 3353
2970 if (vmx->vmcs) { 3354 if (vmx->vmcs) {
2971 on_each_cpu(__vcpu_clear, vmx, 1); 3355 vcpu_clear(vmx);
2972 free_vmcs(vmx->vmcs); 3356 free_vmcs(vmx->vmcs);
2973 vmx->vmcs = NULL; 3357 vmx->vmcs = NULL;
2974 } 3358 }
@@ -2999,15 +3383,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2999 return ERR_PTR(-ENOMEM); 3383 return ERR_PTR(-ENOMEM);
3000 3384
3001 allocate_vpid(vmx); 3385 allocate_vpid(vmx);
3002 if (id == 0 && vm_need_ept()) {
3003 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3004 VMX_EPT_WRITABLE_MASK |
3005 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3006 kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
3007 VMX_EPT_FAKE_DIRTY_MASK, 0ull,
3008 VMX_EPT_EXECUTABLE_MASK);
3009 kvm_enable_tdp();
3010 }
3011 3386
3012 err = kvm_vcpu_init(&vmx->vcpu, kvm, id); 3387 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
3013 if (err) 3388 if (err)
@@ -3095,7 +3470,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
3095 .prepare_guest_switch = vmx_save_host_state, 3470 .prepare_guest_switch = vmx_save_host_state,
3096 .vcpu_load = vmx_vcpu_load, 3471 .vcpu_load = vmx_vcpu_load,
3097 .vcpu_put = vmx_vcpu_put, 3472 .vcpu_put = vmx_vcpu_put,
3098 .vcpu_decache = vmx_vcpu_decache,
3099 3473
3100 .set_guest_debug = set_guest_debug, 3474 .set_guest_debug = set_guest_debug,
3101 .guest_debug_pre = kvm_guest_debug_pre, 3475 .guest_debug_pre = kvm_guest_debug_pre,
@@ -3115,8 +3489,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
3115 .set_idt = vmx_set_idt, 3489 .set_idt = vmx_set_idt,
3116 .get_gdt = vmx_get_gdt, 3490 .get_gdt = vmx_get_gdt,
3117 .set_gdt = vmx_set_gdt, 3491 .set_gdt = vmx_set_gdt,
3118 .cache_regs = vcpu_load_rsp_rip, 3492 .cache_reg = vmx_cache_reg,
3119 .decache_regs = vcpu_put_rsp_rip,
3120 .get_rflags = vmx_get_rflags, 3493 .get_rflags = vmx_get_rflags,
3121 .set_rflags = vmx_set_rflags, 3494 .set_rflags = vmx_set_rflags,
3122 3495
@@ -3187,8 +3560,16 @@ static int __init vmx_init(void)
3187 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); 3560 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
3188 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); 3561 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
3189 3562
3190 if (cpu_has_vmx_ept()) 3563 if (vm_need_ept()) {
3191 bypass_guest_pf = 0; 3564 bypass_guest_pf = 0;
3565 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3566 VMX_EPT_WRITABLE_MASK |
3567 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3568 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3569 VMX_EPT_EXECUTABLE_MASK);
3570 kvm_enable_tdp();
3571 } else
3572 kvm_disable_tdp();
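Moving the EPT mask setup from vcpu creation into vmx_init() means it now runs exactly once, before any guest exists. As a worked example of what the base-PTE call composes (the numeric encodings below are the usual EPT bit positions, which this hunk does not spell out, so treat them as an assumption):

/* Illustrative only: assuming R = bit 0, W = bit 1 and the memory type in
 * bits 5:3 with write-back (6) as the default, the base pattern applied to
 * every EPT-backed PTE becomes:
 *
 *   VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK
 *       | (VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT)
 *   = 0x1 | 0x2 | (6 << 3) = 0x33
 *
 * Execute permission (VMX_EPT_EXECUTABLE_MASK) is handled separately via
 * kvm_mmu_set_mask_ptes() above.
 */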
3192 3573
3193 if (bypass_guest_pf) 3574 if (bypass_guest_pf)
3194 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); 3575 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);