Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx.c | 156
 1 file changed, 115 insertions(+), 41 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 81fcbe9515c5..bf89ec2cfb82 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 static int __read_mostly vmm_exclusive = 1;
 module_param(vmm_exclusive, bool, S_IRUGO);
 
+static int __read_mostly yield_on_hlt = 1;
+module_param(yield_on_hlt, bool, S_IRUGO);
+
 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
         (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
 #define KVM_GUEST_CR0_MASK \
@@ -177,6 +180,7 @@ static int init_rmode(struct kvm *kvm);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
+static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -188,6 +192,8 @@ static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 
+static bool cpu_has_load_ia32_efer;
+
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
 
@@ -472,7 +478,7 @@ static void vmcs_clear(struct vmcs *vmcs)
         u8 error;
 
         asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
-                      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+                      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
                       : "cc", "memory");
         if (error)
                 printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
@@ -485,7 +491,7 @@ static void vmcs_load(struct vmcs *vmcs)
         u8 error;
 
         asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
-                      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+                      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
                       : "cc", "memory");
         if (error)
                 printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
@@ -565,10 +571,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
 
 static unsigned long vmcs_readl(unsigned long field)
 {
-        unsigned long value;
+        unsigned long value = 0;
 
         asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
-                      : "=a"(value) : "d"(field) : "cc");
+                      : "+a"(value) : "d"(field) : "cc");
         return value;
 }
 
@@ -661,6 +667,12 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
         unsigned i;
         struct msr_autoload *m = &vmx->msr_autoload;
 
+        if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
+                vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
+                vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
+                return;
+        }
+
         for (i = 0; i < m->nr; ++i)
                 if (m->guest[i].index == msr)
                         break;
@@ -680,6 +692,14 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
         unsigned i;
         struct msr_autoload *m = &vmx->msr_autoload;
 
+        if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
+                vmcs_write64(GUEST_IA32_EFER, guest_val);
+                vmcs_write64(HOST_IA32_EFER, host_val);
+                vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
+                vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
+                return;
+        }
+
         for (i = 0; i < m->nr; ++i)
                 if (m->guest[i].index == msr)
                         break;
@@ -1009,6 +1029,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
         vmx_set_interrupt_shadow(vcpu, 0);
 }
 
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+        /* Ensure that we clear the HLT state in the VMCS. We don't need to
+         * explicitly skip the instruction because if the HLT state is set, then
+         * the instruction is already executing and RIP has already been
+         * advanced. */
+        if (!yield_on_hlt &&
+            vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+                vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
                                 bool has_error_code, u32 error_code,
                                 bool reinject)
@@ -1035,6 +1066,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
                 intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+        vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -1305,8 +1337,11 @@ static __init int vmx_disabled_by_bios(void)
                         && tboot_enabled())
                         return 1;
                 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-                        && !tboot_enabled())
+                        && !tboot_enabled()) {
+                        printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
+                                " activate TXT before enabling KVM\n");
                         return 1;
+                }
         }
 
         return 0;
@@ -1400,6 +1435,14 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
         return 0;
 }
 
+static __init bool allow_1_setting(u32 msr, u32 ctl)
+{
+        u32 vmx_msr_low, vmx_msr_high;
+
+        rdmsr(msr, vmx_msr_low, vmx_msr_high);
+        return vmx_msr_high & ctl;
+}
+
 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 {
         u32 vmx_msr_low, vmx_msr_high;
@@ -1416,7 +1459,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                                 &_pin_based_exec_control) < 0)
                 return -EIO;
 
-        min = CPU_BASED_HLT_EXITING |
+        min =
 #ifdef CONFIG_X86_64
               CPU_BASED_CR8_LOAD_EXITING |
               CPU_BASED_CR8_STORE_EXITING |
@@ -1429,6 +1472,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
               CPU_BASED_MWAIT_EXITING |
               CPU_BASED_MONITOR_EXITING |
               CPU_BASED_INVLPG_EXITING;
+
+        if (yield_on_hlt)
+                min |= CPU_BASED_HLT_EXITING;
+
         opt = CPU_BASED_TPR_SHADOW |
               CPU_BASED_USE_MSR_BITMAPS |
               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -1510,6 +1557,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
         vmcs_conf->vmexit_ctrl = _vmexit_control;
         vmcs_conf->vmentry_ctrl = _vmentry_control;
 
+        cpu_has_load_ia32_efer =
+                allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
+                                VM_ENTRY_LOAD_IA32_EFER)
+                && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
+                                   VM_EXIT_LOAD_IA32_EFER);
+
         return 0;
 }
 
@@ -1683,9 +1736,13 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
         save->limit = vmcs_read32(sf->limit);
         save->ar = vmcs_read32(sf->ar_bytes);
         vmcs_write16(sf->selector, save->base >> 4);
-        vmcs_write32(sf->base, save->base & 0xfffff);
+        vmcs_write32(sf->base, save->base & 0xffff0);
         vmcs_write32(sf->limit, 0xffff);
         vmcs_write32(sf->ar_bytes, 0xf3);
+        if (save->base & 0xf)
+                printk_once(KERN_WARNING "kvm: segment base is not paragraph"
+                            " aligned when entering protected mode (seg=%d)",
+                            seg);
 }
 
 static void enter_rmode(struct kvm_vcpu *vcpu)
@@ -1814,6 +1871,13 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
         vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
 }
 
+static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
+{
+        if (enable_ept && is_paging(vcpu))
+                vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+}
+
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
         ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
@@ -1857,6 +1921,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
                                        unsigned long cr0,
                                        struct kvm_vcpu *vcpu)
 {
+        vmx_decache_cr3(vcpu);
         if (!(cr0 & X86_CR0_PG)) {
                 /* From paging/starting to nonpaging */
                 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1937,7 +2002,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
         if (enable_ept) {
                 eptp = construct_eptp(cr3);
                 vmcs_write64(EPT_POINTER, eptp);
-                guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
+                guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
                         vcpu->kvm->arch.ept_identity_map_addr;
                 ept_load_pdptrs(vcpu);
         }
@@ -2725,7 +2790,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
         vmcs_writel(GUEST_IDTR_BASE, 0);
         vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
 
-        vmcs_write32(GUEST_ACTIVITY_STATE, 0);
+        vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
         vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
         vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
@@ -2787,6 +2852,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
                 return;
         }
 
+        if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+                enable_irq_window(vcpu);
+                return;
+        }
         cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
         cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
         vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
@@ -2814,6 +2883,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
         } else
                 intr |= INTR_TYPE_EXT_INTR;
         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+        vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2841,6 +2911,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
         }
         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                         INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+        vmx_clear_hlt(vcpu);
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -2849,7 +2920,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
                 return 0;
 
         return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-                  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
+                  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
+                   | GUEST_INTR_STATE_NMI));
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -2910,7 +2982,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
          * Cause the #SS fault with 0 error code in VM86 mode.
          */
         if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
-                if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
+                if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
                         return 1;
         /*
          * Forward all other exceptions that are valid in real mode.
@@ -3007,7 +3079,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
         }
 
         if (is_invalid_opcode(intr_info)) {
-                er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD);
+                er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
                 if (er != EMULATE_DONE)
                         kvm_queue_exception(vcpu, UD_VECTOR);
                 return 1;
@@ -3026,7 +3098,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 
                 if (kvm_event_needs_reinjection(vcpu))
                         kvm_mmu_unprotect_page_virt(vcpu, cr2);
-                return kvm_mmu_page_fault(vcpu, cr2, error_code);
+                return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
         }
 
         if (vmx->rmode.vm86_active &&
@@ -3098,7 +3170,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
         ++vcpu->stat.io_exits;
 
         if (string || in)
-                return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
+                return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
         port = exit_qualification >> 16;
         size = (exit_qualification & 7) + 1;
@@ -3118,14 +3190,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
         hypercall[2] = 0xc1;
 }
 
-static void complete_insn_gp(struct kvm_vcpu *vcpu, int err)
-{
-        if (err)
-                kvm_inject_gp(vcpu, 0);
-        else
-                skip_emulated_instruction(vcpu);
-}
-
 static int handle_cr(struct kvm_vcpu *vcpu)
 {
         unsigned long exit_qualification, val;
@@ -3143,21 +3207,21 @@ static int handle_cr(struct kvm_vcpu *vcpu)
                 switch (cr) {
                 case 0:
                         err = kvm_set_cr0(vcpu, val);
-                        complete_insn_gp(vcpu, err);
+                        kvm_complete_insn_gp(vcpu, err);
                         return 1;
                 case 3:
                         err = kvm_set_cr3(vcpu, val);
-                        complete_insn_gp(vcpu, err);
+                        kvm_complete_insn_gp(vcpu, err);
                         return 1;
                 case 4:
                         err = kvm_set_cr4(vcpu, val);
-                        complete_insn_gp(vcpu, err);
+                        kvm_complete_insn_gp(vcpu, err);
                         return 1;
                 case 8: {
                         u8 cr8_prev = kvm_get_cr8(vcpu);
                         u8 cr8 = kvm_register_read(vcpu, reg);
-                        kvm_set_cr8(vcpu, cr8);
-                        skip_emulated_instruction(vcpu);
+                        err = kvm_set_cr8(vcpu, cr8);
+                        kvm_complete_insn_gp(vcpu, err);
                         if (irqchip_in_kernel(vcpu->kvm))
                                 return 1;
                         if (cr8_prev <= cr8)
@@ -3176,8 +3240,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
         case 1: /*mov from cr*/
                 switch (cr) {
                 case 3:
-                        kvm_register_write(vcpu, reg, vcpu->arch.cr3);
-                        trace_kvm_cr_read(cr, vcpu->arch.cr3);
+                        val = kvm_read_cr3(vcpu);
+                        kvm_register_write(vcpu, reg, val);
+                        trace_kvm_cr_read(cr, val);
                         skip_emulated_instruction(vcpu);
                         return 1;
                 case 8:
@@ -3349,6 +3414,11 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu)
         return 1;
 }
 
+static int handle_invd(struct kvm_vcpu *vcpu)
+{
+        return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+}
+
 static int handle_invlpg(struct kvm_vcpu *vcpu)
 {
         unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3377,7 +3447,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
 
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
-        return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
+        return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_task_switch(struct kvm_vcpu *vcpu)
@@ -3476,7 +3546,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
         gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
         trace_kvm_page_fault(gpa, exit_qualification);
-        return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
+        return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0);
 }
 
 static u64 ept_rsvd_mask(u64 spte, int level)
@@ -3592,7 +3662,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                     && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF))
                         return handle_interrupt_window(&vmx->vcpu);
 
-                err = emulate_instruction(vcpu, 0, 0, 0);
+                err = emulate_instruction(vcpu, 0);
 
                 if (err == EMULATE_DO_MMIO) {
                         ret = 0;
@@ -3649,6 +3719,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
         [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
         [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
         [EXIT_REASON_HLT] = handle_halt,
+        [EXIT_REASON_INVD] = handle_invd,
         [EXIT_REASON_INVLPG] = handle_invlpg,
         [EXIT_REASON_VMCALL] = handle_vmcall,
         [EXIT_REASON_VMCLEAR] = handle_vmx_insn,
@@ -3676,6 +3747,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 static const int kvm_vmx_max_exit_handlers =
         ARRAY_SIZE(kvm_vmx_exit_handlers);
 
+static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
+{
+        *info1 = vmcs_readl(EXIT_QUALIFICATION);
+        *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
+}
+
 /*
  * The guest has exited. See if we can fix it or if we need userspace
  * assistance.
@@ -3686,17 +3763,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
         u32 exit_reason = vmx->exit_reason;
         u32 vectoring_info = vmx->idt_vectoring_info;
 
-        trace_kvm_exit(exit_reason, vcpu);
+        trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
 
         /* If guest state is invalid, start emulating */
         if (vmx->emulation_required && emulate_invalid_guest_state)
                 return handle_invalid_guest_state(vcpu);
 
-        /* Access CR3 don't cause VMExit in paging mode, so we need
-         * to sync with guest real CR3. */
-        if (enable_ept && is_paging(vcpu))
-                vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
-
         if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
                 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                 vcpu->run->fail_entry.hardware_entry_failure_reason
@@ -4013,7 +4085,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
               );
 
         vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
-                                  | (1 << VCPU_EXREG_PDPTR));
+                                  | (1 << VCPU_EXREG_PDPTR)
+                                  | (1 << VCPU_EXREG_CR3));
         vcpu->arch.regs_dirty = 0;
 
         vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
@@ -4280,6 +4353,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
         .get_cpl = vmx_get_cpl,
         .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
         .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
+        .decache_cr3 = vmx_decache_cr3,
         .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
         .set_cr0 = vmx_set_cr0,
         .set_cr3 = vmx_set_cr3,
@@ -4320,7 +4394,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
         .get_tdp_level = get_ept_level,
         .get_mt_mask = vmx_get_mt_mask,
 
+        .get_exit_info = vmx_get_exit_info,
         .exit_reasons_str = vmx_exit_reasons_str,
+
         .get_lpage_level = vmx_get_lpage_level,
 
         .cpuid_update = vmx_cpuid_update,
@@ -4396,8 +4472,6 @@ static int __init vmx_init(void)
 
         if (enable_ept) {
                 bypass_guest_pf = 0;
-                kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
-                        VMX_EPT_WRITABLE_MASK);
                 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
                         VMX_EPT_EXECUTABLE_MASK);
                 kvm_enable_tdp();