Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--	arch/x86/kvm/vmx.c	396
1 file changed, 274 insertions(+), 122 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d4918d6fc924..14873b9f8430 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -61,6 +61,21 @@ module_param_named(unrestricted_guest,
 static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
+#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
+	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
+#define KVM_GUEST_CR0_MASK \
+	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
+	(X86_CR0_WP | X86_CR0_NE)
+#define KVM_VM_CR0_ALWAYS_ON \
+	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_CR4_GUEST_OWNED_BITS \
+	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
+	 | X86_CR4_OSXMMEXCPT)
+
+#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
+#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
+
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap: upper bound on the amount of time between two successive
@@ -136,6 +151,8 @@ struct vcpu_vmx {
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
+
+	bool rdtscp_enabled;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -210,7 +227,7 @@ static const u32 vmx_msr_index[] = {
 #ifdef CONFIG_X86_64
 	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
-	MSR_EFER, MSR_K6_STAR,
+	MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
@@ -301,6 +318,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void)
 	return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
 }
 
+static inline bool cpu_has_vmx_ept_1g_page(void)
+{
+	return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT);
+}
+
 static inline int cpu_has_vmx_invept_individual_addr(void)
 {
 	return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -336,9 +358,7 @@ static inline int cpu_has_vmx_ple(void)
 
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
-	return flexpriority_enabled &&
-		(cpu_has_vmx_virtualize_apic_accesses()) &&
-		(irqchip_in_kernel(kvm));
+	return flexpriority_enabled && irqchip_in_kernel(kvm);
 }
 
 static inline int cpu_has_vmx_vpid(void)
@@ -347,6 +367,12 @@ static inline int cpu_has_vmx_vpid(void)
 		SECONDARY_EXEC_ENABLE_VPID;
 }
 
+static inline int cpu_has_vmx_rdtscp(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_RDTSCP;
+}
+
 static inline int cpu_has_virtual_nmis(void)
 {
 	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
@@ -551,22 +577,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
 
-	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
-	if (!vcpu->fpu_active)
-		eb |= 1u << NM_VECTOR;
-	/*
-	 * Unconditionally intercept #DB so we can maintain dr6 without
-	 * reading it every exit.
-	 */
-	eb |= 1u << DB_VECTOR;
-	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
-		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
-			eb |= 1u << BP_VECTOR;
-	}
+	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
+	     (1u << NM_VECTOR) | (1u << DB_VECTOR);
+	if ((vcpu->guest_debug &
+	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
+	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
+		eb |= 1u << BP_VECTOR;
 	if (to_vmx(vcpu)->rmode.vm86_active)
 		eb = ~0;
 	if (enable_ept)
 		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
+	if (vcpu->fpu_active)
+		eb &= ~(1u << NM_VECTOR);
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
@@ -589,7 +611,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	u64 guest_efer;
 	u64 ignore_bits;
 
-	guest_efer = vmx->vcpu.arch.shadow_efer;
+	guest_efer = vmx->vcpu.arch.efer;
 
 	/*
 	 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -767,22 +789,30 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
 {
+	ulong cr0;
+
 	if (vcpu->fpu_active)
 		return;
 	vcpu->fpu_active = 1;
-	vmcs_clear_bits(GUEST_CR0, X86_CR0_TS);
-	if (vcpu->arch.cr0 & X86_CR0_TS)
-		vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
+	cr0 = vmcs_readl(GUEST_CR0);
+	cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
+	cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
+	vmcs_writel(GUEST_CR0, cr0);
 	update_exception_bitmap(vcpu);
+	vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
+	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 }
 
+static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
+
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->fpu_active)
-		return;
-	vcpu->fpu_active = 0;
-	vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
+	vmx_decache_cr0_guest_bits(vcpu);
+	vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
 	update_exception_bitmap(vcpu);
+	vcpu->arch.cr0_guest_owned_bits = 0;
+	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
+	vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
 }
 
 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
@@ -878,6 +908,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 }
 
+static bool vmx_rdtscp_supported(void)
+{
+	return cpu_has_vmx_rdtscp();
+}
+
 /*
  * Swap MSR entry in host/guest MSR entry array.
  */
@@ -913,12 +948,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 		index = __find_msr_index(vmx, MSR_CSTAR);
 		if (index >= 0)
 			move_msr_up(vmx, index, save_nmsrs++);
+		index = __find_msr_index(vmx, MSR_TSC_AUX);
+		if (index >= 0 && vmx->rdtscp_enabled)
+			move_msr_up(vmx, index, save_nmsrs++);
 		/*
 		 * MSR_K6_STAR is only needed on long mode guests, and only
 		 * if efer.sce is enabled.
 		 */
 		index = __find_msr_index(vmx, MSR_K6_STAR);
-		if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE))
+		if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
 			move_msr_up(vmx, index, save_nmsrs++);
 	}
 #endif
@@ -1002,6 +1040,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	case MSR_IA32_SYSENTER_ESP:
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
+	case MSR_TSC_AUX:
+		if (!to_vmx(vcpu)->rdtscp_enabled)
+			return 1;
+		/* Otherwise falls through */
 	default:
 		vmx_load_host_state(to_vmx(vcpu));
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
@@ -1065,7 +1107,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 			vcpu->arch.pat = data;
 			break;
 		}
-		/* Otherwise falls through to kvm_set_msr_common */
+		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		break;
+	case MSR_TSC_AUX:
+		if (!vmx->rdtscp_enabled)
+			return 1;
+		/* Check reserved bit, higher 32 bits should be zero */
+		if ((data >> 32) != 0)
+			return 1;
+		/* Otherwise falls through */
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
@@ -1224,6 +1274,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_USE_IO_BITMAPS |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING |
+	      CPU_BASED_MWAIT_EXITING |
+	      CPU_BASED_MONITOR_EXITING |
 	      CPU_BASED_INVLPG_EXITING;
 	opt = CPU_BASED_TPR_SHADOW |
 	      CPU_BASED_USE_MSR_BITMAPS |
@@ -1243,7 +1295,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_ENABLE_VPID |
 			SECONDARY_EXEC_ENABLE_EPT |
 			SECONDARY_EXEC_UNRESTRICTED_GUEST |
-			SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
+			SECONDARY_EXEC_RDTSCP;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -1457,8 +1510,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots[0].base_gfn +
-				 kvm->memslots[0].npages - 3;
+		struct kvm_memslots *slots;
+		gfn_t base_gfn;
+
+		slots = rcu_dereference(kvm->memslots);
+		base_gfn = kvm->memslots->memslots[0].base_gfn +
+			 kvm->memslots->memslots[0].npages - 3;
 		return base_gfn << PAGE_SHIFT;
 	}
 	return kvm->arch.tss_addr;
@@ -1544,9 +1601,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 	 * of this msr depends on is_long_mode().
 	 */
 	vmx_load_host_state(to_vmx(vcpu));
-	vcpu->arch.shadow_efer = efer;
-	if (!msr)
-		return;
+	vcpu->arch.efer = efer;
 	if (efer & EFER_LMA) {
 		vmcs_write32(VM_ENTRY_CONTROLS,
 			     vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1576,13 +1631,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 			     (guest_tr_ar & ~AR_TYPE_MASK)
 			     | AR_TYPE_BUSY_64_TSS);
 	}
-	vcpu->arch.shadow_efer |= EFER_LMA;
-	vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
+	vcpu->arch.efer |= EFER_LMA;
+	vmx_set_efer(vcpu, vcpu->arch.efer);
 }
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.shadow_efer &= ~EFER_LMA;
+	vcpu->arch.efer &= ~EFER_LMA;
 
 	vmcs_write32(VM_ENTRY_CONTROLS,
 		     vmcs_read32(VM_ENTRY_CONTROLS)
@@ -1598,10 +1653,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 		ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
 }
 
+static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
+{
+	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
+
+	vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
+	vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
+}
+
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK;
-	vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
+	ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
+
+	vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
+	vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
 }
 
 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -1646,7 +1711,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 			     (CPU_BASED_CR3_LOAD_EXITING |
 			      CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
-		vmx_set_cr4(vcpu, vcpu->arch.cr4);
+		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
 	} else if (!is_paging(vcpu)) {
 		/* From nonpaging to paging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1654,23 +1719,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 			     ~(CPU_BASED_CR3_LOAD_EXITING |
 			       CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
-		vmx_set_cr4(vcpu, vcpu->arch.cr4);
+		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
 	}
 
 	if (!(cr0 & X86_CR0_WP))
 		*hw_cr0 &= ~X86_CR0_WP;
 }
 
-static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
-					struct kvm_vcpu *vcpu)
-{
-	if (!is_paging(vcpu)) {
-		*hw_cr4 &= ~X86_CR4_PAE;
-		*hw_cr4 |= X86_CR4_PSE;
-	} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
-		*hw_cr4 &= ~X86_CR4_PAE;
-}
-
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1682,8 +1737,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	else
 		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
 
-	vmx_fpu_deactivate(vcpu);
-
 	if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
 		enter_pmode(vcpu);
 
@@ -1691,7 +1744,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 		enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->arch.shadow_efer & EFER_LME) {
+	if (vcpu->arch.efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
 			enter_lmode(vcpu);
 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
@@ -1702,12 +1755,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	if (enable_ept)
 		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
+	if (!vcpu->fpu_active)
+		hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
+
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
-
-	if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
-		vmx_fpu_activate(vcpu);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
@@ -1738,8 +1791,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 	vmx_flush_tlb(vcpu);
 	vmcs_writel(GUEST_CR3, guest_cr3);
-	if (vcpu->arch.cr0 & X86_CR0_PE)
-		vmx_fpu_deactivate(vcpu);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1748,8 +1799,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
 	vcpu->arch.cr4 = cr4;
-	if (enable_ept)
-		ept_update_paging_mode_cr4(&hw_cr4, vcpu);
+	if (enable_ept) {
+		if (!is_paging(vcpu)) {
+			hw_cr4 &= ~X86_CR4_PAE;
+			hw_cr4 |= X86_CR4_PSE;
+		} else if (!(cr4 & X86_CR4_PAE)) {
+			hw_cr4 &= ~X86_CR4_PAE;
+		}
+	}
 
 	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
@@ -1787,7 +1844,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 
 static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
-	if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
+	if (!is_protmode(vcpu))
 		return 0;
 
 	if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
@@ -2042,7 +2099,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
 static bool guest_state_valid(struct kvm_vcpu *vcpu)
 {
 	/* real mode guest state checks */
-	if (!(vcpu->arch.cr0 & X86_CR0_PE)) {
+	if (!is_protmode(vcpu)) {
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
 			return false;
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -2175,7 +2232,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page)
 		goto out;
 	kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -2188,7 +2245,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
@@ -2197,7 +2254,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.ept_identity_pagetable)
 		goto out;
 	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
@@ -2212,7 +2269,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
 			kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
@@ -2384,14 +2441,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	for (i = 0; i < NR_VMX_MSR; ++i) {
 		u32 index = vmx_msr_index[i];
 		u32 data_low, data_high;
-		u64 data;
 		int j = vmx->nmsrs;
 
 		if (rdmsr_safe(index, &data_low, &data_high) < 0)
 			continue;
 		if (wrmsr_safe(index, data_low, data_high) < 0)
			continue;
-		data = data_low | ((u64)data_high << 32);
 		vmx->guest_msrs[j].index = i;
 		vmx->guest_msrs[j].data = 0;
 		vmx->guest_msrs[j].mask = -1ull;
@@ -2404,7 +2459,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
 
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
-	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
+	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
+	if (enable_ept)
+		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
+	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 
 	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
 	rdtscll(tsc_this);
@@ -2429,10 +2487,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 msr;
-	int ret;
+	int ret, idx;
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	if (!init_rmode(vmx->vcpu.kvm)) {
 		ret = -ENOMEM;
 		goto out;
@@ -2526,7 +2584,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
 	vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-	vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
+	vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
 	vmx_set_cr4(&vmx->vcpu, 0);
 	vmx_set_efer(&vmx->vcpu, 0);
 	vmx_fpu_activate(&vmx->vcpu);
@@ -2540,7 +2598,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmx->emulation_required = 0;
 
 out:
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	return ret;
 }
 
@@ -2717,6 +2775,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 		kvm_queue_exception(vcpu, vec);
 		return 1;
 	case BP_VECTOR:
+		/*
+		 * Update instruction length as we may reinject the exception
+		 * from user space while in guest debugging mode.
+		 */
+		to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
 			return 0;
 		/* fall through */
@@ -2839,6 +2903,13 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
 		/* fall through */
 	case BP_VECTOR:
+		/*
+		 * Update instruction length as we may reinject #BP from
+		 * user space while in guest debugging mode. Reading it for
+		 * #DB as well causes no harm, it is not used in that case.
+		 */
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
 		kvm_run->debug.arch.exception = ex_no;
@@ -2940,11 +3011,10 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 		};
 		break;
 	case 2: /* clts */
-		vmx_fpu_deactivate(vcpu);
-		vcpu->arch.cr0 &= ~X86_CR0_TS;
-		vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
-		vmx_fpu_activate(vcpu);
+		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
+		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
 		skip_emulated_instruction(vcpu);
+		vmx_fpu_activate(vcpu);
 		return 1;
 	case 1: /*mov from cr*/
 		switch (cr) {
@@ -2962,7 +3032,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 		}
 		break;
 	case 3: /* lmsw */
-		kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
+		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
+		trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
+		kvm_lmsw(vcpu, val);
 
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -2975,12 +3047,22 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int check_dr_alias(struct kvm_vcpu *vcpu)
+{
+	if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return -1;
+	}
+	return 0;
+}
+
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
 	unsigned long val;
 	int dr, reg;
 
+	/* Do not handle if the CPL > 0, will trigger GP on re-entry */
 	if (!kvm_require_cpl(vcpu, 0))
 		return 1;
 	dr = vmcs_readl(GUEST_DR7);
@@ -3016,14 +3098,20 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		case 0 ... 3:
 			val = vcpu->arch.db[dr];
 			break;
+		case 4:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
 		case 6:
 			val = vcpu->arch.dr6;
 			break;
-		case 7:
+		case 5:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
+		default: /* 7 */
 			val = vcpu->arch.dr7;
 			break;
-		default:
-			val = 0;
 		}
 		kvm_register_write(vcpu, reg, val);
 	} else {
@@ -3034,21 +3122,25 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
 				vcpu->arch.eff_db[dr] = val;
 			break;
-		case 4 ... 5:
-			if (vcpu->arch.cr4 & X86_CR4_DE)
-				kvm_queue_exception(vcpu, UD_VECTOR);
-			break;
+		case 4:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
 		case 6:
 			if (val & 0xffffffff00000000ULL) {
-				kvm_queue_exception(vcpu, GP_VECTOR);
-				break;
+				kvm_inject_gp(vcpu, 0);
+				return 1;
 			}
 			vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
 			break;
-		case 7:
+		case 5:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
+		default: /* 7 */
 			if (val & 0xffffffff00000000ULL) {
-				kvm_queue_exception(vcpu, GP_VECTOR);
-				break;
+				kvm_inject_gp(vcpu, 0);
+				return 1;
 			}
 			vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
 			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
@@ -3075,6 +3167,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
 	u64 data;
 
 	if (vmx_get_msr(vcpu, ecx, &data)) {
+		trace_kvm_msr_read_ex(ecx);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
@@ -3094,13 +3187,13 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
 	u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-	trace_kvm_msr_write(ecx, data);
-
 	if (vmx_set_msr(vcpu, ecx, data) != 0) {
+		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
 
+	trace_kvm_msr_write(ecx, data);
 	skip_emulated_instruction(vcpu);
 	return 1;
 }
@@ -3385,7 +3478,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		}
 
 		if (err != EMULATE_DONE) {
-			kvm_report_emulation_failure(vcpu, "emulation failure");
 			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 			vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 			vcpu->run->internal.ndata = 0;
@@ -3416,6 +3508,12 @@ static int handle_pause(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_invalid_op(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -3453,6 +3551,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
 	[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
 	[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
+	[EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
+	[EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -3686,9 +3786,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	vmcs_writel(HOST_CR0, read_cr0());
 
-	if (vcpu->arch.switch_db_regs)
-		set_debugreg(vcpu->arch.dr6, 6);
-
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3789,9 +3886,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 				  | (1 << VCPU_EXREG_PDPTR));
 	vcpu->arch.regs_dirty = 0;
 
-	if (vcpu->arch.switch_db_regs)
-		get_debugreg(vcpu->arch.dr6, 6);
-
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
@@ -3920,7 +4014,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *	VT-d engine can guarantee the cache correctness. Just set it
 	 *	to WB to keep consistent with host. So the same as item 3.
-	 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
 	if (is_mmio)
@@ -3931,37 +4025,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 			VMX_EPT_MT_EPTE_SHIFT;
 	else
 		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
-			| VMX_EPT_IGMT_BIT;
+			| VMX_EPT_IPAT_BIT;
 
 	return ret;
 }
 
+#define _ER(x) { EXIT_REASON_##x, #x }
+
 static const struct trace_print_flags vmx_exit_reasons_str[] = {
-	{ EXIT_REASON_EXCEPTION_NMI, "exception" },
-	{ EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" },
-	{ EXIT_REASON_TRIPLE_FAULT, "triple_fault" },
-	{ EXIT_REASON_NMI_WINDOW, "nmi_window" },
-	{ EXIT_REASON_IO_INSTRUCTION, "io_instruction" },
-	{ EXIT_REASON_CR_ACCESS, "cr_access" },
-	{ EXIT_REASON_DR_ACCESS, "dr_access" },
-	{ EXIT_REASON_CPUID, "cpuid" },
-	{ EXIT_REASON_MSR_READ, "rdmsr" },
-	{ EXIT_REASON_MSR_WRITE, "wrmsr" },
-	{ EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" },
-	{ EXIT_REASON_HLT, "halt" },
-	{ EXIT_REASON_INVLPG, "invlpg" },
-	{ EXIT_REASON_VMCALL, "hypercall" },
-	{ EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" },
-	{ EXIT_REASON_APIC_ACCESS, "apic_access" },
-	{ EXIT_REASON_WBINVD, "wbinvd" },
-	{ EXIT_REASON_TASK_SWITCH, "task_switch" },
-	{ EXIT_REASON_EPT_VIOLATION, "ept_violation" },
+	_ER(EXCEPTION_NMI),
+	_ER(EXTERNAL_INTERRUPT),
+	_ER(TRIPLE_FAULT),
+	_ER(PENDING_INTERRUPT),
+	_ER(NMI_WINDOW),
+	_ER(TASK_SWITCH),
+	_ER(CPUID),
+	_ER(HLT),
+	_ER(INVLPG),
+	_ER(RDPMC),
+	_ER(RDTSC),
+	_ER(VMCALL),
+	_ER(VMCLEAR),
+	_ER(VMLAUNCH),
+	_ER(VMPTRLD),
+	_ER(VMPTRST),
+	_ER(VMREAD),
+	_ER(VMRESUME),
+	_ER(VMWRITE),
+	_ER(VMOFF),
+	_ER(VMON),
+	_ER(CR_ACCESS),
+	_ER(DR_ACCESS),
+	_ER(IO_INSTRUCTION),
+	_ER(MSR_READ),
+	_ER(MSR_WRITE),
+	_ER(MWAIT_INSTRUCTION),
+	_ER(MONITOR_INSTRUCTION),
+	_ER(PAUSE_INSTRUCTION),
+	_ER(MCE_DURING_VMENTRY),
+	_ER(TPR_BELOW_THRESHOLD),
+	_ER(APIC_ACCESS),
+	_ER(EPT_VIOLATION),
+	_ER(EPT_MISCONFIG),
+	_ER(WBINVD),
 	{ -1, NULL }
 };
 
-static bool vmx_gb_page_enable(void)
+#undef _ER
+
+static int vmx_get_lpage_level(void)
+{
+	if (enable_ept && !cpu_has_vmx_ept_1g_page())
+		return PT_DIRECTORY_LEVEL;
+	else
+		/* For shadow and EPT supported 1GB page */
+		return PT_PDPE_LEVEL;
+}
+
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
+static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
-	return false;
+	struct kvm_cpuid_entry2 *best;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 exec_control;
+
+	vmx->rdtscp_enabled = false;
+	if (vmx_rdtscp_supported()) {
+		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+		if (exec_control & SECONDARY_EXEC_RDTSCP) {
+			best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+			if (best && (best->edx & bit(X86_FEATURE_RDTSCP)))
+				vmx->rdtscp_enabled = true;
+			else {
+				exec_control &= ~SECONDARY_EXEC_RDTSCP;
+				vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+						exec_control);
+			}
+		}
+	}
 }
 
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -3990,6 +4135,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.set_segment = vmx_set_segment,
 	.get_cpl = vmx_get_cpl,
 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+	.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
 	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
 	.set_cr0 = vmx_set_cr0,
 	.set_cr3 = vmx_set_cr3,
@@ -4002,6 +4148,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+	.fpu_activate = vmx_fpu_activate,
+	.fpu_deactivate = vmx_fpu_deactivate,
 
 	.tlb_flush = vmx_flush_tlb,
 
@@ -4027,7 +4175,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.get_mt_mask = vmx_get_mt_mask,
 
 	.exit_reasons_str = vmx_exit_reasons_str,
-	.gb_page_enable = vmx_gb_page_enable,
+	.get_lpage_level = vmx_get_lpage_level,
+
+	.cpuid_update = vmx_cpuid_update,
+
+	.rdtscp_supported = vmx_rdtscp_supported,
 };
 
 static int __init vmx_init(void)