Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--  arch/x86/kvm/vmx.c | 714
1 file changed, 486 insertions(+), 228 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9120ae1901e4..6667042714cc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,6 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
+static bool __read_mostly enable_apicv_reg_vid;
+
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -92,12 +94,8 @@ module_param(fasteoi, bool, S_IRUGO);
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
-#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
-	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
-#define KVM_GUEST_CR0_MASK \
-	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
-	(X86_CR0_WP | X86_CR0_NE)
+#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON \
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS \
@@ -624,6 +622,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
+static bool guest_state_valid(struct kvm_vcpu *vcpu);
+static u32 vmx_segment_access_rights(struct kvm_segment *var);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -638,6 +638,8 @@ static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
+static unsigned long *vmx_msr_bitmap_legacy_x2apic;
+static unsigned long *vmx_msr_bitmap_longmode_x2apic;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
@@ -762,6 +764,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 }
 
+static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+}
+
+static inline bool cpu_has_vmx_apic_register_virt(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_APIC_REGISTER_VIRT;
+}
+
+static inline bool cpu_has_vmx_virtual_intr_delivery(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
 	return cpu_has_vmx_tpr_shadow() &&
@@ -1694,7 +1714,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
 	__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
-	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 	to_vmx(vcpu)->rflags = rflags;
 	if (to_vmx(vcpu)->rmode.vm86_active) {
 		to_vmx(vcpu)->rmode.save_rflags = rflags;
@@ -1820,6 +1839,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 	vmx->guest_msrs[from] = tmp;
 }
 
+static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
+{
+	unsigned long *msr_bitmap;
+
+	if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+		if (is_long_mode(vcpu))
+			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+		else
+			msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+	} else {
+		if (is_long_mode(vcpu))
+			msr_bitmap = vmx_msr_bitmap_longmode;
+		else
+			msr_bitmap = vmx_msr_bitmap_legacy;
+	}
+
+	vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+}
+
 /*
  * Set up the vmcs to automatically save and restore system
  * msrs. Don't touch the 64-bit msrs if the guest is in legacy
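Note: vmx_set_msr_bitmap() above selects one of the four pre-allocated bitmap pages declared earlier as a pure function of two vcpu properties. A minimal standalone sketch of the same selection logic, with booleans standing in for apic_x2apic_mode()/is_long_mode() (illustration only, not part of the patch):

	static unsigned long *pick_msr_bitmap(bool x2apic, bool long_mode)
	{
		/* x2apic guests get the bitmaps whose 0x800-0x8ff reads are
		 * (mostly) passed through, see vmx_init() further down */
		if (x2apic)
			return long_mode ? vmx_msr_bitmap_longmode_x2apic
					 : vmx_msr_bitmap_legacy_x2apic;
		return long_mode ? vmx_msr_bitmap_longmode
				 : vmx_msr_bitmap_legacy;
	}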
@@ -1828,7 +1866,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
 	int save_nmsrs, index;
-	unsigned long *msr_bitmap;
 
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
@@ -1860,14 +1897,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 
 	vmx->save_nmsrs = save_nmsrs;
 
-	if (cpu_has_vmx_msr_bitmap()) {
-		if (is_long_mode(&vmx->vcpu))
-			msr_bitmap = vmx_msr_bitmap_longmode;
-		else
-			msr_bitmap = vmx_msr_bitmap_legacy;
-
-		vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
-	}
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_set_msr_bitmap(&vmx->vcpu);
 }
 
 /*
@@ -2533,13 +2564,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
 		min2 = 0;
 		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 			SECONDARY_EXEC_WBINVD_EXITING |
 			SECONDARY_EXEC_ENABLE_VPID |
 			SECONDARY_EXEC_ENABLE_EPT |
 			SECONDARY_EXEC_UNRESTRICTED_GUEST |
 			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
 			SECONDARY_EXEC_RDTSCP |
-			SECONDARY_EXEC_ENABLE_INVPCID;
+			SECONDARY_EXEC_ENABLE_INVPCID |
+			SECONDARY_EXEC_APIC_REGISTER_VIRT |
+			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -2550,6 +2584,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
 		_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
 #endif
+
+	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
+		_cpu_based_2nd_exec_control &= ~(
+				SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+				SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+
 	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
 		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
 		   enabled */
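Note: the min2/opt2 pair above is resolved by adjust_vmx_controls(), which this diff does not touch. For reference, a simplified sketch of that helper's contract, assuming the usual VMX capability-MSR encoding (low 32 bits: bits that must be 1; high 32 bits: bits that may be 1):

	static int adjust_vmx_controls_sketch(u32 min, u32 opt, u32 msr, u32 *result)
	{
		u32 vmx_msr_low, vmx_msr_high;
		u32 ctl = min | opt;

		rdmsr(msr, vmx_msr_low, vmx_msr_high);

		ctl &= vmx_msr_high;	/* bit == 0 in high word ==> must be zero */
		ctl |= vmx_msr_low;	/* bit == 1 in low word  ==> must be one  */

		if (min & ~ctl)		/* a required bit did not survive */
			return -EIO;

		*result = ctl;
		return 0;
	}

Optional bits such as SECONDARY_EXEC_APIC_REGISTER_VIRT simply drop out on hardware that lacks them, which is why hardware_setup() below can key enable_apicv_reg_vid off the surviving bits.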
@@ -2747,6 +2788,15 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_ple())
 		ple_gap = 0;
 
+	if (!cpu_has_vmx_apic_register_virt() ||
+				!cpu_has_vmx_virtual_intr_delivery())
+		enable_apicv_reg_vid = 0;
+
+	if (enable_apicv_reg_vid)
+		kvm_x86_ops->update_cr8_intercept = NULL;
+	else
+		kvm_x86_ops->hwapic_irr_update = NULL;
+
 	if (nested)
 		nested_vmx_setup_ctls_msrs();
 
@@ -2758,18 +2808,28 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
-static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
+static bool emulation_required(struct kvm_vcpu *vcpu)
 {
-	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-	struct kvm_segment tmp = *save;
+	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
+}
 
-	if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
-		tmp.base = vmcs_readl(sf->base);
-		tmp.selector = vmcs_read16(sf->selector);
-		tmp.dpl = tmp.selector & SELECTOR_RPL_MASK;
-		tmp.s = 1;
+static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
+			struct kvm_segment *save)
+{
+	if (!emulate_invalid_guest_state) {
+		/*
+		 * CS and SS RPL should be equal during guest entry according
+		 * to the VMX spec, but in reality it is not always so. Since
+		 * the vcpu is in the middle of the transition from real mode
+		 * to protected mode it is safe to assume that RPL 0 is a good
+		 * default value.
+		 */
+		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
+			save->selector &= ~SELECTOR_RPL_MASK;
+		save->dpl = save->selector & SELECTOR_RPL_MASK;
+		save->s = 1;
 	}
-	vmx_set_segment(vcpu, &tmp, seg);
+	vmx_set_segment(vcpu, save, seg);
 }
 
 static void enter_pmode(struct kvm_vcpu *vcpu)
@@ -2777,7 +2837,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	unsigned long flags;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	vmx->emulation_required = 1;
+	/*
+	 * Update the real mode segment cache. It may not be up-to-date if a
+	 * segment register was written while the vcpu was in guest mode.
+	 */
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
+
 	vmx->rmode.vm86_active = 0;
 
 	vmx_segment_cache_clear(vmx);
@@ -2794,22 +2864,16 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 	update_exception_bitmap(vcpu);
 
-	if (emulate_invalid_guest_state)
-		return;
-
-	fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
-	fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
-	fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
-	fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
-
-	vmx_segment_cache_clear(vmx);
-
-	vmcs_write16(GUEST_SS_SELECTOR, 0);
-	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
-
-	vmcs_write16(GUEST_CS_SELECTOR,
-		     vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
-	vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+	fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
+	fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
+	fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+	fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+	fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
+	fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
+
+	/* CPL is always 0 when the CPU enters protected mode */
+	__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+	vmx->cpl = 0;
 }
 
 static gva_t rmode_tss_base(struct kvm *kvm)
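Note: the RPL handling in fix_pmode_seg() is easiest to see with a concrete selector; a worked instance, assuming SELECTOR_RPL_MASK covers the low two bits (0x3):

	u16 sel = 0xf00f;		/* real-mode CS selector with stale RPL 3 */
	sel &= ~SELECTOR_RPL_MASK;	/* 0xf00c: RPL forced to 0 */
	/* dpl = sel & 0x3 == 0, consistent with the CPL 0 established at
	 * the end of enter_pmode() above */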
@@ -2831,36 +2895,51 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
 	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	struct kvm_segment var = *save;
+
+	var.dpl = 0x3;
+	if (seg == VCPU_SREG_CS)
+		var.type = 0x3;
+
+	if (!emulate_invalid_guest_state) {
+		var.selector = var.base >> 4;
+		var.base = var.base & 0xffff0;
+		var.limit = 0xffff;
+		var.g = 0;
+		var.db = 0;
+		var.present = 1;
+		var.s = 1;
+		var.l = 0;
+		var.unusable = 0;
+		var.type = 0x3;
+		var.avl = 0;
+		if (save->base & 0xf)
+			printk_once(KERN_WARNING "kvm: segment base is not "
+					"paragraph aligned when entering "
+					"protected mode (seg=%d)", seg);
+	}
 
-	vmcs_write16(sf->selector, save->base >> 4);
-	vmcs_write32(sf->base, save->base & 0xffff0);
-	vmcs_write32(sf->limit, 0xffff);
-	vmcs_write32(sf->ar_bytes, 0xf3);
-	if (save->base & 0xf)
-		printk_once(KERN_WARNING "kvm: segment base is not paragraph"
-			    " aligned when entering protected mode (seg=%d)",
-			    seg);
+	vmcs_write16(sf->selector, var.selector);
+	vmcs_write32(sf->base, var.base);
+	vmcs_write32(sf->limit, var.limit);
+	vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
 }
 
 static void enter_rmode(struct kvm_vcpu *vcpu)
 {
 	unsigned long flags;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_segment var;
-
-	if (enable_unrestricted_guest)
-		return;
 
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
 
-	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 1;
 
-
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
 	 * vcpu. Call it here with phys address pointing 16M below 4G.
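Note: fix_rmode_seg() encodes the real-mode rule that a segment's base is its selector shifted left by four. A worked instance of the conversion done in the !emulate_invalid_guest_state branch (illustration only):

	/* suppose the cached segment base is 0x12345 */
	u16 selector = 0x12345 >> 4;		/* 0x1234 */
	u32 base     = 0x12345 & 0xffff0;	/* 0x12340: low nibble dropped */
	/* 0x12345 & 0xf == 0x5, so the "not paragraph aligned" warning
	 * above fires once for this base */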
@@ -2888,28 +2967,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
 	update_exception_bitmap(vcpu);
 
-	if (emulate_invalid_guest_state)
-		goto continue_rmode;
-
-	vmx_get_segment(vcpu, &var, VCPU_SREG_SS);
-	vmx_set_segment(vcpu, &var, VCPU_SREG_SS);
-
-	vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
-	vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
-
-	vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
-	vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
-
-	vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
-	vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
-
-	vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
-	vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
-
-	vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
-	vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
-
-continue_rmode:
+	fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
+	fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
+	fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+	fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+	fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
+	fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
 
 	kvm_mmu_reset_context(vcpu);
 }
 
@@ -3068,17 +3132,18 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long hw_cr0;
 
+	hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
 	if (enable_unrestricted_guest)
-		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST)
-			| KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
-	else
-		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
+		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
+	else {
+		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
 
-	if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
-		enter_pmode(vcpu);
+		if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
+			enter_pmode(vcpu);
 
-	if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
-		enter_rmode(vcpu);
+		if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
+			enter_rmode(vcpu);
+	}
 
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.efer & EFER_LME) {
@@ -3098,7 +3163,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
-	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+
+	/* depends on vcpu->arch.cr0 to be set to a new value */
+	vmx->emulation_required = emulation_required(vcpu);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
@@ -3155,6 +3222,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (!is_paging(vcpu)) {
 		hw_cr4 &= ~X86_CR4_PAE;
 		hw_cr4 |= X86_CR4_PSE;
+		/*
+		 * SMEP is disabled if CPU is in non-paging mode in
+		 * hardware. However KVM always uses paging mode to
+		 * emulate guest non-paging mode with TDP.
+		 * To emulate this behavior, SMEP needs to be manually
+		 * disabled when guest switches to non-paging mode.
+		 */
+		hw_cr4 &= ~X86_CR4_SMEP;
 	} else if (!(cr4 & X86_CR4_PAE)) {
 		hw_cr4 &= ~X86_CR4_PAE;
 	}
@@ -3171,10 +3246,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 ar;
 
-	if (vmx->rmode.vm86_active
-	    && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
-		|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
-		|| seg == VCPU_SREG_GS)) {
+	if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
 		*var = vmx->rmode.segs[seg];
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
@@ -3187,8 +3259,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	var->limit = vmx_read_guest_seg_limit(vmx, seg);
 	var->selector = vmx_read_guest_seg_selector(vmx, seg);
 	ar = vmx_read_guest_seg_ar(vmx, seg);
-	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
-		ar = 0;
 	var->type = ar & 15;
 	var->s = (ar >> 4) & 1;
 	var->dpl = (ar >> 5) & 3;
@@ -3211,8 +3281,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
 	return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
 }
 
-static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
 	if (!is_protmode(vcpu))
 		return 0;
 
@@ -3220,24 +3292,9 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
 	    && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
 		return 3;
 
-	return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
-}
-
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	/*
-	 * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations
-	 * fail; use the cache instead.
-	 */
-	if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) {
-		return vmx->cpl;
-	}
-
 	if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
 		__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
-		vmx->cpl = __vmx_get_cpl(vcpu);
+		vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
 	}
 
 	return vmx->cpl;
@@ -3269,28 +3326,23 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-	u32 ar;
 
 	vmx_segment_cache_clear(vmx);
+	if (seg == VCPU_SREG_CS)
+		__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 
-	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
-		vmcs_write16(sf->selector, var->selector);
-		vmx->rmode.segs[VCPU_SREG_TR] = *var;
-		return;
+	if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
+		vmx->rmode.segs[seg] = *var;
+		if (seg == VCPU_SREG_TR)
+			vmcs_write16(sf->selector, var->selector);
+		else if (var->s)
+			fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
+		goto out;
 	}
+
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
-	if (vmx->rmode.vm86_active && var->s) {
-		vmx->rmode.segs[seg] = *var;
-		/*
-		 * Hack real-mode segments into vm86 compatibility.
-		 */
-		if (var->base == 0xffff0000 && var->selector == 0xf000)
-			vmcs_writel(sf->base, 0xf0000);
-		ar = 0xf3;
-	} else
-		ar = vmx_segment_access_rights(var);
 
 	/*
 	 * Fix the "Accessed" bit in AR field of segment registers for older
@@ -3304,42 +3356,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	 * kvm hack.
 	 */
 	if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
-		ar |= 0x1; /* Accessed */
+		var->type |= 0x1; /* Accessed */
 
-	vmcs_write32(sf->ar_bytes, ar);
-	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+	vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
 
-	/*
-	 * Fix segments for real mode guest in hosts that don't have
-	 * "unrestricted_mode" or it was disabled.
-	 * This is done to allow migration of the guests from hosts with
-	 * unrestricted guest like Westmere to older host that don't have
-	 * unrestricted guest like Nehelem.
-	 */
-	if (vmx->rmode.vm86_active) {
-		switch (seg) {
-		case VCPU_SREG_CS:
-			vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
-			vmcs_write32(GUEST_CS_LIMIT, 0xffff);
-			if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
-				vmcs_writel(GUEST_CS_BASE, 0xf0000);
-			vmcs_write16(GUEST_CS_SELECTOR,
-				     vmcs_readl(GUEST_CS_BASE) >> 4);
-			break;
-		case VCPU_SREG_ES:
-		case VCPU_SREG_DS:
-		case VCPU_SREG_GS:
-		case VCPU_SREG_FS:
-			fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
-			break;
-		case VCPU_SREG_SS:
-			vmcs_write16(GUEST_SS_SELECTOR,
-				     vmcs_readl(GUEST_SS_BASE) >> 4);
-			vmcs_write32(GUEST_SS_LIMIT, 0xffff);
-			vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
-			break;
-		}
-	}
+out:
+	vmx->emulation_required |= emulation_required(vcpu);
 }
 
 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -3380,13 +3402,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 	u32 ar;
 
 	vmx_get_segment(vcpu, &var, seg);
+	var.dpl = 0x3;
+	if (seg == VCPU_SREG_CS)
+		var.type = 0x3;
 	ar = vmx_segment_access_rights(&var);
 
 	if (var.base != (var.selector << 4))
 		return false;
-	if (var.limit < 0xffff)
+	if (var.limit != 0xffff)
 		return false;
-	if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3)
+	if (ar != 0xf3)
 		return false;
 
 	return true;
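Note: the 0xf3 tested above decodes with the same access-rights layout vmx_get_segment() uses earlier in this diff (type in bits 0-3, S in bit 4, DPL in bits 5-6, P in bit 7):

	u32 ar = 0xf3;
	unsigned type = ar & 15;	/* 0x3: read/write, accessed data segment */
	unsigned s    = (ar >> 4) & 1;	/* 1: code/data, not a system segment */
	unsigned dpl  = (ar >> 5) & 3;	/* 3: matches the var.dpl forced above */
	unsigned p    = (ar >> 7) & 1;	/* 1: present */

Forcing var.dpl (and var.type for CS) before recomputing the access rights is what lets the old masked comparison collapse to a single equality.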
@@ -3521,6 +3546,9 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
  */
 static bool guest_state_valid(struct kvm_vcpu *vcpu)
 {
+	if (enable_unrestricted_guest)
+		return true;
+
 	/* real mode guest state checks */
 	if (!is_protmode(vcpu)) {
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
@@ -3644,12 +3672,9 @@ static void seg_setup(int seg)
 	vmcs_write16(sf->selector, 0);
 	vmcs_writel(sf->base, 0);
 	vmcs_write32(sf->limit, 0xffff);
-	if (enable_unrestricted_guest) {
-		ar = 0x93;
-		if (seg == VCPU_SREG_CS)
-			ar |= 0x08; /* code segment */
-	} else
-		ar = 0xf3;
+	ar = 0x93;
+	if (seg == VCPU_SREG_CS)
+		ar |= 0x08; /* code segment */
 
 	vmcs_write32(sf->ar_bytes, ar);
 }
@@ -3667,7 +3692,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	kvm_userspace_mem.flags = 0;
 	kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
+	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
 	if (r)
 		goto out;
 
@@ -3697,7 +3722,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	kvm_userspace_mem.guest_phys_addr =
 		kvm->arch.ept_identity_map_addr;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
+	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
 	if (r)
 		goto out;
 
@@ -3739,7 +3764,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
+#define MSR_TYPE_R	1
+#define MSR_TYPE_W	2
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
 
@@ -3752,20 +3780,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
 	 */
 	if (msr <= 0x1fff) {
-		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
-		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__clear_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__clear_bit(msr, msr_bitmap + 0x800 / f);
+
 	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
 		msr &= 0x1fff;
-		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
-		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__clear_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__clear_bit(msr, msr_bitmap + 0xc00 / f);
+
+	}
+}
+
+static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
+{
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	if (msr <= 0x1fff) {
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__set_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__set_bit(msr, msr_bitmap + 0x800 / f);
+
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__set_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__set_bit(msr, msr_bitmap + 0xc00 / f);
+
 	}
 }
 
 static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
 {
 	if (!longmode_only)
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+						msr, MSR_TYPE_R | MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+					msr, MSR_TYPE_R | MSR_TYPE_W);
+}
+
+static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
+{
+	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_R);
+	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+{
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_R);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+{
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_W);
 }
 
 /*
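Note: the 4K MSR-bitmap page manipulated above is divided by hardware into four 1K quadrants: read-low at 0x000, read-high at 0x400, write-low at 0x800, write-high at 0xc00. A sketch computing the absolute bit offset for a given MSR and access type, mirroring the arithmetic above (illustration only):

	/* bit offset within the 4K bitmap page, or -1 if the MSR is
	 * outside the two controllable ranges (always intercepted) */
	static long msr_bitmap_bit(u32 msr, bool write)
	{
		long base = write ? 0x800 * 8 : 0;	/* write quadrants follow reads */

		if (msr <= 0x1fff)			/* low MSR range */
			return base + msr;
		if (msr >= 0xc0000000 && msr <= 0xc0001fff)
			return base + 0x400 * 8 + (msr & 0x1fff);
		return -1;
	}

A clear bit lets the access through without a VM exit; a set bit forces the exit, which is what the enable/disable helper pairs above toggle.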
@@ -3844,6 +3945,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	return exec_control;
 }
 
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+	return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
+}
+
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3861,6 +3967,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 	if (!ple_gap)
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
 	return exec_control;
 }
 
@@ -3905,6 +4015,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 				vmx_secondary_exec_control(vmx));
 	}
 
+	if (enable_apicv_reg_vid) {
+		vmcs_write64(EOI_EXIT_BITMAP0, 0);
+		vmcs_write64(EOI_EXIT_BITMAP1, 0);
+		vmcs_write64(EOI_EXIT_BITMAP2, 0);
+		vmcs_write64(EOI_EXIT_BITMAP3, 0);
+
+		vmcs_write16(GUEST_INTR_STATUS, 0);
+	}
+
 	if (ple_gap) {
 		vmcs_write32(PLE_GAP, ple_gap);
 		vmcs_write32(PLE_WINDOW, ple_window);
@@ -3990,14 +4109,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmx_segment_cache_clear(vmx);
 
 	seg_setup(VCPU_SREG_CS);
-	/*
-	 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
-	 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
-	 */
-	if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
+	if (kvm_vcpu_is_bsp(&vmx->vcpu))
 		vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
-		vmcs_writel(GUEST_CS_BASE, 0x000f0000);
-	} else {
+	else {
 		vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
 		vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
 	}
@@ -4073,9 +4187,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	ret = 0;
 
-	/* HACK: Don't enable emulation on guest boot/reset */
-	vmx->emulation_required = 0;
-
 	return ret;
 }
 
@@ -4251,7 +4362,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 		.flags = 0,
 	};
 
-	ret = kvm_set_memory_region(kvm, &tss_mem, 0);
+	ret = kvm_set_memory_region(kvm, &tss_mem, false);
 	if (ret)
 		return ret;
 	kvm->arch.tss_addr = addr;
@@ -4261,28 +4372,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	return 0;
 }
 
-static int handle_rmode_exception(struct kvm_vcpu *vcpu,
-				  int vec, u32 err_code)
+static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
 {
-	/*
-	 * Instruction with address size override prefix opcode 0x67
-	 * Cause the #SS fault with 0 error code in VM86 mode.
-	 */
-	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
-		if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
-			return 1;
-	/*
-	 * Forward all other exceptions that are valid in real mode.
-	 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
-	 * the required debugging infrastructure rework.
-	 */
 	switch (vec) {
-	case DB_VECTOR:
-		if (vcpu->guest_debug &
-		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
-			return 0;
-		kvm_queue_exception(vcpu, vec);
-		return 1;
 	case BP_VECTOR:
 		/*
 		 * Update instruction length as we may reinject the exception
@@ -4291,7 +4383,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 		to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
 			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
-			return 0;
+			return false;
+		/* fall through */
+	case DB_VECTOR:
+		if (vcpu->guest_debug &
+			(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+			return false;
 		/* fall through */
 	case DE_VECTOR:
 	case OF_VECTOR:
@@ -4301,10 +4398,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	case SS_VECTOR:
 	case GP_VECTOR:
 	case MF_VECTOR:
-		kvm_queue_exception(vcpu, vec);
-		return 1;
+		return true;
+	break;
 	}
-	return 0;
+	return false;
+}
+
+static int handle_rmode_exception(struct kvm_vcpu *vcpu,
+				  int vec, u32 err_code)
+{
+	/*
+	 * Instruction with address size override prefix opcode 0x67
+	 * Cause the #SS fault with 0 error code in VM86 mode.
+	 */
+	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
+		if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+			if (vcpu->arch.halt_request) {
+				vcpu->arch.halt_request = 0;
+				return kvm_emulate_halt(vcpu);
+			}
+			return 1;
+		}
+		return 0;
+	}
+
+	/*
+	 * Forward all other exceptions that are valid in real mode.
+	 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
+	 * the required debugging infrastructure rework.
+	 */
+	kvm_queue_exception(vcpu, vec);
+	return 1;
 }
 
 /*
@@ -4392,17 +4516,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
 	}
 
-	if (vmx->rmode.vm86_active &&
-	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
-								error_code)) {
-		if (vcpu->arch.halt_request) {
-			vcpu->arch.halt_request = 0;
-			return kvm_emulate_halt(vcpu);
-		}
-		return 1;
-	}
-
 	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+
+	if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
+		return handle_rmode_exception(vcpu, ex_no, error_code);
+
 	switch (ex_no) {
 	case DB_VECTOR:
 		dr6 = vmcs_readl(EXIT_QUALIFICATION);
@@ -4820,6 +4938,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
 	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
+static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	int vector = exit_qualification & 0xff;
+
+	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
+	kvm_apic_set_eoi_accelerated(vcpu, vector);
+	return 1;
+}
+
+static int handle_apic_write(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	u32 offset = exit_qualification & 0xfff;
+
+	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
+	kvm_apic_write_nodecode(vcpu, offset);
+	return 1;
+}
+
 static int handle_task_switch(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5065,7 +5203,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 			schedule();
 	}
 
-	vmx->emulation_required = !guest_state_valid(vcpu);
+	vmx->emulation_required = emulation_required(vcpu);
 out:
 	return ret;
 }
@@ -5754,6 +5892,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_VMON]                    = handle_vmon,
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
+	[EXIT_REASON_APIC_WRITE]              = handle_apic_write,
+	[EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
@@ -5780,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
 	u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
 	gpa_t bitmap;
 
-	if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS))
+	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
 		return 1;
 
 	/*
@@ -6008,7 +6148,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
 	/* If guest state is invalid, start emulating */
-	if (vmx->emulation_required && emulate_invalid_guest_state)
+	if (vmx->emulation_required)
 		return handle_invalid_guest_state(vcpu);
 
 	/*
@@ -6103,6 +6243,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
+static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+{
+	u32 sec_exec_control;
+
+	/*
+	 * There is no point in enabling virtualized x2apic mode
+	 * without apicv.
+	 */
+	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
+				!vmx_vm_has_apicv(vcpu->kvm))
+		return;
+
+	if (!vm_need_tpr_shadow(vcpu->kvm))
+		return;
+
+	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+
+	if (set) {
+		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+	} else {
+		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+	}
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+
+	vmx_set_msr_bitmap(vcpu);
+}
+
+static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
+{
+	u16 status;
+	u8 old;
+
+	if (!vmx_vm_has_apicv(kvm))
+		return;
+
+	if (isr == -1)
+		isr = 0;
+
+	status = vmcs_read16(GUEST_INTR_STATUS);
+	old = status >> 8;
+	if (isr != old) {
+		status &= 0xff;
+		status |= isr << 8;
+		vmcs_write16(GUEST_INTR_STATUS, status);
+	}
+}
+
+static void vmx_set_rvi(int vector)
+{
+	u16 status;
+	u8 old;
+
+	status = vmcs_read16(GUEST_INTR_STATUS);
+	old = (u8)status & 0xff;
+	if ((u8)vector != old) {
+		status &= ~0xff;
+		status |= (u8)vector;
+		vmcs_write16(GUEST_INTR_STATUS, status);
+	}
+}
+
+static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
+{
+	if (max_irr == -1)
+		return;
+
+	vmx_set_rvi(max_irr);
+}
+
+static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
+	vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
+	vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
+	vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
+}
+
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
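Note: vmx_set_rvi() and vmx_hwapic_isr_update() above both manipulate the 16-bit GUEST_INTR_STATUS field, which packs the requesting virtual interrupt (RVI) in the low byte and the servicing virtual interrupt (SVI) in the high byte; a sketch of the packing, assuming that layout:

	u16 status = vmcs_read16(GUEST_INTR_STATUS);
	u8 rvi = status & 0xff;	/* highest vector awaiting delivery */
	u8 svi = status >> 8;	/* highest vector currently in service */

	/* e.g. vector 0x31 pending while vector 0x30 is in service: */
	status = (0x30 << 8) | 0x31;
	vmcs_write16(GUEST_INTR_STATUS, status);

With virtual interrupt delivery the CPU evaluates RVI itself to inject interrupts and updates SVI on EOI, so software only needs to keep these fields in sync with the local APIC state, as done here.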
@@ -6291,7 +6510,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	/* Don't enter VMX if guest state is invalid, let the exit handler
 	   start emulation until we arrive back to a valid state */
-	if (vmx->emulation_required && emulate_invalid_guest_state)
+	if (vmx->emulation_required)
 		return;
 
 	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -7366,6 +7585,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
+	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+	.vm_has_apicv = vmx_vm_has_apicv,
+	.load_eoi_exitmap = vmx_load_eoi_exitmap,
+	.hwapic_irr_update = vmx_hwapic_irr_update,
+	.hwapic_isr_update = vmx_hwapic_isr_update,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
@@ -7398,7 +7622,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	int r, i;
+	int r, i, msr;
 
 	rdmsrl_safe(MSR_EFER, &host_efer);
 
@@ -7419,11 +7643,19 @@ static int __init vmx_init(void)
 	if (!vmx_msr_bitmap_legacy)
 		goto out1;
 
+	vmx_msr_bitmap_legacy_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
 
 	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_msr_bitmap_longmode)
-		goto out2;
+		goto out3;
 
+	vmx_msr_bitmap_longmode_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
 
 	/*
 	 * Allow direct access to the PC debug port (it is often used for I/O
@@ -7455,6 +7687,28 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv_reg_vid) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According to the SDM, in x2apic mode the whole id reg is
+		 * used, but KVM uses only the highest eight bits, so reads
+		 * of it must stay intercepted */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+		/* EOI */
+		vmx_disable_intercept_msr_write_x2apic(0x80b);
+		/* SELF-IPI */
+		vmx_disable_intercept_msr_write_x2apic(0x83f);
+	}
 
 	if (enable_ept) {
 		kvm_mmu_set_mask_ptes(0ull,
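Note: the magic MSR numbers above follow the architectural x2APIC convention that MSR 0x800 + (reg >> 4) maps the xAPIC register at MMIO offset reg; decoding the constants used in the hunk:

	#define X2APIC_MSR(reg)	(0x800 + ((reg) >> 4))
	/* X2APIC_MSR(0x020) == 0x802: APIC ID   (read stays intercepted) */
	/* X2APIC_MSR(0x080) == 0x808: TPR       (write passed through)   */
	/* X2APIC_MSR(0x0b0) == 0x80b: EOI       (write passed through)   */
	/* X2APIC_MSR(0x390) == 0x839: TMCCT     (read stays intercepted) */
	/* X2APIC_MSR(0x3f0) == 0x83f: SELF IPI  (write passed through)   */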
@@ -7468,8 +7722,10 @@ static int __init vmx_init(void)
 
 	return 0;
 
-out3:
+out4:
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
 out2:
 	free_page((unsigned long)vmx_msr_bitmap_legacy);
 out1:
@@ -7481,6 +7737,8 @@ out:
 
 static void __exit vmx_exit(void)
 {
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 	free_page((unsigned long)vmx_msr_bitmap_legacy);
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
 	free_page((unsigned long)vmx_io_bitmap_b);