path: root/arch/x86/kvm/vmx.c
author    Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/kvm/vmx.c
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent    6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--  arch/x86/kvm/vmx.c  724
1 file changed, 488 insertions(+), 236 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7bddfab12013..d48ec60ea421 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5,7 +5,7 @@
  * machines without emulation or binary translation.
  *
  * Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Authors:
  *   Avi Kivity   <avi@qumranet.com>
@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 static int __read_mostly vmm_exclusive = 1;
 module_param(vmm_exclusive, bool, S_IRUGO);
 
+static int __read_mostly yield_on_hlt = 1;
+module_param(yield_on_hlt, bool, S_IRUGO);
+
 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
 	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
 #define KVM_GUEST_CR0_MASK \
@@ -90,14 +93,14 @@ module_param(vmm_exclusive, bool, S_IRUGO);
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
  *             executions of PAUSE in a loop. Also indicate if ple enabled.
- *             According to test, this time is usually small than 41 cycles.
+ *             According to test, this time is usually smaller than 128 cycles.
 * ple_window: upper bound on the amount of time a guest is allowed to execute
 *             in a PAUSE loop. Tests indicate that most spinlocks are held for
 *             less than 2^12 cycles
 * Time is measured based on a counter that runs at the same rate as the TSC,
 * refer SDM volume 3b section 21.6.13 & 22.1.3.
 */
-#define KVM_VMX_DEFAULT_PLE_GAP    41
+#define KVM_VMX_DEFAULT_PLE_GAP    128
 #define KVM_VMX_DEFAULT_PLE_WINDOW 4096
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
@@ -125,7 +128,11 @@ struct vcpu_vmx {
 	unsigned long host_rsp;
 	int           launched;
 	u8            fail;
+	u8            cpl;
+	bool          nmi_known_unmasked;
+	u32           exit_intr_info;
 	u32           idt_vectoring_info;
+	ulong         rflags;
 	struct shared_msr_entry *guest_msrs;
 	int           nmsrs;
 	int           save_nmsrs;
@@ -154,12 +161,11 @@ struct vcpu_vmx {
 			u32 limit;
 			u32 ar;
 		} tr, es, ds, fs, gs;
-		struct {
-			bool pending;
-			u8 vector;
-			unsigned rip;
-		} irq;
 	} rmode;
+	struct {
+		u32 bitmask; /* 4 bits per segment (1 bit per field) */
+		struct kvm_save_segment seg[8];
+	} segment_cache;
 	int vpid;
 	bool emulation_required;
 
@@ -172,15 +178,25 @@ struct vcpu_vmx {
 	bool rdtscp_enabled;
 };
 
+enum segment_cache_field {
+	SEG_FIELD_SEL = 0,
+	SEG_FIELD_BASE = 1,
+	SEG_FIELD_LIMIT = 2,
+	SEG_FIELD_AR = 3,
+
+	SEG_FIELD_NR = 4
+};
+
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 {
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
-static int init_rmode(struct kvm *kvm);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
+static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -192,6 +208,8 @@ static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 
+static bool cpu_has_load_ia32_efer;
+
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
 
@@ -476,7 +494,7 @@ static void vmcs_clear(struct vmcs *vmcs)
 	u8 error;
 
 	asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
-		      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+		      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
 		      : "cc", "memory");
 	if (error)
 		printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
@@ -489,7 +507,7 @@ static void vmcs_load(struct vmcs *vmcs)
 	u8 error;
 
 	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
-			: "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+			: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
 			: "cc", "memory");
 	if (error)
 		printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
@@ -505,7 +523,6 @@ static void __vcpu_clear(void *arg)
 		vmcs_clear(vmx->vmcs);
 	if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
 		per_cpu(current_vmcs, cpu) = NULL;
-	rdtscll(vmx->vcpu.arch.host_tsc);
 	list_del(&vmx->local_vcpus_link);
 	vmx->vcpu.cpu = -1;
 	vmx->launched = 0;
@@ -570,10 +587,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
 
 static unsigned long vmcs_readl(unsigned long field)
 {
-	unsigned long value;
+	unsigned long value = 0;
 
 	asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
-		      : "=a"(value) : "d"(field) : "cc");
+		      : "+a"(value) : "d"(field) : "cc");
 	return value;
 }
 
@@ -642,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
 	vmcs_writel(field, vmcs_readl(field) | mask);
 }
 
+static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+	vmx->segment_cache.bitmask = 0;
+}
+
+static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
+				       unsigned field)
+{
+	bool ret;
+	u32 mask = 1 << (seg * SEG_FIELD_NR + field);
+
+	if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
+		vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+		vmx->segment_cache.bitmask = 0;
+	}
+	ret = vmx->segment_cache.bitmask & mask;
+	vmx->segment_cache.bitmask |= mask;
+	return ret;
+}
+
+static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u16 *p = &vmx->segment_cache.seg[seg].selector;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
+		*p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
+	return *p;
+}
+
+static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
+{
+	ulong *p = &vmx->segment_cache.seg[seg].base;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
+		*p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
+	return *p;
+}
+
+static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u32 *p = &vmx->segment_cache.seg[seg].limit;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
+		*p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
+	return *p;
+}
+
+static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u32 *p = &vmx->segment_cache.seg[seg].ar;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
+		*p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
+	return *p;
+}
+
 static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
@@ -666,6 +739,12 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 	unsigned i;
 	struct msr_autoload *m = &vmx->msr_autoload;
 
+	if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
+		vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
+		vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
+		return;
+	}
+
 	for (i = 0; i < m->nr; ++i)
 		if (m->guest[i].index == msr)
 			break;
@@ -685,6 +764,14 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 	unsigned i;
 	struct msr_autoload *m = &vmx->msr_autoload;
 
+	if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
+		vmcs_write64(GUEST_IA32_EFER, guest_val);
+		vmcs_write64(HOST_IA32_EFER, host_val);
+		vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
+		vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
+		return;
+	}
+
 	for (i = 0; i < m->nr; ++i)
 		if (m->guest[i].index == msr)
 			break;
@@ -706,11 +793,10 @@ static void reload_tss(void)
 	/*
 	 * VT restores TR but not its size. Useless.
 	 */
-	struct desc_ptr gdt;
+	struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
 	struct desc_struct *descs;
 
-	native_store_gdt(&gdt);
-	descs = (void *)gdt.address;
+	descs = (void *)gdt->address;
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
 }
@@ -753,7 +839,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 
 static unsigned long segment_base(u16 selector)
 {
-	struct desc_ptr gdt;
+	struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
 	struct desc_struct *d;
 	unsigned long table_base;
 	unsigned long v;
@@ -761,8 +847,7 @@ static unsigned long segment_base(u16 selector)
 	if (!(selector & ~3))
 		return 0;
 
-	native_store_gdt(&gdt);
-	table_base = gdt.address;
+	table_base = gdt->address;
 
 	if (selector & 4) { /* from ldt */
 		u16 ldt_selector = kvm_read_ldt();
@@ -828,10 +913,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-	if (is_long_mode(&vmx->vcpu)) {
-		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	if (is_long_mode(&vmx->vcpu))
 		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-	}
 #endif
 	for (i = 0; i < vmx->save_nmsrs; ++i)
 		kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -846,23 +930,23 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 
 	++vmx->vcpu.stat.host_state_reload;
 	vmx->host_state.loaded = 0;
-	if (vmx->host_state.fs_reload_needed)
-		loadsegment(fs, vmx->host_state.fs_sel);
+#ifdef CONFIG_X86_64
+	if (is_long_mode(&vmx->vcpu))
+		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#endif
 	if (vmx->host_state.gs_ldt_reload_needed) {
 		kvm_load_ldt(vmx->host_state.ldt_sel);
 #ifdef CONFIG_X86_64
 		load_gs_index(vmx->host_state.gs_sel);
-		wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
 #else
 		loadsegment(gs, vmx->host_state.gs_sel);
 #endif
 	}
+	if (vmx->host_state.fs_reload_needed)
+		loadsegment(fs, vmx->host_state.fs_sel);
 	reload_tss();
 #ifdef CONFIG_X86_64
-	if (is_long_mode(&vmx->vcpu)) {
-		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
-	}
+	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
 	if (current_thread_info()->status & TS_USEDFPU)
 		clts();
@@ -883,7 +967,6 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u64 tsc_this, delta, new_offset;
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 
 	if (!vmm_exclusive)
@@ -897,37 +980,24 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	if (vcpu->cpu != cpu) {
-		struct desc_ptr dt;
+		struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
 		unsigned long sysenter_esp;
 
-		kvm_migrate_timers(vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		local_irq_disable();
 		list_add(&vmx->local_vcpus_link,
 			 &per_cpu(vcpus_on_cpu, cpu));
 		local_irq_enable();
 
-		vcpu->cpu = cpu;
 		/*
 		 * Linux uses per-cpu TSS and GDT, so set these when switching
 		 * processors.
 		 */
 		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-		native_store_gdt(&dt);
-		vmcs_writel(HOST_GDTR_BASE, dt.address);   /* 22.2.4 */
+		vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
-
-		/*
-		 * Make sure the time stamp counter is monotonous.
-		 */
-		rdtscll(tsc_this);
-		if (tsc_this < vcpu->arch.host_tsc) {
-			delta = vcpu->arch.host_tsc - tsc_this;
-			new_offset = vmcs_read64(TSC_OFFSET) + delta;
-			vmcs_write64(TSC_OFFSET, new_offset);
-		}
 	}
 }
 
@@ -972,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
 	unsigned long rflags, save_rflags;
 
-	rflags = vmcs_readl(GUEST_RFLAGS);
-	if (to_vmx(vcpu)->rmode.vm86_active) {
-		rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
-		save_rflags = to_vmx(vcpu)->rmode.save_rflags;
-		rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+	if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
+		__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+		rflags = vmcs_readl(GUEST_RFLAGS);
+		if (to_vmx(vcpu)->rmode.vm86_active) {
+			rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+			save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+			rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+		}
+		to_vmx(vcpu)->rflags = rflags;
 	}
-	return rflags;
+	return to_vmx(vcpu)->rflags;
 }
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
+	__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+	to_vmx(vcpu)->rflags = rflags;
 	if (to_vmx(vcpu)->rmode.vm86_active) {
 		to_vmx(vcpu)->rmode.save_rflags = rflags;
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@@ -1031,6 +1108,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	vmx_set_interrupt_shadow(vcpu, 0);
 }
 
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+	/* Ensure that we clear the HLT state in the VMCS.  We don't need to
+	 * explicitly skip the instruction because if the HLT state is set, then
+	 * the instruction is already executing and RIP has already been
+	 * advanced. */
+	if (!yield_on_hlt &&
+	    vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+		vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code,
 				bool reinject)
@@ -1044,16 +1132,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	}
 
 	if (vmx->rmode.vm86_active) {
-		vmx->rmode.irq.pending = true;
-		vmx->rmode.irq.vector = nr;
-		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		int inc_eip = 0;
 		if (kvm_exception_is_soft(nr))
-			vmx->rmode.irq.rip +=
-				vmx->vcpu.arch.event_exit_inst_len;
-		intr_info |= INTR_TYPE_SOFT_INTR;
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+			inc_eip = vcpu->arch.event_exit_inst_len;
+		if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
+			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
 
@@ -1065,6 +1148,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+	vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -1149,12 +1233,32 @@ static u64 guest_read_tsc(void)
 }
 
 /*
- * writes 'guest_tsc' into guest's timestamp counter "register"
- * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
+ * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
+ * ioctl. In this case the call-back should update internal vmx state to make
+ * the changes effective.
+ */
+static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+{
+	/* Nothing to do here */
+}
+
+/*
+ * writes 'offset' into guest's timestamp counter offset register
 */
-static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
-	vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
+	vmcs_write64(TSC_OFFSET, offset);
+}
+
+static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+{
+	u64 offset = vmcs_read64(TSC_OFFSET);
+	vmcs_write64(TSC_OFFSET, offset + adjustment);
+}
+
+static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+	return target_tsc - native_read_tsc();
 }
 
 /*
@@ -1227,7 +1331,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct shared_msr_entry *msr;
-	u64 host_tsc;
 	int ret = 0;
 
 	switch (msr_index) {
@@ -1237,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		break;
 #ifdef CONFIG_X86_64
 	case MSR_FS_BASE:
+		vmx_segment_cache_clear(vmx);
 		vmcs_writel(GUEST_FS_BASE, data);
 		break;
 	case MSR_GS_BASE:
+		vmx_segment_cache_clear(vmx);
 		vmcs_writel(GUEST_GS_BASE, data);
 		break;
 	case MSR_KERNEL_GS_BASE:
@@ -1257,8 +1362,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		vmcs_writel(GUEST_SYSENTER_ESP, data);
 		break;
 	case MSR_IA32_TSC:
-		rdtscll(host_tsc);
-		guest_write_tsc(data, host_tsc);
+		kvm_write_tsc(vcpu, data);
 		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -1328,16 +1432,25 @@ static __init int vmx_disabled_by_bios(void)
 
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
 	if (msr & FEATURE_CONTROL_LOCKED) {
+		/* launched w/ TXT and VMX disabled */
 		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
 			&& tboot_enabled())
 			return 1;
+		/* launched w/o TXT and VMX only enabled w/ TXT */
+		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
+			&& (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
+			&& !tboot_enabled()) {
+			printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
+				"activate TXT before enabling KVM\n");
+			return 1;
+		}
+		/* launched w/o TXT and VMX disabled */
 		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
 			&& !tboot_enabled())
 			return 1;
 	}
 
 	return 0;
-	/* locked but not enabled */
 }
 
 static void kvm_cpu_vmxon(u64 addr)
@@ -1427,6 +1540,14 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
 	return 0;
 }
 
+static __init bool allow_1_setting(u32 msr, u32 ctl)
+{
+	u32 vmx_msr_low, vmx_msr_high;
+
+	rdmsr(msr, vmx_msr_low, vmx_msr_high);
+	return vmx_msr_high & ctl;
+}
+
 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 {
 	u32 vmx_msr_low, vmx_msr_high;
@@ -1443,7 +1564,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 				&_pin_based_exec_control) < 0)
 		return -EIO;
 
-	min = CPU_BASED_HLT_EXITING |
+	min =
 #ifdef CONFIG_X86_64
 	      CPU_BASED_CR8_LOAD_EXITING |
 	      CPU_BASED_CR8_STORE_EXITING |
@@ -1456,6 +1577,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_MWAIT_EXITING |
 	      CPU_BASED_MONITOR_EXITING |
 	      CPU_BASED_INVLPG_EXITING;
+
+	if (yield_on_hlt)
+		min |= CPU_BASED_HLT_EXITING;
+
 	opt = CPU_BASED_TPR_SHADOW |
 	      CPU_BASED_USE_MSR_BITMAPS |
 	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -1537,6 +1662,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	vmcs_conf->vmexit_ctrl  = _vmexit_control;
 	vmcs_conf->vmentry_ctrl = _vmentry_control;
 
+	cpu_has_load_ia32_efer =
+		allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
+				VM_ENTRY_LOAD_IA32_EFER)
+		&& allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
+				VM_EXIT_LOAD_IA32_EFER);
+
 	return 0;
 }
 
@@ -1657,6 +1788,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 0;
 
+	vmx_segment_cache_clear(vmx);
+
+	vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
 	vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
 	vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
 	vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
@@ -1679,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
 	fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
 
+	vmx_segment_cache_clear(vmx);
+
 	vmcs_write16(GUEST_SS_SELECTOR, 0);
 	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
 
@@ -1710,9 +1846,13 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
 	save->limit = vmcs_read32(sf->limit);
 	save->ar = vmcs_read32(sf->ar_bytes);
 	vmcs_write16(sf->selector, save->base >> 4);
-	vmcs_write32(sf->base, save->base & 0xfffff);
+	vmcs_write32(sf->base, save->base & 0xffff0);
 	vmcs_write32(sf->limit, 0xffff);
 	vmcs_write32(sf->ar_bytes, 0xf3);
+	if (save->base & 0xf)
+		printk_once(KERN_WARNING "kvm: segment base is not paragraph"
+			    " aligned when entering protected mode (seg=%d)",
+			    seg);
 }
 
 static void enter_rmode(struct kvm_vcpu *vcpu)
@@ -1726,6 +1866,21 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 1;
 
+	/*
+	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
+	 * vcpu. Call it here with phys address pointing 16M below 4G.
+	 */
+	if (!vcpu->kvm->arch.tss_addr) {
+		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
+			     "called before entering vcpu\n");
+		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+		vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
+		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	}
+
+	vmx_segment_cache_clear(vmx);
+
+	vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
 	vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
 
@@ -1764,7 +1919,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 continue_rmode:
 	kvm_mmu_reset_context(vcpu);
-	init_rmode(vcpu->kvm);
 }
 
 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1802,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 {
 	u32 guest_tr_ar;
 
+	vmx_segment_cache_clear(to_vmx(vcpu));
+
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
 		printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
@@ -1841,6 +1997,13 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
 }
 
+static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
+{
+	if (enable_ept && is_paging(vcpu))
+		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+}
+
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 	ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
@@ -1856,20 +2019,20 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
 		return;
 
 	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-		vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
-		vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
-		vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
-		vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
+		vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
+		vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
+		vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
+		vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
 	}
 }
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 {
 	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-		vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
-		vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
-		vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
-		vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+		vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+		vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+		vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+		vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
 	}
 
 	__set_bit(VCPU_EXREG_PDPTR,
@@ -1884,6 +2047,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 					unsigned long cr0,
 					struct kvm_vcpu *vcpu)
 {
+	if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+		vmx_decache_cr3(vcpu);
 	if (!(cr0 & X86_CR0_PG)) {
 		/* From paging/starting to nonpaging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1941,6 +2106,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
@@ -1964,7 +2130,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	if (enable_ept) {
 		eptp = construct_eptp(cr3);
 		vmcs_write64(EPT_POINTER, eptp);
-		guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
+		guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
 			vcpu->kvm->arch.ept_identity_map_addr;
 		ept_load_pdptrs(vcpu);
 	}
@@ -1992,23 +2158,39 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	vmcs_writel(GUEST_CR4, hw_cr4);
 }
 
-static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
-{
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
-	return vmcs_readl(sf->base);
-}
-
 static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_save_segment *save;
 	u32 ar;
 
-	var->base = vmcs_readl(sf->base);
-	var->limit = vmcs_read32(sf->limit);
-	var->selector = vmcs_read16(sf->selector);
-	ar = vmcs_read32(sf->ar_bytes);
+	if (vmx->rmode.vm86_active
+	    && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
+		|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
+		|| seg == VCPU_SREG_GS)
+	    && !emulate_invalid_guest_state) {
+		switch (seg) {
+		case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
+		case VCPU_SREG_ES: save = &vmx->rmode.es; break;
+		case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
+		case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
+		case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
+		default: BUG();
+		}
+		var->selector = save->selector;
+		var->base = save->base;
+		var->limit = save->limit;
+		ar = save->ar;
+		if (seg == VCPU_SREG_TR
+		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
+			goto use_saved_rmode_seg;
+	}
+	var->base = vmx_read_guest_seg_base(vmx, seg);
+	var->limit = vmx_read_guest_seg_limit(vmx, seg);
+	var->selector = vmx_read_guest_seg_selector(vmx, seg);
+	ar = vmx_read_guest_seg_ar(vmx, seg);
+use_saved_rmode_seg:
 	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
 	var->type = ar & 15;
@@ -2022,17 +2204,39 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	var->unusable = (ar >> 16) & 1;
 }
 
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
+{
+	struct kvm_segment s;
+
+	if (to_vmx(vcpu)->rmode.vm86_active) {
+		vmx_get_segment(vcpu, &s, seg);
+		return s.base;
+	}
+	return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
+}
+
+static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
 	if (!is_protmode(vcpu))
 		return 0;
 
-	if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+	if (!is_long_mode(vcpu)
+	    && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
 		return 3;
 
-	return vmcs_read16(GUEST_CS_SELECTOR) & 3;
+	return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
 }
 
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+	if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
+		__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+		to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
+	}
+	return to_vmx(vcpu)->cpl;
+}
+
+
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
 	u32 ar;
@@ -2062,7 +2266,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
+	vmx_segment_cache_clear(vmx);
+
 	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
+		vmcs_write16(sf->selector, var->selector);
 		vmx->rmode.tr.selector = var->selector;
 		vmx->rmode.tr.base = var->base;
 		vmx->rmode.tr.limit = var->limit;
@@ -2097,11 +2304,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 		ar |= 0x1; /* Accessed */
 
 	vmcs_write32(sf->ar_bytes, ar);
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 }
 
 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 {
-	u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
+	u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
 
 	*db = (ar >> 14) & 1;
 	*l = (ar >> 13) & 1;
@@ -2323,11 +2531,12 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
 
 static int init_rmode_tss(struct kvm *kvm)
 {
-	gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+	gfn_t fn;
 	u16 data = 0;
-	int ret = 0;
-	int r;
+	int r, idx, ret = 0;
 
+	idx = srcu_read_lock(&kvm->srcu);
+	fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -2351,12 +2560,13 @@ static int init_rmode_tss(struct kvm *kvm)
 
 	ret = 1;
 out:
+	srcu_read_unlock(&kvm->srcu, idx);
 	return ret;
 }
 
 static int init_rmode_identity_map(struct kvm *kvm)
 {
-	int i, r, ret;
+	int i, idx, r, ret;
 	pfn_t identity_map_pfn;
 	u32 tmp;
 
@@ -2371,6 +2581,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
 		return 1;
 	ret = 0;
 	identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+	idx = srcu_read_lock(&kvm->srcu);
 	r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -2386,6 +2597,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
 	kvm->arch.ept_identity_pagetable_done = true;
 	ret = 1;
 out:
+	srcu_read_unlock(&kvm->srcu, idx);
 	return ret;
 }
 
@@ -2515,7 +2727,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
 	u32 host_sysenter_cs, msr_low, msr_high;
 	u32 junk;
-	u64 host_pat, tsc_this, tsc_base;
+	u64 host_pat;
 	unsigned long a;
 	struct desc_ptr dt;
 	int i;
@@ -2656,32 +2868,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
 	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 
-	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
-	rdtscll(tsc_this);
-	if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
-		tsc_base = tsc_this;
-
-	guest_write_tsc(0, tsc_base);
+	kvm_write_tsc(&vmx->vcpu, 0);
 
 	return 0;
 }
 
-static int init_rmode(struct kvm *kvm)
-{
-	int idx, ret = 0;
-
-	idx = srcu_read_lock(&kvm->srcu);
-	if (!init_rmode_tss(kvm))
-		goto exit;
-	if (!init_rmode_identity_map(kvm))
-		goto exit;
-
-	ret = 1;
-exit:
-	srcu_read_unlock(&kvm->srcu, idx);
-	return ret;
-}
-
 static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2689,10 +2880,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	int ret;
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-	if (!init_rmode(vmx->vcpu.kvm)) {
-		ret = -ENOMEM;
-		goto out;
-	}
 
 	vmx->rmode.vm86_active = 0;
 
@@ -2709,6 +2896,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	if (ret != 0)
 		goto out;
 
+	vmx_segment_cache_clear(vmx);
+
 	seg_setup(VCPU_SREG_CS);
 	/*
 	 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
@@ -2757,7 +2946,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmcs_writel(GUEST_IDTR_BASE, 0);
 	vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
 
-	vmcs_write32(GUEST_ACTIVITY_STATE, 0);
+	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
 	vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
@@ -2772,7 +2961,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
 		if (vm_need_tpr_shadow(vmx->vcpu.kvm))
 			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
-				page_to_phys(vmx->vcpu.arch.apic->regs_page));
+				     __pa(vmx->vcpu.arch.apic->regs));
 		vmcs_write32(TPR_THRESHOLD, 0);
 	}
 
@@ -2819,6 +3008,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 		return;
 	}
 
+	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+		enable_irq_window(vcpu);
+		return;
+	}
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
@@ -2834,16 +3027,11 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 
 	++vcpu->stat.irq_injections;
 	if (vmx->rmode.vm86_active) {
-		vmx->rmode.irq.pending = true;
-		vmx->rmode.irq.vector = irq;
-		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		int inc_eip = 0;
 		if (vcpu->arch.interrupt.soft)
-			vmx->rmode.irq.rip +=
-				vmx->vcpu.arch.event_exit_inst_len;
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+			inc_eip = vcpu->arch.event_exit_inst_len;
+		if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
+			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
 	intr = irq | INTR_INFO_VALID_MASK;
@@ -2854,6 +3042,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 	} else
 		intr |= INTR_TYPE_EXT_INTR;
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+	vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2874,19 +3063,15 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 	}
 
 	++vcpu->stat.nmi_injections;
+	vmx->nmi_known_unmasked = false;
 	if (vmx->rmode.vm86_active) {
-		vmx->rmode.irq.pending = true;
-		vmx->rmode.irq.vector = NMI_VECTOR;
-		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			NMI_VECTOR | INTR_TYPE_SOFT_INTR |
-			INTR_INFO_VALID_MASK);
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+		if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
+			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+	vmx_clear_hlt(vcpu);
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -2895,13 +3080,16 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 		return 0;
 
 	return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
+		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
+		   | GUEST_INTR_STATE_NMI));
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
 	if (!cpu_has_virtual_nmis())
 		return to_vmx(vcpu)->soft_vnmi_blocked;
+	if (to_vmx(vcpu)->nmi_known_unmasked)
+		return false;
 	return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
 }
 
@@ -2915,6 +3103,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 			vmx->vnmi_blocked_time = 0;
 		}
 	} else {
+		vmx->nmi_known_unmasked = !masked;
 		if (masked)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
@@ -2945,6 +3134,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	if (ret)
 		return ret;
 	kvm->arch.tss_addr = addr;
+	if (!init_rmode_tss(kvm))
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -2956,7 +3148,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	 * Cause the #SS fault with 0 error code in VM86 mode.
 	 */
 	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
-		if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
+		if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
 			return 1;
 	/*
	 * Forward all other exceptions that are valid in real mode.
@@ -3029,7 +3221,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	enum emulation_result er;
 
 	vect_info = vmx->idt_vectoring_info;
-	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+	intr_info = vmx->exit_intr_info;
 
 	if (is_machine_check(intr_info))
 		return handle_machine_check(vcpu);
@@ -3053,14 +3245,13 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	}
 
 	if (is_invalid_opcode(intr_info)) {
-		er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD);
+		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
 		if (er != EMULATE_DONE)
 			kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
 	}
 
 	error_code = 0;
-	rip = kvm_rip_read(vcpu);
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
@@ -3072,7 +3263,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 
 		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
-		return kvm_mmu_page_fault(vcpu, cr2, error_code);
+		return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
 	}
 
 	if (vmx->rmode.vm86_active &&
@@ -3107,6 +3298,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		vmx->vcpu.arch.event_exit_inst_len =
 			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		rip = kvm_rip_read(vcpu);
 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
 		kvm_run->debug.arch.exception = ex_no;
 		break;
@@ -3144,7 +3336,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
 	++vcpu->stat.io_exits;
 
 	if (string || in)
-		return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
+		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = exit_qualification >> 16;
 	size = (exit_qualification & 7) + 1;
@@ -3164,14 +3356,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 	hypercall[2] = 0xc1;
 }
 
-static void complete_insn_gp(struct kvm_vcpu *vcpu, int err)
-{
-	if (err)
-		kvm_inject_gp(vcpu, 0);
-	else
-		skip_emulated_instruction(vcpu);
-}
-
 static int handle_cr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification, val;
@@ -3189,21 +3373,21 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 		switch (cr) {
 		case 0:
 			err = kvm_set_cr0(vcpu, val);
-			complete_insn_gp(vcpu, err);
+			kvm_complete_insn_gp(vcpu, err);
 			return 1;
 		case 3:
 			err = kvm_set_cr3(vcpu, val);
-			complete_insn_gp(vcpu, err);
+			kvm_complete_insn_gp(vcpu, err);
 			return 1;
 		case 4:
 			err = kvm_set_cr4(vcpu, val);
-			complete_insn_gp(vcpu, err);
+			kvm_complete_insn_gp(vcpu, err);
 			return 1;
 		case 8: {
 				u8 cr8_prev = kvm_get_cr8(vcpu);
 				u8 cr8 = kvm_register_read(vcpu, reg);
-				kvm_set_cr8(vcpu, cr8);
-				skip_emulated_instruction(vcpu);
+				err = kvm_set_cr8(vcpu, cr8);
+				kvm_complete_insn_gp(vcpu, err);
 				if (irqchip_in_kernel(vcpu->kvm))
 					return 1;
 				if (cr8_prev <= cr8)
@@ -3222,8 +3406,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 	case 1: /*mov from cr*/
 		switch (cr) {
 		case 3:
-			kvm_register_write(vcpu, reg, vcpu->arch.cr3);
-			trace_kvm_cr_read(cr, vcpu->arch.cr3);
+			val = kvm_read_cr3(vcpu);
+			kvm_register_write(vcpu, reg, val);
+			trace_kvm_cr_read(cr, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8:
@@ -3346,6 +3531,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
3346 3531
3347static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) 3532static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
3348{ 3533{
3534 kvm_make_request(KVM_REQ_EVENT, vcpu);
3349 return 1; 3535 return 1;
3350} 3536}
3351 3537
@@ -3358,6 +3544,8 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
3358 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; 3544 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
3359 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 3545 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
3360 3546
3547 kvm_make_request(KVM_REQ_EVENT, vcpu);
3548
3361 ++vcpu->stat.irq_window_exits; 3549 ++vcpu->stat.irq_window_exits;
3362 3550
3363 /* 3551 /*
@@ -3392,6 +3580,11 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu)
3392 return 1; 3580 return 1;
3393} 3581}
3394 3582
3583static int handle_invd(struct kvm_vcpu *vcpu)
3584{
3585 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
3586}
3587
3395static int handle_invlpg(struct kvm_vcpu *vcpu) 3588static int handle_invlpg(struct kvm_vcpu *vcpu)
3396{ 3589{
3397 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3590 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3420,7 +3613,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
3420 3613
3421static int handle_apic_access(struct kvm_vcpu *vcpu) 3614static int handle_apic_access(struct kvm_vcpu *vcpu)
3422{ 3615{
3423 return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; 3616 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
3424} 3617}
3425 3618
3426static int handle_task_switch(struct kvm_vcpu *vcpu) 3619static int handle_task_switch(struct kvm_vcpu *vcpu)
@@ -3442,9 +3635,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
3442 switch (type) { 3635 switch (type) {
3443 case INTR_TYPE_NMI_INTR: 3636 case INTR_TYPE_NMI_INTR:
3444 vcpu->arch.nmi_injected = false; 3637 vcpu->arch.nmi_injected = false;
3445 if (cpu_has_virtual_nmis()) 3638 vmx_set_nmi_mask(vcpu, true);
3446 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3447 GUEST_INTR_STATE_NMI);
3448 break; 3639 break;
3449 case INTR_TYPE_EXT_INTR: 3640 case INTR_TYPE_EXT_INTR:
3450 case INTR_TYPE_SOFT_INTR: 3641 case INTR_TYPE_SOFT_INTR:
@@ -3519,7 +3710,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
3519 3710
3520 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 3711 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
3521 trace_kvm_page_fault(gpa, exit_qualification); 3712 trace_kvm_page_fault(gpa, exit_qualification);
3522 return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); 3713 return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0);
3523} 3714}
3524 3715
3525static u64 ept_rsvd_mask(u64 spte, int level) 3716static u64 ept_rsvd_mask(u64 spte, int level)
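In the EPT-violation hunk above, the handler now forwards the low bits of the exit qualification to kvm_mmu_page_fault() as an error code. For reference, and as an assumption taken from the SDM rather than from this patch, those bits decode roughly as:

        /* EPT violation exit qualification, low bits (SDM Vol. 3B; assumed): */
        bool read_fault  = exit_qualification & (1 << 0);  /* access was a data read  */
        bool write_fault = exit_qualification & (1 << 1);  /* access was a data write */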
@@ -3614,6 +3805,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
3614 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; 3805 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
3615 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 3806 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
3616 ++vcpu->stat.nmi_window_exits; 3807 ++vcpu->stat.nmi_window_exits;
3808 kvm_make_request(KVM_REQ_EVENT, vcpu);
3617 3809
3618 return 1; 3810 return 1;
3619} 3811}
@@ -3623,9 +3815,18 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
3623 struct vcpu_vmx *vmx = to_vmx(vcpu); 3815 struct vcpu_vmx *vmx = to_vmx(vcpu);
3624 enum emulation_result err = EMULATE_DONE; 3816 enum emulation_result err = EMULATE_DONE;
3625 int ret = 1; 3817 int ret = 1;
3818 u32 cpu_exec_ctrl;
3819 bool intr_window_requested;
3820
3821 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
3822 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
3626 3823
3627 while (!guest_state_valid(vcpu)) { 3824 while (!guest_state_valid(vcpu)) {
3628 err = emulate_instruction(vcpu, 0, 0, 0); 3825 if (intr_window_requested
3826 && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF))
3827 return handle_interrupt_window(&vmx->vcpu);
3828
3829 err = emulate_instruction(vcpu, 0);
3629 3830
3630 if (err == EMULATE_DO_MMIO) { 3831 if (err == EMULATE_DO_MMIO) {
3631 ret = 0; 3832 ret = 0;
@@ -3682,6 +3883,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
3682 [EXIT_REASON_MSR_WRITE] = handle_wrmsr, 3883 [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
3683 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, 3884 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
3684 [EXIT_REASON_HLT] = handle_halt, 3885 [EXIT_REASON_HLT] = handle_halt,
3886 [EXIT_REASON_INVD] = handle_invd,
3685 [EXIT_REASON_INVLPG] = handle_invlpg, 3887 [EXIT_REASON_INVLPG] = handle_invlpg,
3686 [EXIT_REASON_VMCALL] = handle_vmcall, 3888 [EXIT_REASON_VMCALL] = handle_vmcall,
3687 [EXIT_REASON_VMCLEAR] = handle_vmx_insn, 3889 [EXIT_REASON_VMCLEAR] = handle_vmx_insn,
@@ -3709,6 +3911,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
3709static const int kvm_vmx_max_exit_handlers = 3911static const int kvm_vmx_max_exit_handlers =
3710 ARRAY_SIZE(kvm_vmx_exit_handlers); 3912 ARRAY_SIZE(kvm_vmx_exit_handlers);
3711 3913
3914static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
3915{
3916 *info1 = vmcs_readl(EXIT_QUALIFICATION);
3917 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
3918}
3919
3712/* 3920/*
3713 * The guest has exited. See if we can fix it or if we need userspace 3921 * The guest has exited. See if we can fix it or if we need userspace
3714 * assistance. 3922 * assistance.
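The comment above introduces vmx_handle_exit(), which consults the kvm_vmx_exit_handlers[] table extended earlier with EXIT_REASON_INVD. The dispatch itself falls outside the hunks in this diff; a sketch of the usual pattern, offered as an assumption rather than the exact upstream body:

        if (exit_reason < kvm_vmx_max_exit_handlers
            && kvm_vmx_exit_handlers[exit_reason])
                return kvm_vmx_exit_handlers[exit_reason](vcpu);  /* e.g. handle_invd() */

        /* No handler registered: hand the raw reason to userspace. */
        vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
        vcpu->run->hw.hardware_exit_reason = exit_reason;
        return 0;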
@@ -3719,17 +3927,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
3719 u32 exit_reason = vmx->exit_reason; 3927 u32 exit_reason = vmx->exit_reason;
3720 u32 vectoring_info = vmx->idt_vectoring_info; 3928 u32 vectoring_info = vmx->idt_vectoring_info;
3721 3929
3722 trace_kvm_exit(exit_reason, vcpu); 3930 trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
3723 3931
3724 /* If guest state is invalid, start emulating */ 3932 /* If guest state is invalid, start emulating */
3725 if (vmx->emulation_required && emulate_invalid_guest_state) 3933 if (vmx->emulation_required && emulate_invalid_guest_state)
3726 return handle_invalid_guest_state(vcpu); 3934 return handle_invalid_guest_state(vcpu);
3727 3935
3728 /* Access CR3 don't cause VMExit in paging mode, so we need
3729 * to sync with guest real CR3. */
3730 if (enable_ept && is_paging(vcpu))
3731 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
3732
3733 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { 3936 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
3734 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; 3937 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3735 vcpu->run->fail_entry.hardware_entry_failure_reason 3938 vcpu->run->fail_entry.hardware_entry_failure_reason
@@ -3790,23 +3993,19 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3790 vmcs_write32(TPR_THRESHOLD, irr); 3993 vmcs_write32(TPR_THRESHOLD, irr);
3791} 3994}
3792 3995
3793static void vmx_complete_interrupts(struct vcpu_vmx *vmx) 3996static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
3794{ 3997{
3795 u32 exit_intr_info; 3998 u32 exit_intr_info;
3796 u32 idt_vectoring_info = vmx->idt_vectoring_info;
3797 bool unblock_nmi;
3798 u8 vector;
3799 int type;
3800 bool idtv_info_valid;
3801 3999
3802 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 4000 if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
4001 || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
4002 return;
3803 4003
3804 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 4004 vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
4005 exit_intr_info = vmx->exit_intr_info;
3805 4006
3806 /* Handle machine checks before interrupts are enabled */ 4007 /* Handle machine checks before interrupts are enabled */
3807 if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) 4008 if (is_machine_check(exit_intr_info))
3808 || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
3809 && is_machine_check(exit_intr_info)))
3810 kvm_machine_check(); 4009 kvm_machine_check();
3811 4010
3812 /* We need to handle NMIs before interrupts are enabled */ 4011 /* We need to handle NMIs before interrupts are enabled */
@@ -3816,10 +4015,25 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3816 asm("int $2"); 4015 asm("int $2");
3817 kvm_after_handle_nmi(&vmx->vcpu); 4016 kvm_after_handle_nmi(&vmx->vcpu);
3818 } 4017 }
4018}
3819 4019
3820 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 4020static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
4021{
4022 u32 exit_intr_info;
4023 bool unblock_nmi;
4024 u8 vector;
4025 bool idtv_info_valid;
4026
4027 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
3821 4028
3822 if (cpu_has_virtual_nmis()) { 4029 if (cpu_has_virtual_nmis()) {
4030 if (vmx->nmi_known_unmasked)
4031 return;
4032 /*
4033 * Can't use vmx->exit_intr_info since we're not sure what
4034 * the exit reason is.
4035 */
4036 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
3823 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; 4037 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
3824 vector = exit_intr_info & INTR_INFO_VECTOR_MASK; 4038 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
3825 /* 4039 /*
@@ -3836,9 +4050,25 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3836 vector != DF_VECTOR && !idtv_info_valid) 4050 vector != DF_VECTOR && !idtv_info_valid)
3837 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 4051 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3838 GUEST_INTR_STATE_NMI); 4052 GUEST_INTR_STATE_NMI);
4053 else
4054 vmx->nmi_known_unmasked =
4055 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
4056 & GUEST_INTR_STATE_NMI);
3839 } else if (unlikely(vmx->soft_vnmi_blocked)) 4057 } else if (unlikely(vmx->soft_vnmi_blocked))
3840 vmx->vnmi_blocked_time += 4058 vmx->vnmi_blocked_time +=
3841 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); 4059 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
4060}
4061
4062static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
4063 u32 idt_vectoring_info,
4064 int instr_len_field,
4065 int error_code_field)
4066{
4067 u8 vector;
4068 int type;
4069 bool idtv_info_valid;
4070
4071 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
3842 4072
3843 vmx->vcpu.arch.nmi_injected = false; 4073 vmx->vcpu.arch.nmi_injected = false;
3844 kvm_clear_exception_queue(&vmx->vcpu); 4074 kvm_clear_exception_queue(&vmx->vcpu);
@@ -3847,6 +4077,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3847 if (!idtv_info_valid) 4077 if (!idtv_info_valid)
3848 return; 4078 return;
3849 4079
4080 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
4081
3850 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; 4082 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
3851 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; 4083 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
3852 4084
@@ -3858,23 +4090,22 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3858 * Clear bit "block by NMI" before VM entry if a NMI 4090 * Clear bit "block by NMI" before VM entry if a NMI
3859 * delivery faulted. 4091 * delivery faulted.
3860 */ 4092 */
3861 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, 4093 vmx_set_nmi_mask(&vmx->vcpu, false);
3862 GUEST_INTR_STATE_NMI);
3863 break; 4094 break;
3864 case INTR_TYPE_SOFT_EXCEPTION: 4095 case INTR_TYPE_SOFT_EXCEPTION:
3865 vmx->vcpu.arch.event_exit_inst_len = 4096 vmx->vcpu.arch.event_exit_inst_len =
3866 vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 4097 vmcs_read32(instr_len_field);
3867 /* fall through */ 4098 /* fall through */
3868 case INTR_TYPE_HARD_EXCEPTION: 4099 case INTR_TYPE_HARD_EXCEPTION:
3869 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 4100 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
3870 u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE); 4101 u32 err = vmcs_read32(error_code_field);
3871 kvm_queue_exception_e(&vmx->vcpu, vector, err); 4102 kvm_queue_exception_e(&vmx->vcpu, vector, err);
3872 } else 4103 } else
3873 kvm_queue_exception(&vmx->vcpu, vector); 4104 kvm_queue_exception(&vmx->vcpu, vector);
3874 break; 4105 break;
3875 case INTR_TYPE_SOFT_INTR: 4106 case INTR_TYPE_SOFT_INTR:
3876 vmx->vcpu.arch.event_exit_inst_len = 4107 vmx->vcpu.arch.event_exit_inst_len =
3877 vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 4108 vmcs_read32(instr_len_field);
3878 /* fall through */ 4109 /* fall through */
3879 case INTR_TYPE_EXT_INTR: 4110 case INTR_TYPE_EXT_INTR:
3880 kvm_queue_interrupt(&vmx->vcpu, vector, 4111 kvm_queue_interrupt(&vmx->vcpu, vector,
@@ -3885,27 +4116,21 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3885 } 4116 }
3886} 4117}
3887 4118
3888/* 4119static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3889 * Failure to inject an interrupt should give us the information
3890 * in IDT_VECTORING_INFO_FIELD. However, if the failure occurs
3891 * when fetching the interrupt redirection bitmap in the real-mode
3892 * tss, this doesn't happen. So we do it ourselves.
3893 */
3894static void fixup_rmode_irq(struct vcpu_vmx *vmx)
3895{ 4120{
3896 vmx->rmode.irq.pending = 0; 4121 __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
3897 if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip) 4122 VM_EXIT_INSTRUCTION_LEN,
3898 return; 4123 IDT_VECTORING_ERROR_CODE);
3899 kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip); 4124}
3900 if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { 4125
3901 vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK; 4126static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
3902 vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR; 4127{
3903 return; 4128 __vmx_complete_interrupts(to_vmx(vcpu),
3904 } 4129 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
3905 vmx->idt_vectoring_info = 4130 VM_ENTRY_INSTRUCTION_LEN,
3906 VECTORING_INFO_VALID_MASK 4131 VM_ENTRY_EXCEPTION_ERROR_CODE);
3907 | INTR_TYPE_EXT_INTR 4132
3908 | vmx->rmode.irq.vector; 4133 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
3909} 4134}
3910 4135
3911#ifdef CONFIG_X86_64 4136#ifdef CONFIG_X86_64
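vmx_complete_interrupts() and the new vmx_cancel_injection() above are thin wrappers around the shared __vmx_complete_interrupts(), parameterised by the exit-side versus entry-side VMCS fields. The cancel path is presumably invoked from the generic x86 code when a VM entry is abandoned after events were already programmed into the entry fields; a hypothetical caller-side sketch (the condition and control flow are placeholders, not taken from this patch):

        /* Hypothetical sketch of the generic entry path backing out an injection: */
        if (entry_aborted) {                         /* placeholder condition */
                kvm_x86_ops->cancel_injection(vcpu); /* push the event back onto the software queues */
                goto out;                            /* retry the whole entry later */
        }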
@@ -3916,7 +4141,7 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
3916#define Q "l" 4141#define Q "l"
3917#endif 4142#endif
3918 4143
3919static void vmx_vcpu_run(struct kvm_vcpu *vcpu) 4144static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
3920{ 4145{
3921 struct vcpu_vmx *vmx = to_vmx(vcpu); 4146 struct vcpu_vmx *vmx = to_vmx(vcpu);
3922 4147
@@ -3945,6 +4170,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
3945 asm( 4170 asm(
3946 /* Store host registers */ 4171 /* Store host registers */
3947 "push %%"R"dx; push %%"R"bp;" 4172 "push %%"R"dx; push %%"R"bp;"
4173 "push %%"R"cx \n\t" /* placeholder for guest rcx */
3948 "push %%"R"cx \n\t" 4174 "push %%"R"cx \n\t"
3949 "cmp %%"R"sp, %c[host_rsp](%0) \n\t" 4175 "cmp %%"R"sp, %c[host_rsp](%0) \n\t"
3950 "je 1f \n\t" 4176 "je 1f \n\t"
@@ -3986,10 +4212,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
3986 ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" 4212 ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
3987 ".Lkvm_vmx_return: " 4213 ".Lkvm_vmx_return: "
3988 /* Save guest registers, load host registers, keep flags */ 4214 /* Save guest registers, load host registers, keep flags */
3989 "xchg %0, (%%"R"sp) \n\t" 4215 "mov %0, %c[wordsize](%%"R"sp) \n\t"
4216 "pop %0 \n\t"
3990 "mov %%"R"ax, %c[rax](%0) \n\t" 4217 "mov %%"R"ax, %c[rax](%0) \n\t"
3991 "mov %%"R"bx, %c[rbx](%0) \n\t" 4218 "mov %%"R"bx, %c[rbx](%0) \n\t"
3992 "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t" 4219 "pop"Q" %c[rcx](%0) \n\t"
3993 "mov %%"R"dx, %c[rdx](%0) \n\t" 4220 "mov %%"R"dx, %c[rdx](%0) \n\t"
3994 "mov %%"R"si, %c[rsi](%0) \n\t" 4221 "mov %%"R"si, %c[rsi](%0) \n\t"
3995 "mov %%"R"di, %c[rdi](%0) \n\t" 4222 "mov %%"R"di, %c[rdi](%0) \n\t"
@@ -4007,7 +4234,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
4007 "mov %%cr2, %%"R"ax \n\t" 4234 "mov %%cr2, %%"R"ax \n\t"
4008 "mov %%"R"ax, %c[cr2](%0) \n\t" 4235 "mov %%"R"ax, %c[cr2](%0) \n\t"
4009 4236
4010 "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t" 4237 "pop %%"R"bp; pop %%"R"dx \n\t"
4011 "setbe %c[fail](%0) \n\t" 4238 "setbe %c[fail](%0) \n\t"
4012 : : "c"(vmx), "d"((unsigned long)HOST_RSP), 4239 : : "c"(vmx), "d"((unsigned long)HOST_RSP),
4013 [launched]"i"(offsetof(struct vcpu_vmx, launched)), 4240 [launched]"i"(offsetof(struct vcpu_vmx, launched)),
@@ -4030,25 +4257,32 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
4030 [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), 4257 [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
4031 [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), 4258 [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
4032#endif 4259#endif
4033 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) 4260 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
4261 [wordsize]"i"(sizeof(ulong))
4034 : "cc", "memory" 4262 : "cc", "memory"
4035 , R"bx", R"di", R"si" 4263 , R"ax", R"bx", R"di", R"si"
4036#ifdef CONFIG_X86_64 4264#ifdef CONFIG_X86_64
4037 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" 4265 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
4038#endif 4266#endif
4039 ); 4267 );
4040 4268
4041 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) 4269 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
4042 | (1 << VCPU_EXREG_PDPTR)); 4270 | (1 << VCPU_EXREG_RFLAGS)
4271 | (1 << VCPU_EXREG_CPL)
4272 | (1 << VCPU_EXREG_PDPTR)
4273 | (1 << VCPU_EXREG_SEGMENTS)
4274 | (1 << VCPU_EXREG_CR3));
4043 vcpu->arch.regs_dirty = 0; 4275 vcpu->arch.regs_dirty = 0;
4044 4276
4045 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 4277 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
4046 if (vmx->rmode.irq.pending)
4047 fixup_rmode_irq(vmx);
4048 4278
4049 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 4279 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
4050 vmx->launched = 1; 4280 vmx->launched = 1;
4051 4281
4282 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
4283
4284 vmx_complete_atomic_exit(vmx);
4285 vmx_recover_nmi_blocking(vmx);
4052 vmx_complete_interrupts(vmx); 4286 vmx_complete_interrupts(vmx);
4053} 4287}
4054 4288
@@ -4106,8 +4340,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
4106 goto free_vcpu; 4340 goto free_vcpu;
4107 4341
4108 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); 4342 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
4343 err = -ENOMEM;
4109 if (!vmx->guest_msrs) { 4344 if (!vmx->guest_msrs) {
4110 err = -ENOMEM;
4111 goto uninit_vcpu; 4345 goto uninit_vcpu;
4112 } 4346 }
4113 4347
@@ -4119,21 +4353,26 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
4119 4353
4120 cpu = get_cpu(); 4354 cpu = get_cpu();
4121 vmx_vcpu_load(&vmx->vcpu, cpu); 4355 vmx_vcpu_load(&vmx->vcpu, cpu);
4356 vmx->vcpu.cpu = cpu;
4122 err = vmx_vcpu_setup(vmx); 4357 err = vmx_vcpu_setup(vmx);
4123 vmx_vcpu_put(&vmx->vcpu); 4358 vmx_vcpu_put(&vmx->vcpu);
4124 put_cpu(); 4359 put_cpu();
4125 if (err) 4360 if (err)
4126 goto free_vmcs; 4361 goto free_vmcs;
4127 if (vm_need_virtualize_apic_accesses(kvm)) 4362 if (vm_need_virtualize_apic_accesses(kvm))
4128 if (alloc_apic_access_page(kvm) != 0) 4363 err = alloc_apic_access_page(kvm);
4364 if (err)
4129 goto free_vmcs; 4365 goto free_vmcs;
4130 4366
4131 if (enable_ept) { 4367 if (enable_ept) {
4132 if (!kvm->arch.ept_identity_map_addr) 4368 if (!kvm->arch.ept_identity_map_addr)
4133 kvm->arch.ept_identity_map_addr = 4369 kvm->arch.ept_identity_map_addr =
4134 VMX_EPT_IDENTITY_PAGETABLE_ADDR; 4370 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
4371 err = -ENOMEM;
4135 if (alloc_identity_pagetable(kvm) != 0) 4372 if (alloc_identity_pagetable(kvm) != 0)
4136 goto free_vmcs; 4373 goto free_vmcs;
4374 if (!init_rmode_identity_map(kvm))
4375 goto free_vmcs;
4137 } 4376 }
4138 4377
4139 return &vmx->vcpu; 4378 return &vmx->vcpu;
@@ -4249,11 +4488,6 @@ static int vmx_get_lpage_level(void)
4249 return PT_PDPE_LEVEL; 4488 return PT_PDPE_LEVEL;
4250} 4489}
4251 4490
4252static inline u32 bit(int bitno)
4253{
4254 return 1 << (bitno & 31);
4255}
4256
4257static void vmx_cpuid_update(struct kvm_vcpu *vcpu) 4491static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
4258{ 4492{
4259 struct kvm_cpuid_entry2 *best; 4493 struct kvm_cpuid_entry2 *best;
@@ -4280,6 +4514,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
4280{ 4514{
4281} 4515}
4282 4516
4517static int vmx_check_intercept(struct kvm_vcpu *vcpu,
4518 struct x86_instruction_info *info,
4519 enum x86_intercept_stage stage)
4520{
4521 return X86EMUL_CONTINUE;
4522}
4523
4283static struct kvm_x86_ops vmx_x86_ops = { 4524static struct kvm_x86_ops vmx_x86_ops = {
4284 .cpu_has_kvm_support = cpu_has_kvm_support, 4525 .cpu_has_kvm_support = cpu_has_kvm_support,
4285 .disabled_by_bios = vmx_disabled_by_bios, 4526 .disabled_by_bios = vmx_disabled_by_bios,
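vmx_check_intercept() above is a stub that always returns X86EMUL_CONTINUE: VMX has no nested-intercept checks to perform here, so emulation proceeds unconditionally. A hedged sketch of how such a hook is presumably consulted from the instruction emulator (the exact call chain, field name, and stage value are assumptions, not shown in this diff):

        /* Assumed emulator-side use: ask the backend whether a (nested)
         * hypervisor intercepts this instruction before executing it. */
        rc = ops->intercept(ctxt, &info, stage);     /* ends up in ->check_intercept() */
        if (rc != X86EMUL_CONTINUE)
                return rc;                           /* backend vetoed plain emulation */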
@@ -4307,6 +4548,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
4307 .get_cpl = vmx_get_cpl, 4548 .get_cpl = vmx_get_cpl,
4308 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 4549 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
4309 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, 4550 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
4551 .decache_cr3 = vmx_decache_cr3,
4310 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, 4552 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
4311 .set_cr0 = vmx_set_cr0, 4553 .set_cr0 = vmx_set_cr0,
4312 .set_cr3 = vmx_set_cr3, 4554 .set_cr3 = vmx_set_cr3,
@@ -4334,6 +4576,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
4334 .set_irq = vmx_inject_irq, 4576 .set_irq = vmx_inject_irq,
4335 .set_nmi = vmx_inject_nmi, 4577 .set_nmi = vmx_inject_nmi,
4336 .queue_exception = vmx_queue_exception, 4578 .queue_exception = vmx_queue_exception,
4579 .cancel_injection = vmx_cancel_injection,
4337 .interrupt_allowed = vmx_interrupt_allowed, 4580 .interrupt_allowed = vmx_interrupt_allowed,
4338 .nmi_allowed = vmx_nmi_allowed, 4581 .nmi_allowed = vmx_nmi_allowed,
4339 .get_nmi_mask = vmx_get_nmi_mask, 4582 .get_nmi_mask = vmx_get_nmi_mask,
@@ -4346,7 +4589,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
4346 .get_tdp_level = get_ept_level, 4589 .get_tdp_level = get_ept_level,
4347 .get_mt_mask = vmx_get_mt_mask, 4590 .get_mt_mask = vmx_get_mt_mask,
4348 4591
4592 .get_exit_info = vmx_get_exit_info,
4349 .exit_reasons_str = vmx_exit_reasons_str, 4593 .exit_reasons_str = vmx_exit_reasons_str,
4594
4350 .get_lpage_level = vmx_get_lpage_level, 4595 .get_lpage_level = vmx_get_lpage_level,
4351 4596
4352 .cpuid_update = vmx_cpuid_update, 4597 .cpuid_update = vmx_cpuid_update,
@@ -4356,6 +4601,15 @@ static struct kvm_x86_ops vmx_x86_ops = {
4356 .set_supported_cpuid = vmx_set_supported_cpuid, 4601 .set_supported_cpuid = vmx_set_supported_cpuid,
4357 4602
4358 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, 4603 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
4604
4605 .set_tsc_khz = vmx_set_tsc_khz,
4606 .write_tsc_offset = vmx_write_tsc_offset,
4607 .adjust_tsc_offset = vmx_adjust_tsc_offset,
4608 .compute_tsc_offset = vmx_compute_tsc_offset,
4609
4610 .set_tdp_cr3 = vmx_set_cr3,
4611
4612 .check_intercept = vmx_check_intercept,
4359}; 4613};
4360 4614
4361static int __init vmx_init(void) 4615static int __init vmx_init(void)
@@ -4417,8 +4671,6 @@ static int __init vmx_init(void)
4417 4671
4418 if (enable_ept) { 4672 if (enable_ept) {
4419 bypass_guest_pf = 0; 4673 bypass_guest_pf = 0;
4420 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
4421 VMX_EPT_WRITABLE_MASK);
4422 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 4674 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
4423 VMX_EPT_EXECUTABLE_MASK); 4675 VMX_EPT_EXECUTABLE_MASK);
4424 kvm_enable_tdp(); 4676 kvm_enable_tdp();
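The EPT branch above no longer programs base PTE bits; only the mask setup remains. The surviving call reads more easily with the parameter order of kvm_mmu_set_mask_ptes() spelled out (assumed here, since the prototype is outside this diff):

        /* Assumed parameter order: (user, accessed, dirty, nx, x). EPT in this
         * generation advertises none of the first four, only an execute bit. */
        kvm_mmu_set_mask_ptes(0ull,                      /* user     */
                              0ull,                      /* accessed */
                              0ull,                      /* dirty    */
                              0ull,                      /* nx       */
                              VMX_EPT_EXECUTABLE_MASK);  /* exec     */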