Diffstat (limited to 'arch/x86/kvm/vmx.c')
 arch/x86/kvm/vmx.c | 228 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 192 insertions(+), 36 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5b4cdcbd154c..4c3fa0f67469 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -128,8 +128,11 @@ struct vcpu_vmx {
 	unsigned long         host_rsp;
 	int                   launched;
 	u8                    fail;
+	u8                    cpl;
+	bool                  nmi_known_unmasked;
 	u32                   exit_intr_info;
 	u32                   idt_vectoring_info;
+	ulong                 rflags;
 	struct shared_msr_entry *guest_msrs;
 	int                   nmsrs;
 	int                   save_nmsrs;
@@ -159,6 +162,10 @@ struct vcpu_vmx {
 			u32 ar;
 		} tr, es, ds, fs, gs;
 	} rmode;
+	struct {
+		u32 bitmask; /* 4 bits per segment (1 bit per field) */
+		struct kvm_save_segment seg[8];
+	} segment_cache;
 	int vpid;
 	bool emulation_required;
 
@@ -171,6 +178,15 @@ struct vcpu_vmx {
 	bool rdtscp_enabled;
 };
 
+enum segment_cache_field {
+	SEG_FIELD_SEL = 0,
+	SEG_FIELD_BASE = 1,
+	SEG_FIELD_LIMIT = 2,
+	SEG_FIELD_AR = 3,
+
+	SEG_FIELD_NR = 4
+};
+
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 {
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
@@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
 	vmcs_writel(field, vmcs_readl(field) | mask);
 }
 
+static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+	vmx->segment_cache.bitmask = 0;
+}
+
+static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
+				       unsigned field)
+{
+	bool ret;
+	u32 mask = 1 << (seg * SEG_FIELD_NR + field);
+
+	if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
+		vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+		vmx->segment_cache.bitmask = 0;
+	}
+	ret = vmx->segment_cache.bitmask & mask;
+	vmx->segment_cache.bitmask |= mask;
+	return ret;
+}
+
+static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u16 *p = &vmx->segment_cache.seg[seg].selector;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
+		*p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
+	return *p;
+}
+
+static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
+{
+	ulong *p = &vmx->segment_cache.seg[seg].base;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
+		*p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
+	return *p;
+}
+
+static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u32 *p = &vmx->segment_cache.seg[seg].limit;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
+		*p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
+	return *p;
+}
+
+static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u32 *p = &vmx->segment_cache.seg[seg].ar;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
+		*p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
+	return *p;
+}
+
 static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
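
A note on the segment cache added above: with 8 segments and SEG_FIELD_NR = 4 fields each, the validity bitmask occupies exactly 32 bits, and the whole cache is invalidated in one store, either explicitly via vmx_segment_cache_clear() or lazily by dropping VCPU_EXREG_SEGMENTS from regs_avail after each vmexit. The win is that repeat reads of the same field between exits become plain loads instead of VMREADs. A rough standalone model of the test-and-set accounting (only the SEG_FIELD_* names come from the patch; everything else is illustrative):

    #include <stdio.h>
    #include <stdint.h>

    enum { SEG_FIELD_SEL, SEG_FIELD_BASE, SEG_FIELD_LIMIT, SEG_FIELD_AR,
           SEG_FIELD_NR };

    static uint32_t bitmask;   /* 8 segments * 4 fields = 32 bits, one u32 */

    /* Returns whether (seg, field) was already cached; marks it cached. */
    static int test_set(unsigned seg, unsigned field)
    {
            uint32_t mask = 1u << (seg * SEG_FIELD_NR + field);
            int hit = (bitmask & mask) != 0;

            bitmask |= mask;
            return hit;
    }

    int main(void)
    {
            printf("%s\n", test_set(1, SEG_FIELD_SEL) ? "hit" : "miss -> VMREAD");
            printf("%s\n", test_set(1, SEG_FIELD_SEL) ? "hit" : "miss -> VMREAD");
            bitmask = 0;   /* what vmx_segment_cache_clear() does */
            printf("%s\n", test_set(1, SEG_FIELD_SEL) ? "hit" : "miss -> VMREAD");
            return 0;
    }
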
@@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
 	unsigned long rflags, save_rflags;
 
-	rflags = vmcs_readl(GUEST_RFLAGS);
-	if (to_vmx(vcpu)->rmode.vm86_active) {
-		rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
-		save_rflags = to_vmx(vcpu)->rmode.save_rflags;
-		rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+	if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
+		__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+		rflags = vmcs_readl(GUEST_RFLAGS);
+		if (to_vmx(vcpu)->rmode.vm86_active) {
+			rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+			save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+			rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+		}
+		to_vmx(vcpu)->rflags = rflags;
 	}
-	return rflags;
+	return to_vmx(vcpu)->rflags;
 }
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
+	__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+	to_vmx(vcpu)->rflags = rflags;
 	if (to_vmx(vcpu)->rmode.vm86_active) {
 		to_vmx(vcpu)->rmode.save_rflags = rflags;
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
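
The rflags change above is the same read-once-per-exit pattern as the segment cache: the first vmx_get_rflags() after an exit does the VMREAD and latches the value until VCPU_EXREG_RFLAGS is cleared from regs_avail again (see the vmx_vcpu_run() hunk near the end of this diff), while vmx_set_rflags() writes through the cache. A minimal sketch of the idea, with a plain avail flag standing in for the regs_avail bit (all names here are illustrative):

    #include <stdio.h>

    static int avail;
    static unsigned long cached_rflags;
    static int vmreads;

    /* Stand-in for vmcs_readl(GUEST_RFLAGS). */
    static unsigned long vmcs_read_rflags(void) { vmreads++; return 0x2; }

    static unsigned long get_rflags(void)
    {
            if (!avail) {
                    avail = 1;
                    cached_rflags = vmcs_read_rflags();
            }
            return cached_rflags;
    }

    int main(void)
    {
            get_rflags();
            get_rflags();
            printf("VMREADs so far: %d\n", vmreads);   /* 1 */
            avail = 0;   /* what vmx_vcpu_run() does after every exit */
            get_rflags();
            printf("VMREADs so far: %d\n", vmreads);   /* 2 */
            return 0;
    }

Note that vmx_set_rflags() also drops VCPU_EXREG_CPL: a change to EFLAGS.VM can change the privilege level that vmx_get_cpl() later caches.
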
@@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	}
 
 	if (vmx->rmode.vm86_active) {
-		if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
+		int inc_eip = 0;
+		if (kvm_exception_is_soft(nr))
+			inc_eip = vcpu->arch.event_exit_inst_len;
+		if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
 			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
@@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void)
 }
 
 /*
+ * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
+ * ioctl. In this case the call-back should update internal vmx state to make
+ * the changes effective.
+ */
+static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+{
+	/* Nothing to do here */
+}
+
+/*
  * writes 'offset' into guest's timestamp counter offset register
  */
 static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
 	vmcs_write64(TSC_OFFSET, offset + adjustment);
 }
 
+static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+	return target_tsc - native_read_tsc();
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		break;
 #ifdef CONFIG_X86_64
 	case MSR_FS_BASE:
+		vmx_segment_cache_clear(vmx);
 		vmcs_writel(GUEST_FS_BASE, data);
 		break;
 	case MSR_GS_BASE:
+		vmx_segment_cache_clear(vmx);
 		vmcs_writel(GUEST_GS_BASE, data);
 		break;
 	case MSR_KERNEL_GS_BASE:
@@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 0;
 
+	vmx_segment_cache_clear(vmx);
+
 	vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
 	vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
 	vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
@@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
 	fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
 
+	vmx_segment_cache_clear(vmx);
+
 	vmcs_write16(GUEST_SS_SELECTOR, 0);
 	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
 
@@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	}
 
+	vmx_segment_cache_clear(vmx);
+
 	vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
 	vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 {
 	u32 guest_tr_ar;
 
+	vmx_segment_cache_clear(to_vmx(vcpu));
+
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
 		printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
@@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
@@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	struct kvm_save_segment *save;
 	u32 ar;
 
@@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 		var->limit = save->limit;
 		ar = save->ar;
 		if (seg == VCPU_SREG_TR
-		    || var->selector == vmcs_read16(sf->selector))
+		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
 			goto use_saved_rmode_seg;
 	}
-	var->base = vmcs_readl(sf->base);
-	var->limit = vmcs_read32(sf->limit);
-	var->selector = vmcs_read16(sf->selector);
-	ar = vmcs_read32(sf->ar_bytes);
+	var->base = vmx_read_guest_seg_base(vmx, seg);
+	var->limit = vmx_read_guest_seg_limit(vmx, seg);
+	var->selector = vmx_read_guest_seg_selector(vmx, seg);
+	ar = vmx_read_guest_seg_ar(vmx, seg);
 use_saved_rmode_seg:
 	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
@@ -2098,27 +2205,37 @@ use_saved_rmode_seg:
 
 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	struct kvm_segment s;
 
 	if (to_vmx(vcpu)->rmode.vm86_active) {
 		vmx_get_segment(vcpu, &s, seg);
 		return s.base;
 	}
-	return vmcs_readl(sf->base);
+	return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
 }
 
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
 	if (!is_protmode(vcpu))
 		return 0;
 
-	if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+	if (!is_long_mode(vcpu)
+	    && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
 		return 3;
 
-	return vmcs_read16(GUEST_CS_SELECTOR) & 3;
+	return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
 }
 
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+	if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
+		__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+		to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
+	}
+	return to_vmx(vcpu)->cpl;
+}
+
+
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
 	u32 ar;
@@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
+	vmx_segment_cache_clear(vmx);
+
 	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
 		vmcs_write16(sf->selector, var->selector);
 		vmx->rmode.tr.selector = var->selector;
@@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 		ar |= 0x1; /* Accessed */
 
 	vmcs_write32(sf->ar_bytes, ar);
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 }
 
 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 {
-	u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
+	u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
 
 	*db = (ar >> 14) & 1;
 	*l = (ar >> 13) & 1;
@@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	if (ret != 0)
 		goto out;
 
+	vmx_segment_cache_clear(vmx);
+
 	seg_setup(VCPU_SREG_CS);
 	/*
 	 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
@@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 
 	++vcpu->stat.irq_injections;
 	if (vmx->rmode.vm86_active) {
-		if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE)
+		int inc_eip = 0;
+		if (vcpu->arch.interrupt.soft)
+			inc_eip = vcpu->arch.event_exit_inst_len;
+		if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
 			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
@@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 	}
 
 	++vcpu->stat.nmi_injections;
+	vmx->nmi_known_unmasked = false;
 	if (vmx->rmode.vm86_active) {
-		if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
+		if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
 			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
@@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
 	if (!cpu_has_virtual_nmis())
 		return to_vmx(vcpu)->soft_vnmi_blocked;
+	if (to_vmx(vcpu)->nmi_known_unmasked)
+		return false;
 	return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
 }
 
@@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 			vmx->vnmi_blocked_time = 0;
 		}
 	} else {
+		vmx->nmi_known_unmasked = !masked;
 		if (masked)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
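
The nmi_known_unmasked flag introduced in the struct at the top of this diff is deliberately one-sided: true means NMIs are known to be deliverable and the GUEST_INTERRUPTIBILITY_INFO VMREAD can be skipped; false only means "unknown", so readers fall back to the VMCS. A small standalone model of that contract (names below are illustrative stand-ins, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    #define GUEST_INTR_STATE_NMI 0x8

    static bool nmi_known_unmasked;
    static unsigned interruptibility;   /* models the VMCS field */

    static bool get_nmi_mask(void)
    {
            if (nmi_known_unmasked)
                    return false;   /* fast path, no VMREAD */
            return interruptibility & GUEST_INTR_STATE_NMI;
    }

    static void set_nmi_mask(bool masked)
    {
            nmi_known_unmasked = !masked;   /* as in vmx_set_nmi_mask() above */
            if (masked)
                    interruptibility |= GUEST_INTR_STATE_NMI;
            else
                    interruptibility &= ~GUEST_INTR_STATE_NMI;
    }

    int main(void)
    {
            set_nmi_mask(true);
            printf("masked: %d\n", (int)get_nmi_mask());   /* 1, slow path */
            set_nmi_mask(false);
            printf("masked: %d\n", (int)get_nmi_mask());   /* 0, fast path */
            return 0;
    }

This is also why vmx_inject_nmi() clears the flag before injecting, and why vmx_recover_nmi_blocking() recomputes it after exits it cannot classify.
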
@@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	enum emulation_result er;
 
 	vect_info = vmx->idt_vectoring_info;
-	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+	intr_info = vmx->exit_intr_info;
 
 	if (is_machine_check(intr_info))
 		return handle_machine_check(vcpu);
@@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	}
 
 	error_code = 0;
-	rip = kvm_rip_read(vcpu);
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
@@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		vmx->vcpu.arch.event_exit_inst_len =
 			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		rip = kvm_rip_read(vcpu);
 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
 		kvm_run->debug.arch.exception = ex_no;
 		break;
@@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
 	switch (type) {
 	case INTR_TYPE_NMI_INTR:
 		vcpu->arch.nmi_injected = false;
-		if (cpu_has_virtual_nmis())
-			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-				      GUEST_INTR_STATE_NMI);
+		vmx_set_nmi_mask(vcpu, true);
 		break;
 	case INTR_TYPE_EXT_INTR:
 	case INTR_TYPE_SOFT_INTR:
@@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
-	u32 exit_intr_info = vmx->exit_intr_info;
+	u32 exit_intr_info;
+
+	if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+	      || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+		return;
+
+	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+	exit_intr_info = vmx->exit_intr_info;
 
 	/* Handle machine checks before interrupts are enabled */
-	if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
-	    || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
-		&& is_machine_check(exit_intr_info)))
+	if (is_machine_check(exit_intr_info))
 		kvm_machine_check();
 
 	/* We need to handle NMIs before interrupts are enabled */
@@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
-	u32 exit_intr_info = vmx->exit_intr_info;
+	u32 exit_intr_info;
 	bool unblock_nmi;
 	u8 vector;
 	bool idtv_info_valid;
@@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
 	if (cpu_has_virtual_nmis()) {
+		if (vmx->nmi_known_unmasked)
+			return;
+		/*
+		 * Can't use vmx->exit_intr_info since we're not sure what
+		 * the exit reason is.
+		 */
+		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
 		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
 		/*
@@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 		    vector != DF_VECTOR && !idtv_info_valid)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
+		else
+			vmx->nmi_known_unmasked =
+				!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+				  & GUEST_INTR_STATE_NMI);
 	} else if (unlikely(vmx->soft_vnmi_blocked))
 		vmx->vnmi_blocked_time +=
 			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
@@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
 			 * Clear bit "block by NMI" before VM entry if a NMI
 			 * delivery faulted.
 			 */
-			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-					GUEST_INTR_STATE_NMI);
+			vmx_set_nmi_mask(&vmx->vcpu, false);
 			break;
 		case INTR_TYPE_SOFT_EXCEPTION:
 			vmx->vcpu.arch.event_exit_inst_len =
@@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	      );
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
+				  | (1 << VCPU_EXREG_RFLAGS)
+				  | (1 << VCPU_EXREG_CPL)
 				  | (1 << VCPU_EXREG_PDPTR)
+				  | (1 << VCPU_EXREG_SEGMENTS)
 				  | (1 << VCPU_EXREG_CR3));
 	vcpu->arch.regs_dirty = 0;
 
@@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->launched = 1;
 
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
-	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	vmx_complete_atomic_exit(vmx);
 	vmx_recover_nmi_blocking(vmx);
@@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto free_vcpu;
 
 	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	err = -ENOMEM;
 	if (!vmx->guest_msrs) {
-		err = -ENOMEM;
 		goto uninit_vcpu;
 	}
 
@@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_vmcs;
 	if (vm_need_virtualize_apic_accesses(kvm))
-		if (alloc_apic_access_page(kvm) != 0)
+		err = alloc_apic_access_page(kvm);
+	if (err)
 			goto free_vmcs;
 
 	if (enable_ept) {
@@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 {
 }
 
+static int vmx_check_intercept(struct kvm_vcpu *vcpu,
+			       struct x86_instruction_info *info,
+			       enum x86_intercept_stage stage)
+{
+	return X86EMUL_CONTINUE;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
+	.set_tsc_khz = vmx_set_tsc_khz,
 	.write_tsc_offset = vmx_write_tsc_offset,
 	.adjust_tsc_offset = vmx_adjust_tsc_offset,
+	.compute_tsc_offset = vmx_compute_tsc_offset,
 
 	.set_tdp_cr3 = vmx_set_cr3,
+
+	.check_intercept = vmx_check_intercept,
 };
 
 static int __init vmx_init(void)