aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 20:43:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 20:43:43 -0400
commit69def9f05dfce3281bb06599057e6b8097385d39 (patch)
tree7d826b22924268ddbfad101993b248996d40e2ec /arch/x86/kvm/vmx.c
parent353f6dd2dec992ddd34620a94b051b0f76227379 (diff)
parent8e616fc8d343bd7f0f0a0c22407fdcb77f6d22b1 (diff)
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits) MAINTAINERS: update KVM entry KVM: correct error-handling code KVM: fix compile warnings on s390 KVM: VMX: Check cpl before emulating debug register access KVM: fix misreporting of coalesced interrupts by kvm tracer KVM: x86: drop duplicate kvm_flush_remote_tlb calls KVM: VMX: call vmx_load_host_state() only if msr is cached KVM: VMX: Conditionally reload debug register 6 KVM: Use thread debug register storage instead of kvm specific data KVM guest: do not batch pte updates from interrupt context KVM: Fix coalesced interrupt reporting in IOAPIC KVM guest: fix bogus wallclock physical address calculation KVM: VMX: Fix cr8 exiting control clobbering by EPT KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp KVM: Document KVM_CAP_IRQCHIP KVM: Protect update_cr8_intercept() when running without an apic KVM: VMX: Fix EPT with WP bit change during paging KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors KVM: x86 emulator: Add adc and sbb missing decoder flags KVM: Add missing #include ...
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--arch/x86/kvm/vmx.c497
1 files changed, 361 insertions, 136 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 29f912927a58..f3812014bd0b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -25,6 +25,7 @@
25#include <linux/highmem.h> 25#include <linux/highmem.h>
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
28#include <linux/ftrace_event.h>
28#include "kvm_cache_regs.h" 29#include "kvm_cache_regs.h"
29#include "x86.h" 30#include "x86.h"
30 31
@@ -34,6 +35,8 @@
34#include <asm/virtext.h> 35#include <asm/virtext.h>
35#include <asm/mce.h> 36#include <asm/mce.h>
36 37
38#include "trace.h"
39
37#define __ex(x) __kvm_handle_fault_on_reboot(x) 40#define __ex(x) __kvm_handle_fault_on_reboot(x)
38 41
39MODULE_AUTHOR("Qumranet"); 42MODULE_AUTHOR("Qumranet");
@@ -51,6 +54,10 @@ module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
51static int __read_mostly enable_ept = 1; 54static int __read_mostly enable_ept = 1;
52module_param_named(ept, enable_ept, bool, S_IRUGO); 55module_param_named(ept, enable_ept, bool, S_IRUGO);
53 56
57static int __read_mostly enable_unrestricted_guest = 1;
58module_param_named(unrestricted_guest,
59 enable_unrestricted_guest, bool, S_IRUGO);
60
54static int __read_mostly emulate_invalid_guest_state = 0; 61static int __read_mostly emulate_invalid_guest_state = 0;
55module_param(emulate_invalid_guest_state, bool, S_IRUGO); 62module_param(emulate_invalid_guest_state, bool, S_IRUGO);
56 63
@@ -84,6 +91,14 @@ struct vcpu_vmx {
84 int guest_efer_loaded; 91 int guest_efer_loaded;
85 } host_state; 92 } host_state;
86 struct { 93 struct {
94 int vm86_active;
95 u8 save_iopl;
96 struct kvm_save_segment {
97 u16 selector;
98 unsigned long base;
99 u32 limit;
100 u32 ar;
101 } tr, es, ds, fs, gs;
87 struct { 102 struct {
88 bool pending; 103 bool pending;
89 u8 vector; 104 u8 vector;
@@ -161,6 +176,8 @@ static struct kvm_vmx_segment_field {
161 VMX_SEGMENT_FIELD(LDTR), 176 VMX_SEGMENT_FIELD(LDTR),
162}; 177};
163 178
179static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
180
164/* 181/*
165 * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it 182 * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
166 * away by decrementing the array size. 183 * away by decrementing the array size.
@@ -256,6 +273,26 @@ static inline bool cpu_has_vmx_flexpriority(void)
256 cpu_has_vmx_virtualize_apic_accesses(); 273 cpu_has_vmx_virtualize_apic_accesses();
257} 274}
258 275
276static inline bool cpu_has_vmx_ept_execute_only(void)
277{
278 return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT);
279}
280
281static inline bool cpu_has_vmx_eptp_uncacheable(void)
282{
283 return !!(vmx_capability.ept & VMX_EPTP_UC_BIT);
284}
285
286static inline bool cpu_has_vmx_eptp_writeback(void)
287{
288 return !!(vmx_capability.ept & VMX_EPTP_WB_BIT);
289}
290
291static inline bool cpu_has_vmx_ept_2m_page(void)
292{
293 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
294}
295
259static inline int cpu_has_vmx_invept_individual_addr(void) 296static inline int cpu_has_vmx_invept_individual_addr(void)
260{ 297{
261 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); 298 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -277,6 +314,12 @@ static inline int cpu_has_vmx_ept(void)
277 SECONDARY_EXEC_ENABLE_EPT; 314 SECONDARY_EXEC_ENABLE_EPT;
278} 315}
279 316
317static inline int cpu_has_vmx_unrestricted_guest(void)
318{
319 return vmcs_config.cpu_based_2nd_exec_ctrl &
320 SECONDARY_EXEC_UNRESTRICTED_GUEST;
321}
322
280static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 323static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
281{ 324{
282 return flexpriority_enabled && 325 return flexpriority_enabled &&
@@ -497,14 +540,16 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
497 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); 540 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
498 if (!vcpu->fpu_active) 541 if (!vcpu->fpu_active)
499 eb |= 1u << NM_VECTOR; 542 eb |= 1u << NM_VECTOR;
543 /*
544 * Unconditionally intercept #DB so we can maintain dr6 without
545 * reading it every exit.
546 */
547 eb |= 1u << DB_VECTOR;
500 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { 548 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
501 if (vcpu->guest_debug &
502 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
503 eb |= 1u << DB_VECTOR;
504 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 549 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
505 eb |= 1u << BP_VECTOR; 550 eb |= 1u << BP_VECTOR;
506 } 551 }
507 if (vcpu->arch.rmode.vm86_active) 552 if (to_vmx(vcpu)->rmode.vm86_active)
508 eb = ~0; 553 eb = ~0;
509 if (enable_ept) 554 if (enable_ept)
510 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ 555 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
@@ -528,12 +573,15 @@ static void reload_tss(void)
528static void load_transition_efer(struct vcpu_vmx *vmx) 573static void load_transition_efer(struct vcpu_vmx *vmx)
529{ 574{
530 int efer_offset = vmx->msr_offset_efer; 575 int efer_offset = vmx->msr_offset_efer;
531 u64 host_efer = vmx->host_msrs[efer_offset].data; 576 u64 host_efer;
532 u64 guest_efer = vmx->guest_msrs[efer_offset].data; 577 u64 guest_efer;
533 u64 ignore_bits; 578 u64 ignore_bits;
534 579
535 if (efer_offset < 0) 580 if (efer_offset < 0)
536 return; 581 return;
582 host_efer = vmx->host_msrs[efer_offset].data;
583 guest_efer = vmx->guest_msrs[efer_offset].data;
584
537 /* 585 /*
538 * NX is emulated; LMA and LME handled by hardware; SCE meaninless 586 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
539 * outside long mode 587 * outside long mode
@@ -735,12 +783,17 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
735 783
736static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 784static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
737{ 785{
738 return vmcs_readl(GUEST_RFLAGS); 786 unsigned long rflags;
787
788 rflags = vmcs_readl(GUEST_RFLAGS);
789 if (to_vmx(vcpu)->rmode.vm86_active)
790 rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
791 return rflags;
739} 792}
740 793
741static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 794static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
742{ 795{
743 if (vcpu->arch.rmode.vm86_active) 796 if (to_vmx(vcpu)->rmode.vm86_active)
744 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 797 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
745 vmcs_writel(GUEST_RFLAGS, rflags); 798 vmcs_writel(GUEST_RFLAGS, rflags);
746} 799}
@@ -797,12 +850,13 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
797 intr_info |= INTR_INFO_DELIVER_CODE_MASK; 850 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
798 } 851 }
799 852
800 if (vcpu->arch.rmode.vm86_active) { 853 if (vmx->rmode.vm86_active) {
801 vmx->rmode.irq.pending = true; 854 vmx->rmode.irq.pending = true;
802 vmx->rmode.irq.vector = nr; 855 vmx->rmode.irq.vector = nr;
803 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 856 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
804 if (nr == BP_VECTOR || nr == OF_VECTOR) 857 if (kvm_exception_is_soft(nr))
805 vmx->rmode.irq.rip++; 858 vmx->rmode.irq.rip +=
859 vmx->vcpu.arch.event_exit_inst_len;
806 intr_info |= INTR_TYPE_SOFT_INTR; 860 intr_info |= INTR_TYPE_SOFT_INTR;
807 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 861 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
808 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); 862 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
@@ -940,7 +994,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
940 case MSR_EFER: 994 case MSR_EFER:
941 return kvm_get_msr_common(vcpu, msr_index, pdata); 995 return kvm_get_msr_common(vcpu, msr_index, pdata);
942#endif 996#endif
943 case MSR_IA32_TIME_STAMP_COUNTER: 997 case MSR_IA32_TSC:
944 data = guest_read_tsc(); 998 data = guest_read_tsc();
945 break; 999 break;
946 case MSR_IA32_SYSENTER_CS: 1000 case MSR_IA32_SYSENTER_CS:
@@ -953,9 +1007,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
953 data = vmcs_readl(GUEST_SYSENTER_ESP); 1007 data = vmcs_readl(GUEST_SYSENTER_ESP);
954 break; 1008 break;
955 default: 1009 default:
956 vmx_load_host_state(to_vmx(vcpu));
957 msr = find_msr_entry(to_vmx(vcpu), msr_index); 1010 msr = find_msr_entry(to_vmx(vcpu), msr_index);
958 if (msr) { 1011 if (msr) {
1012 vmx_load_host_state(to_vmx(vcpu));
959 data = msr->data; 1013 data = msr->data;
960 break; 1014 break;
961 } 1015 }
@@ -1000,22 +1054,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1000 case MSR_IA32_SYSENTER_ESP: 1054 case MSR_IA32_SYSENTER_ESP:
1001 vmcs_writel(GUEST_SYSENTER_ESP, data); 1055 vmcs_writel(GUEST_SYSENTER_ESP, data);
1002 break; 1056 break;
1003 case MSR_IA32_TIME_STAMP_COUNTER: 1057 case MSR_IA32_TSC:
1004 rdtscll(host_tsc); 1058 rdtscll(host_tsc);
1005 guest_write_tsc(data, host_tsc); 1059 guest_write_tsc(data, host_tsc);
1006 break; 1060 break;
1007 case MSR_P6_PERFCTR0:
1008 case MSR_P6_PERFCTR1:
1009 case MSR_P6_EVNTSEL0:
1010 case MSR_P6_EVNTSEL1:
1011 /*
1012 * Just discard all writes to the performance counters; this
1013 * should keep both older linux and windows 64-bit guests
1014 * happy
1015 */
1016 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
1017
1018 break;
1019 case MSR_IA32_CR_PAT: 1061 case MSR_IA32_CR_PAT:
1020 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { 1062 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
1021 vmcs_write64(GUEST_IA32_PAT, data); 1063 vmcs_write64(GUEST_IA32_PAT, data);
@@ -1024,9 +1066,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1024 } 1066 }
1025 /* Otherwise falls through to kvm_set_msr_common */ 1067 /* Otherwise falls through to kvm_set_msr_common */
1026 default: 1068 default:
1027 vmx_load_host_state(vmx);
1028 msr = find_msr_entry(vmx, msr_index); 1069 msr = find_msr_entry(vmx, msr_index);
1029 if (msr) { 1070 if (msr) {
1071 vmx_load_host_state(vmx);
1030 msr->data = data; 1072 msr->data = data;
1031 break; 1073 break;
1032 } 1074 }
@@ -1046,6 +1088,10 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1046 case VCPU_REGS_RIP: 1088 case VCPU_REGS_RIP:
1047 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); 1089 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
1048 break; 1090 break;
1091 case VCPU_EXREG_PDPTR:
1092 if (enable_ept)
1093 ept_save_pdptrs(vcpu);
1094 break;
1049 default: 1095 default:
1050 break; 1096 break;
1051 } 1097 }
@@ -1203,7 +1249,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1203 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 1249 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
1204 SECONDARY_EXEC_WBINVD_EXITING | 1250 SECONDARY_EXEC_WBINVD_EXITING |
1205 SECONDARY_EXEC_ENABLE_VPID | 1251 SECONDARY_EXEC_ENABLE_VPID |
1206 SECONDARY_EXEC_ENABLE_EPT; 1252 SECONDARY_EXEC_ENABLE_EPT |
1253 SECONDARY_EXEC_UNRESTRICTED_GUEST;
1207 if (adjust_vmx_controls(min2, opt2, 1254 if (adjust_vmx_controls(min2, opt2,
1208 MSR_IA32_VMX_PROCBASED_CTLS2, 1255 MSR_IA32_VMX_PROCBASED_CTLS2,
1209 &_cpu_based_2nd_exec_control) < 0) 1256 &_cpu_based_2nd_exec_control) < 0)
@@ -1217,12 +1264,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1217 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { 1264 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
1218 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT 1265 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
1219 enabled */ 1266 enabled */
1220 min &= ~(CPU_BASED_CR3_LOAD_EXITING | 1267 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
1221 CPU_BASED_CR3_STORE_EXITING | 1268 CPU_BASED_CR3_STORE_EXITING |
1222 CPU_BASED_INVLPG_EXITING); 1269 CPU_BASED_INVLPG_EXITING);
1223 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
1224 &_cpu_based_exec_control) < 0)
1225 return -EIO;
1226 rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, 1270 rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
1227 vmx_capability.ept, vmx_capability.vpid); 1271 vmx_capability.ept, vmx_capability.vpid);
1228 } 1272 }
@@ -1333,8 +1377,13 @@ static __init int hardware_setup(void)
1333 if (!cpu_has_vmx_vpid()) 1377 if (!cpu_has_vmx_vpid())
1334 enable_vpid = 0; 1378 enable_vpid = 0;
1335 1379
1336 if (!cpu_has_vmx_ept()) 1380 if (!cpu_has_vmx_ept()) {
1337 enable_ept = 0; 1381 enable_ept = 0;
1382 enable_unrestricted_guest = 0;
1383 }
1384
1385 if (!cpu_has_vmx_unrestricted_guest())
1386 enable_unrestricted_guest = 0;
1338 1387
1339 if (!cpu_has_vmx_flexpriority()) 1388 if (!cpu_has_vmx_flexpriority())
1340 flexpriority_enabled = 0; 1389 flexpriority_enabled = 0;
@@ -1342,6 +1391,9 @@ static __init int hardware_setup(void)
1342 if (!cpu_has_vmx_tpr_shadow()) 1391 if (!cpu_has_vmx_tpr_shadow())
1343 kvm_x86_ops->update_cr8_intercept = NULL; 1392 kvm_x86_ops->update_cr8_intercept = NULL;
1344 1393
1394 if (enable_ept && !cpu_has_vmx_ept_2m_page())
1395 kvm_disable_largepages();
1396
1345 return alloc_kvm_area(); 1397 return alloc_kvm_area();
1346} 1398}
1347 1399
@@ -1372,15 +1424,15 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1372 struct vcpu_vmx *vmx = to_vmx(vcpu); 1424 struct vcpu_vmx *vmx = to_vmx(vcpu);
1373 1425
1374 vmx->emulation_required = 1; 1426 vmx->emulation_required = 1;
1375 vcpu->arch.rmode.vm86_active = 0; 1427 vmx->rmode.vm86_active = 0;
1376 1428
1377 vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); 1429 vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
1378 vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit); 1430 vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
1379 vmcs_write32(GUEST_TR_AR_BYTES, vcpu->arch.rmode.tr.ar); 1431 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
1380 1432
1381 flags = vmcs_readl(GUEST_RFLAGS); 1433 flags = vmcs_readl(GUEST_RFLAGS);
1382 flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 1434 flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
1383 flags |= (vcpu->arch.rmode.save_iopl << IOPL_SHIFT); 1435 flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
1384 vmcs_writel(GUEST_RFLAGS, flags); 1436 vmcs_writel(GUEST_RFLAGS, flags);
1385 1437
1386 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | 1438 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1391,10 +1443,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1391 if (emulate_invalid_guest_state) 1443 if (emulate_invalid_guest_state)
1392 return; 1444 return;
1393 1445
1394 fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es); 1446 fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es);
1395 fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); 1447 fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds);
1396 fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); 1448 fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
1397 fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); 1449 fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
1398 1450
1399 vmcs_write16(GUEST_SS_SELECTOR, 0); 1451 vmcs_write16(GUEST_SS_SELECTOR, 0);
1400 vmcs_write32(GUEST_SS_AR_BYTES, 0x93); 1452 vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -1433,20 +1485,23 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1433 unsigned long flags; 1485 unsigned long flags;
1434 struct vcpu_vmx *vmx = to_vmx(vcpu); 1486 struct vcpu_vmx *vmx = to_vmx(vcpu);
1435 1487
1488 if (enable_unrestricted_guest)
1489 return;
1490
1436 vmx->emulation_required = 1; 1491 vmx->emulation_required = 1;
1437 vcpu->arch.rmode.vm86_active = 1; 1492 vmx->rmode.vm86_active = 1;
1438 1493
1439 vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); 1494 vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
1440 vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); 1495 vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
1441 1496
1442 vcpu->arch.rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); 1497 vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
1443 vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); 1498 vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
1444 1499
1445 vcpu->arch.rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); 1500 vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
1446 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); 1501 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
1447 1502
1448 flags = vmcs_readl(GUEST_RFLAGS); 1503 flags = vmcs_readl(GUEST_RFLAGS);
1449 vcpu->arch.rmode.save_iopl 1504 vmx->rmode.save_iopl
1450 = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 1505 = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1451 1506
1452 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 1507 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@@ -1468,10 +1523,10 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1468 vmcs_writel(GUEST_CS_BASE, 0xf0000); 1523 vmcs_writel(GUEST_CS_BASE, 0xf0000);
1469 vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); 1524 vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
1470 1525
1471 fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es); 1526 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
1472 fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); 1527 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
1473 fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); 1528 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
1474 fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); 1529 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
1475 1530
1476continue_rmode: 1531continue_rmode:
1477 kvm_mmu_reset_context(vcpu); 1532 kvm_mmu_reset_context(vcpu);
@@ -1545,11 +1600,11 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1545 1600
1546static void ept_load_pdptrs(struct kvm_vcpu *vcpu) 1601static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
1547{ 1602{
1603 if (!test_bit(VCPU_EXREG_PDPTR,
1604 (unsigned long *)&vcpu->arch.regs_dirty))
1605 return;
1606
1548 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { 1607 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
1549 if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
1550 printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
1551 return;
1552 }
1553 vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); 1608 vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
1554 vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); 1609 vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
1555 vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); 1610 vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
@@ -1557,6 +1612,21 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
1557 } 1612 }
1558} 1613}
1559 1614
1615static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
1616{
1617 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
1618 vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
1619 vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
1620 vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
1621 vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
1622 }
1623
1624 __set_bit(VCPU_EXREG_PDPTR,
1625 (unsigned long *)&vcpu->arch.regs_avail);
1626 __set_bit(VCPU_EXREG_PDPTR,
1627 (unsigned long *)&vcpu->arch.regs_dirty);
1628}
1629
1560static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); 1630static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
1561 1631
1562static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, 1632static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
@@ -1571,8 +1641,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1571 CPU_BASED_CR3_STORE_EXITING)); 1641 CPU_BASED_CR3_STORE_EXITING));
1572 vcpu->arch.cr0 = cr0; 1642 vcpu->arch.cr0 = cr0;
1573 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1643 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1574 *hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
1575 *hw_cr0 &= ~X86_CR0_WP;
1576 } else if (!is_paging(vcpu)) { 1644 } else if (!is_paging(vcpu)) {
1577 /* From nonpaging to paging */ 1645 /* From nonpaging to paging */
1578 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1646 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1581,9 +1649,10 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1581 CPU_BASED_CR3_STORE_EXITING)); 1649 CPU_BASED_CR3_STORE_EXITING));
1582 vcpu->arch.cr0 = cr0; 1650 vcpu->arch.cr0 = cr0;
1583 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1651 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1584 if (!(vcpu->arch.cr0 & X86_CR0_WP))
1585 *hw_cr0 &= ~X86_CR0_WP;
1586 } 1652 }
1653
1654 if (!(cr0 & X86_CR0_WP))
1655 *hw_cr0 &= ~X86_CR0_WP;
1587} 1656}
1588 1657
1589static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, 1658static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
@@ -1598,15 +1667,21 @@ static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
1598 1667
1599static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1668static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1600{ 1669{
1601 unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | 1670 struct vcpu_vmx *vmx = to_vmx(vcpu);
1602 KVM_VM_CR0_ALWAYS_ON; 1671 unsigned long hw_cr0;
1672
1673 if (enable_unrestricted_guest)
1674 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST)
1675 | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
1676 else
1677 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
1603 1678
1604 vmx_fpu_deactivate(vcpu); 1679 vmx_fpu_deactivate(vcpu);
1605 1680
1606 if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE)) 1681 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
1607 enter_pmode(vcpu); 1682 enter_pmode(vcpu);
1608 1683
1609 if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE)) 1684 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
1610 enter_rmode(vcpu); 1685 enter_rmode(vcpu);
1611 1686
1612#ifdef CONFIG_X86_64 1687#ifdef CONFIG_X86_64
@@ -1650,10 +1725,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1650 if (enable_ept) { 1725 if (enable_ept) {
1651 eptp = construct_eptp(cr3); 1726 eptp = construct_eptp(cr3);
1652 vmcs_write64(EPT_POINTER, eptp); 1727 vmcs_write64(EPT_POINTER, eptp);
1653 ept_sync_context(eptp);
1654 ept_load_pdptrs(vcpu);
1655 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : 1728 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
1656 VMX_EPT_IDENTITY_PAGETABLE_ADDR; 1729 vcpu->kvm->arch.ept_identity_map_addr;
1657 } 1730 }
1658 1731
1659 vmx_flush_tlb(vcpu); 1732 vmx_flush_tlb(vcpu);
@@ -1664,7 +1737,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1664 1737
1665static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1738static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1666{ 1739{
1667 unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ? 1740 unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
1668 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); 1741 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
1669 1742
1670 vcpu->arch.cr4 = cr4; 1743 vcpu->arch.cr4 = cr4;
@@ -1707,16 +1780,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1707 1780
1708static int vmx_get_cpl(struct kvm_vcpu *vcpu) 1781static int vmx_get_cpl(struct kvm_vcpu *vcpu)
1709{ 1782{
1710 struct kvm_segment kvm_seg;
1711
1712 if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ 1783 if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
1713 return 0; 1784 return 0;
1714 1785
1715 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ 1786 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
1716 return 3; 1787 return 3;
1717 1788
1718 vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS); 1789 return vmcs_read16(GUEST_CS_SELECTOR) & 3;
1719 return kvm_seg.selector & 3;
1720} 1790}
1721 1791
1722static u32 vmx_segment_access_rights(struct kvm_segment *var) 1792static u32 vmx_segment_access_rights(struct kvm_segment *var)
@@ -1744,20 +1814,21 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
1744static void vmx_set_segment(struct kvm_vcpu *vcpu, 1814static void vmx_set_segment(struct kvm_vcpu *vcpu,
1745 struct kvm_segment *var, int seg) 1815 struct kvm_segment *var, int seg)
1746{ 1816{
1817 struct vcpu_vmx *vmx = to_vmx(vcpu);
1747 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1818 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
1748 u32 ar; 1819 u32 ar;
1749 1820
1750 if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) { 1821 if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
1751 vcpu->arch.rmode.tr.selector = var->selector; 1822 vmx->rmode.tr.selector = var->selector;
1752 vcpu->arch.rmode.tr.base = var->base; 1823 vmx->rmode.tr.base = var->base;
1753 vcpu->arch.rmode.tr.limit = var->limit; 1824 vmx->rmode.tr.limit = var->limit;
1754 vcpu->arch.rmode.tr.ar = vmx_segment_access_rights(var); 1825 vmx->rmode.tr.ar = vmx_segment_access_rights(var);
1755 return; 1826 return;
1756 } 1827 }
1757 vmcs_writel(sf->base, var->base); 1828 vmcs_writel(sf->base, var->base);
1758 vmcs_write32(sf->limit, var->limit); 1829 vmcs_write32(sf->limit, var->limit);
1759 vmcs_write16(sf->selector, var->selector); 1830 vmcs_write16(sf->selector, var->selector);
1760 if (vcpu->arch.rmode.vm86_active && var->s) { 1831 if (vmx->rmode.vm86_active && var->s) {
1761 /* 1832 /*
1762 * Hack real-mode segments into vm86 compatibility. 1833 * Hack real-mode segments into vm86 compatibility.
1763 */ 1834 */
@@ -1766,6 +1837,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
1766 ar = 0xf3; 1837 ar = 0xf3;
1767 } else 1838 } else
1768 ar = vmx_segment_access_rights(var); 1839 ar = vmx_segment_access_rights(var);
1840
1841 /*
1842 * Fix the "Accessed" bit in AR field of segment registers for older
1843 * qemu binaries.
1844 * IA32 arch specifies that at the time of processor reset the
1845 * "Accessed" bit in the AR field of segment registers is 1. And qemu
1846 * is setting it to 0 in the usedland code. This causes invalid guest
1847 * state vmexit when "unrestricted guest" mode is turned on.
1848 * Fix for this setup issue in cpu_reset is being pushed in the qemu
1849 * tree. Newer qemu binaries with that qemu fix would not need this
1850 * kvm hack.
1851 */
1852 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
1853 ar |= 0x1; /* Accessed */
1854
1769 vmcs_write32(sf->ar_bytes, ar); 1855 vmcs_write32(sf->ar_bytes, ar);
1770} 1856}
1771 1857
@@ -2040,7 +2126,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
2040 if (likely(kvm->arch.ept_identity_pagetable_done)) 2126 if (likely(kvm->arch.ept_identity_pagetable_done))
2041 return 1; 2127 return 1;
2042 ret = 0; 2128 ret = 0;
2043 identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT; 2129 identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
2044 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); 2130 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
2045 if (r < 0) 2131 if (r < 0)
2046 goto out; 2132 goto out;
@@ -2062,11 +2148,19 @@ out:
2062static void seg_setup(int seg) 2148static void seg_setup(int seg)
2063{ 2149{
2064 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2150 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2151 unsigned int ar;
2065 2152
2066 vmcs_write16(sf->selector, 0); 2153 vmcs_write16(sf->selector, 0);
2067 vmcs_writel(sf->base, 0); 2154 vmcs_writel(sf->base, 0);
2068 vmcs_write32(sf->limit, 0xffff); 2155 vmcs_write32(sf->limit, 0xffff);
2069 vmcs_write32(sf->ar_bytes, 0xf3); 2156 if (enable_unrestricted_guest) {
2157 ar = 0x93;
2158 if (seg == VCPU_SREG_CS)
2159 ar |= 0x08; /* code segment */
2160 } else
2161 ar = 0xf3;
2162
2163 vmcs_write32(sf->ar_bytes, ar);
2070} 2164}
2071 2165
2072static int alloc_apic_access_page(struct kvm *kvm) 2166static int alloc_apic_access_page(struct kvm *kvm)
@@ -2101,14 +2195,15 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2101 goto out; 2195 goto out;
2102 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; 2196 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
2103 kvm_userspace_mem.flags = 0; 2197 kvm_userspace_mem.flags = 0;
2104 kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; 2198 kvm_userspace_mem.guest_phys_addr =
2199 kvm->arch.ept_identity_map_addr;
2105 kvm_userspace_mem.memory_size = PAGE_SIZE; 2200 kvm_userspace_mem.memory_size = PAGE_SIZE;
2106 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); 2201 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
2107 if (r) 2202 if (r)
2108 goto out; 2203 goto out;
2109 2204
2110 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, 2205 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
2111 VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); 2206 kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
2112out: 2207out:
2113 up_write(&kvm->slots_lock); 2208 up_write(&kvm->slots_lock);
2114 return r; 2209 return r;
@@ -2209,6 +2304,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2209 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; 2304 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
2210 if (!enable_ept) 2305 if (!enable_ept)
2211 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; 2306 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
2307 if (!enable_unrestricted_guest)
2308 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
2212 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 2309 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
2213 } 2310 }
2214 2311
@@ -2326,14 +2423,14 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2326 goto out; 2423 goto out;
2327 } 2424 }
2328 2425
2329 vmx->vcpu.arch.rmode.vm86_active = 0; 2426 vmx->rmode.vm86_active = 0;
2330 2427
2331 vmx->soft_vnmi_blocked = 0; 2428 vmx->soft_vnmi_blocked = 0;
2332 2429
2333 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 2430 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
2334 kvm_set_cr8(&vmx->vcpu, 0); 2431 kvm_set_cr8(&vmx->vcpu, 0);
2335 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 2432 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
2336 if (vmx->vcpu.vcpu_id == 0) 2433 if (kvm_vcpu_is_bsp(&vmx->vcpu))
2337 msr |= MSR_IA32_APICBASE_BSP; 2434 msr |= MSR_IA32_APICBASE_BSP;
2338 kvm_set_apic_base(&vmx->vcpu, msr); 2435 kvm_set_apic_base(&vmx->vcpu, msr);
2339 2436
@@ -2344,7 +2441,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2344 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode 2441 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
2345 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. 2442 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
2346 */ 2443 */
2347 if (vmx->vcpu.vcpu_id == 0) { 2444 if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
2348 vmcs_write16(GUEST_CS_SELECTOR, 0xf000); 2445 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
2349 vmcs_writel(GUEST_CS_BASE, 0x000f0000); 2446 vmcs_writel(GUEST_CS_BASE, 0x000f0000);
2350 } else { 2447 } else {
@@ -2373,7 +2470,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2373 vmcs_writel(GUEST_SYSENTER_EIP, 0); 2470 vmcs_writel(GUEST_SYSENTER_EIP, 0);
2374 2471
2375 vmcs_writel(GUEST_RFLAGS, 0x02); 2472 vmcs_writel(GUEST_RFLAGS, 0x02);
2376 if (vmx->vcpu.vcpu_id == 0) 2473 if (kvm_vcpu_is_bsp(&vmx->vcpu))
2377 kvm_rip_write(vcpu, 0xfff0); 2474 kvm_rip_write(vcpu, 0xfff0);
2378 else 2475 else
2379 kvm_rip_write(vcpu, 0); 2476 kvm_rip_write(vcpu, 0);
@@ -2461,13 +2558,16 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
2461 uint32_t intr; 2558 uint32_t intr;
2462 int irq = vcpu->arch.interrupt.nr; 2559 int irq = vcpu->arch.interrupt.nr;
2463 2560
2464 KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); 2561 trace_kvm_inj_virq(irq);
2465 2562
2466 ++vcpu->stat.irq_injections; 2563 ++vcpu->stat.irq_injections;
2467 if (vcpu->arch.rmode.vm86_active) { 2564 if (vmx->rmode.vm86_active) {
2468 vmx->rmode.irq.pending = true; 2565 vmx->rmode.irq.pending = true;
2469 vmx->rmode.irq.vector = irq; 2566 vmx->rmode.irq.vector = irq;
2470 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 2567 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
2568 if (vcpu->arch.interrupt.soft)
2569 vmx->rmode.irq.rip +=
2570 vmx->vcpu.arch.event_exit_inst_len;
2471 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2571 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2472 irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); 2572 irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
2473 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); 2573 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
@@ -2502,7 +2602,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2502 } 2602 }
2503 2603
2504 ++vcpu->stat.nmi_injections; 2604 ++vcpu->stat.nmi_injections;
2505 if (vcpu->arch.rmode.vm86_active) { 2605 if (vmx->rmode.vm86_active) {
2506 vmx->rmode.irq.pending = true; 2606 vmx->rmode.irq.pending = true;
2507 vmx->rmode.irq.vector = NMI_VECTOR; 2607 vmx->rmode.irq.vector = NMI_VECTOR;
2508 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 2608 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -2659,14 +2759,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2659 if (enable_ept) 2759 if (enable_ept)
2660 BUG(); 2760 BUG();
2661 cr2 = vmcs_readl(EXIT_QUALIFICATION); 2761 cr2 = vmcs_readl(EXIT_QUALIFICATION);
2662 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, 2762 trace_kvm_page_fault(cr2, error_code);
2663 (u32)((u64)cr2 >> 32), handler); 2763
2664 if (kvm_event_needs_reinjection(vcpu)) 2764 if (kvm_event_needs_reinjection(vcpu))
2665 kvm_mmu_unprotect_page_virt(vcpu, cr2); 2765 kvm_mmu_unprotect_page_virt(vcpu, cr2);
2666 return kvm_mmu_page_fault(vcpu, cr2, error_code); 2766 return kvm_mmu_page_fault(vcpu, cr2, error_code);
2667 } 2767 }
2668 2768
2669 if (vcpu->arch.rmode.vm86_active && 2769 if (vmx->rmode.vm86_active &&
2670 handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, 2770 handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
2671 error_code)) { 2771 error_code)) {
2672 if (vcpu->arch.halt_request) { 2772 if (vcpu->arch.halt_request) {
@@ -2707,7 +2807,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
2707 struct kvm_run *kvm_run) 2807 struct kvm_run *kvm_run)
2708{ 2808{
2709 ++vcpu->stat.irq_exits; 2809 ++vcpu->stat.irq_exits;
2710 KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
2711 return 1; 2810 return 1;
2712} 2811}
2713 2812
@@ -2755,7 +2854,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
2755 2854
2756static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2855static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2757{ 2856{
2758 unsigned long exit_qualification; 2857 unsigned long exit_qualification, val;
2759 int cr; 2858 int cr;
2760 int reg; 2859 int reg;
2761 2860
@@ -2764,21 +2863,19 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2764 reg = (exit_qualification >> 8) & 15; 2863 reg = (exit_qualification >> 8) & 15;
2765 switch ((exit_qualification >> 4) & 3) { 2864 switch ((exit_qualification >> 4) & 3) {
2766 case 0: /* mov to cr */ 2865 case 0: /* mov to cr */
2767 KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, 2866 val = kvm_register_read(vcpu, reg);
2768 (u32)kvm_register_read(vcpu, reg), 2867 trace_kvm_cr_write(cr, val);
2769 (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
2770 handler);
2771 switch (cr) { 2868 switch (cr) {
2772 case 0: 2869 case 0:
2773 kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg)); 2870 kvm_set_cr0(vcpu, val);
2774 skip_emulated_instruction(vcpu); 2871 skip_emulated_instruction(vcpu);
2775 return 1; 2872 return 1;
2776 case 3: 2873 case 3:
2777 kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg)); 2874 kvm_set_cr3(vcpu, val);
2778 skip_emulated_instruction(vcpu); 2875 skip_emulated_instruction(vcpu);
2779 return 1; 2876 return 1;
2780 case 4: 2877 case 4:
2781 kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); 2878 kvm_set_cr4(vcpu, val);
2782 skip_emulated_instruction(vcpu); 2879 skip_emulated_instruction(vcpu);
2783 return 1; 2880 return 1;
2784 case 8: { 2881 case 8: {
@@ -2800,23 +2897,19 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2800 vcpu->arch.cr0 &= ~X86_CR0_TS; 2897 vcpu->arch.cr0 &= ~X86_CR0_TS;
2801 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); 2898 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
2802 vmx_fpu_activate(vcpu); 2899 vmx_fpu_activate(vcpu);
2803 KVMTRACE_0D(CLTS, vcpu, handler);
2804 skip_emulated_instruction(vcpu); 2900 skip_emulated_instruction(vcpu);
2805 return 1; 2901 return 1;
2806 case 1: /*mov from cr*/ 2902 case 1: /*mov from cr*/
2807 switch (cr) { 2903 switch (cr) {
2808 case 3: 2904 case 3:
2809 kvm_register_write(vcpu, reg, vcpu->arch.cr3); 2905 kvm_register_write(vcpu, reg, vcpu->arch.cr3);
2810 KVMTRACE_3D(CR_READ, vcpu, (u32)cr, 2906 trace_kvm_cr_read(cr, vcpu->arch.cr3);
2811 (u32)kvm_register_read(vcpu, reg),
2812 (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
2813 handler);
2814 skip_emulated_instruction(vcpu); 2907 skip_emulated_instruction(vcpu);
2815 return 1; 2908 return 1;
2816 case 8: 2909 case 8:
2817 kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu)); 2910 val = kvm_get_cr8(vcpu);
2818 KVMTRACE_2D(CR_READ, vcpu, (u32)cr, 2911 kvm_register_write(vcpu, reg, val);
2819 (u32)kvm_register_read(vcpu, reg), handler); 2912 trace_kvm_cr_read(cr, val);
2820 skip_emulated_instruction(vcpu); 2913 skip_emulated_instruction(vcpu);
2821 return 1; 2914 return 1;
2822 } 2915 }
@@ -2841,6 +2934,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2841 unsigned long val; 2934 unsigned long val;
2842 int dr, reg; 2935 int dr, reg;
2843 2936
2937 if (!kvm_require_cpl(vcpu, 0))
2938 return 1;
2844 dr = vmcs_readl(GUEST_DR7); 2939 dr = vmcs_readl(GUEST_DR7);
2845 if (dr & DR7_GD) { 2940 if (dr & DR7_GD) {
2846 /* 2941 /*
@@ -2884,7 +2979,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2884 val = 0; 2979 val = 0;
2885 } 2980 }
2886 kvm_register_write(vcpu, reg, val); 2981 kvm_register_write(vcpu, reg, val);
2887 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
2888 } else { 2982 } else {
2889 val = vcpu->arch.regs[reg]; 2983 val = vcpu->arch.regs[reg];
2890 switch (dr) { 2984 switch (dr) {
@@ -2917,7 +3011,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2917 } 3011 }
2918 break; 3012 break;
2919 } 3013 }
2920 KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
2921 } 3014 }
2922 skip_emulated_instruction(vcpu); 3015 skip_emulated_instruction(vcpu);
2923 return 1; 3016 return 1;
@@ -2939,8 +3032,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2939 return 1; 3032 return 1;
2940 } 3033 }
2941 3034
2942 KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32), 3035 trace_kvm_msr_read(ecx, data);
2943 handler);
2944 3036
2945 /* FIXME: handling of bits 32:63 of rax, rdx */ 3037 /* FIXME: handling of bits 32:63 of rax, rdx */
2946 vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; 3038 vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
@@ -2955,8 +3047,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2955 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) 3047 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
2956 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); 3048 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2957 3049
2958 KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32), 3050 trace_kvm_msr_write(ecx, data);
2959 handler);
2960 3051
2961 if (vmx_set_msr(vcpu, ecx, data) != 0) { 3052 if (vmx_set_msr(vcpu, ecx, data) != 0) {
2962 kvm_inject_gp(vcpu, 0); 3053 kvm_inject_gp(vcpu, 0);
@@ -2983,7 +3074,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2983 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; 3074 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2984 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 3075 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2985 3076
2986 KVMTRACE_0D(PEND_INTR, vcpu, handler);
2987 ++vcpu->stat.irq_window_exits; 3077 ++vcpu->stat.irq_window_exits;
2988 3078
2989 /* 3079 /*
@@ -3049,7 +3139,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3049 printk(KERN_ERR 3139 printk(KERN_ERR
3050 "Fail to handle apic access vmexit! Offset is 0x%lx\n", 3140 "Fail to handle apic access vmexit! Offset is 0x%lx\n",
3051 offset); 3141 offset);
3052 return -ENOTSUPP; 3142 return -ENOEXEC;
3053 } 3143 }
3054 return 1; 3144 return 1;
3055} 3145}
@@ -3118,7 +3208,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3118 3208
3119 if (exit_qualification & (1 << 6)) { 3209 if (exit_qualification & (1 << 6)) {
3120 printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); 3210 printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
3121 return -ENOTSUPP; 3211 return -EINVAL;
3122 } 3212 }
3123 3213
3124 gla_validity = (exit_qualification >> 7) & 0x3; 3214 gla_validity = (exit_qualification >> 7) & 0x3;
@@ -3130,14 +3220,98 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3130 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", 3220 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
3131 (long unsigned int)exit_qualification); 3221 (long unsigned int)exit_qualification);
3132 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 3222 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3133 kvm_run->hw.hardware_exit_reason = 0; 3223 kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
3134 return -ENOTSUPP; 3224 return 0;
3135 } 3225 }
3136 3226
3137 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 3227 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
3228 trace_kvm_page_fault(gpa, exit_qualification);
3138 return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); 3229 return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
3139} 3230}
3140 3231
3232static u64 ept_rsvd_mask(u64 spte, int level)
3233{
3234 int i;
3235 u64 mask = 0;
3236
3237 for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
3238 mask |= (1ULL << i);
3239
3240 if (level > 2)
3241 /* bits 7:3 reserved */
3242 mask |= 0xf8;
3243 else if (level == 2) {
3244 if (spte & (1ULL << 7))
3245 /* 2MB ref, bits 20:12 reserved */
3246 mask |= 0x1ff000;
3247 else
3248 /* bits 6:3 reserved */
3249 mask |= 0x78;
3250 }
3251
3252 return mask;
3253}
3254
3255static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
3256 int level)
3257{
3258 printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
3259
3260 /* 010b (write-only) */
3261 WARN_ON((spte & 0x7) == 0x2);
3262
3263 /* 110b (write/execute) */
3264 WARN_ON((spte & 0x7) == 0x6);
3265
3266 /* 100b (execute-only) and value not supported by logical processor */
3267 if (!cpu_has_vmx_ept_execute_only())
3268 WARN_ON((spte & 0x7) == 0x4);
3269
3270 /* not 000b */
3271 if ((spte & 0x7)) {
3272 u64 rsvd_bits = spte & ept_rsvd_mask(spte, level);
3273
3274 if (rsvd_bits != 0) {
3275 printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n",
3276 __func__, rsvd_bits);
3277 WARN_ON(1);
3278 }
3279
3280 if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) {
3281 u64 ept_mem_type = (spte & 0x38) >> 3;
3282
3283 if (ept_mem_type == 2 || ept_mem_type == 3 ||
3284 ept_mem_type == 7) {
3285 printk(KERN_ERR "%s: ept_mem_type=0x%llx\n",
3286 __func__, ept_mem_type);
3287 WARN_ON(1);
3288 }
3289 }
3290 }
3291}
3292
3293static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3294{
3295 u64 sptes[4];
3296 int nr_sptes, i;
3297 gpa_t gpa;
3298
3299 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
3300
3301 printk(KERN_ERR "EPT: Misconfiguration.\n");
3302 printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
3303
3304 nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
3305
3306 for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
3307 ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
3308
3309 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3310 kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
3311
3312 return 0;
3313}
3314
3141static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3315static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3142{ 3316{
3143 u32 cpu_based_vm_exec_control; 3317 u32 cpu_based_vm_exec_control;
@@ -3217,8 +3391,9 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
3217 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 3391 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
3218 [EXIT_REASON_WBINVD] = handle_wbinvd, 3392 [EXIT_REASON_WBINVD] = handle_wbinvd,
3219 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 3393 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
3220 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
3221 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, 3394 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
3395 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
3396 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
3222}; 3397};
3223 3398
3224static const int kvm_vmx_max_exit_handlers = 3399static const int kvm_vmx_max_exit_handlers =
@@ -3234,8 +3409,7 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3234 u32 exit_reason = vmx->exit_reason; 3409 u32 exit_reason = vmx->exit_reason;
3235 u32 vectoring_info = vmx->idt_vectoring_info; 3410 u32 vectoring_info = vmx->idt_vectoring_info;
3236 3411
3237 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), 3412 trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
3238 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
3239 3413
3240 /* If we need to emulate an MMIO from handle_invalid_guest_state 3414 /* If we need to emulate an MMIO from handle_invalid_guest_state
3241 * we just return 0 */ 3415 * we just return 0 */
@@ -3247,10 +3421,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3247 3421
3248 /* Access CR3 don't cause VMExit in paging mode, so we need 3422 /* Access CR3 don't cause VMExit in paging mode, so we need
3249 * to sync with guest real CR3. */ 3423 * to sync with guest real CR3. */
3250 if (enable_ept && is_paging(vcpu)) { 3424 if (enable_ept && is_paging(vcpu))
3251 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); 3425 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
3252 ept_load_pdptrs(vcpu);
3253 }
3254 3426
3255 if (unlikely(vmx->fail)) { 3427 if (unlikely(vmx->fail)) {
3256 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 3428 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -3326,10 +3498,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3326 3498
3327 /* We need to handle NMIs before interrupts are enabled */ 3499 /* We need to handle NMIs before interrupts are enabled */
3328 if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && 3500 if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
3329 (exit_intr_info & INTR_INFO_VALID_MASK)) { 3501 (exit_intr_info & INTR_INFO_VALID_MASK))
3330 KVMTRACE_0D(NMI, &vmx->vcpu, handler);
3331 asm("int $2"); 3502 asm("int $2");
3332 }
3333 3503
3334 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 3504 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
3335 3505
@@ -3434,6 +3604,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3434{ 3604{
3435 struct vcpu_vmx *vmx = to_vmx(vcpu); 3605 struct vcpu_vmx *vmx = to_vmx(vcpu);
3436 3606
3607 if (enable_ept && is_paging(vcpu)) {
3608 vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
3609 ept_load_pdptrs(vcpu);
3610 }
3437 /* Record the guest's net vcpu time for enforced NMI injections. */ 3611 /* Record the guest's net vcpu time for enforced NMI injections. */
3438 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) 3612 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
3439 vmx->entry_time = ktime_get(); 3613 vmx->entry_time = ktime_get();
@@ -3449,12 +3623,21 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3449 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) 3623 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
3450 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); 3624 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
3451 3625
3626 /* When single-stepping over STI and MOV SS, we must clear the
3627 * corresponding interruptibility bits in the guest state. Otherwise
3628 * vmentry fails as it then expects bit 14 (BS) in pending debug
3629 * exceptions being set, but that's not correct for the guest debugging
3630 * case. */
3631 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3632 vmx_set_interrupt_shadow(vcpu, 0);
3633
3452 /* 3634 /*
3453 * Loading guest fpu may have cleared host cr0.ts 3635 * Loading guest fpu may have cleared host cr0.ts
3454 */ 3636 */
3455 vmcs_writel(HOST_CR0, read_cr0()); 3637 vmcs_writel(HOST_CR0, read_cr0());
3456 3638
3457 set_debugreg(vcpu->arch.dr6, 6); 3639 if (vcpu->arch.switch_db_regs)
3640 set_debugreg(vcpu->arch.dr6, 6);
3458 3641
3459 asm( 3642 asm(
3460 /* Store host registers */ 3643 /* Store host registers */
@@ -3465,11 +3648,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3465 "mov %%"R"sp, %c[host_rsp](%0) \n\t" 3648 "mov %%"R"sp, %c[host_rsp](%0) \n\t"
3466 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" 3649 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
3467 "1: \n\t" 3650 "1: \n\t"
3651 /* Reload cr2 if changed */
3652 "mov %c[cr2](%0), %%"R"ax \n\t"
3653 "mov %%cr2, %%"R"dx \n\t"
3654 "cmp %%"R"ax, %%"R"dx \n\t"
3655 "je 2f \n\t"
3656 "mov %%"R"ax, %%cr2 \n\t"
3657 "2: \n\t"
3468 /* Check if vmlaunch of vmresume is needed */ 3658 /* Check if vmlaunch of vmresume is needed */
3469 "cmpl $0, %c[launched](%0) \n\t" 3659 "cmpl $0, %c[launched](%0) \n\t"
3470 /* Load guest registers. Don't clobber flags. */ 3660 /* Load guest registers. Don't clobber flags. */
3471 "mov %c[cr2](%0), %%"R"ax \n\t"
3472 "mov %%"R"ax, %%cr2 \n\t"
3473 "mov %c[rax](%0), %%"R"ax \n\t" 3661 "mov %c[rax](%0), %%"R"ax \n\t"
3474 "mov %c[rbx](%0), %%"R"bx \n\t" 3662 "mov %c[rbx](%0), %%"R"bx \n\t"
3475 "mov %c[rdx](%0), %%"R"dx \n\t" 3663 "mov %c[rdx](%0), %%"R"dx \n\t"
@@ -3547,10 +3735,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3547#endif 3735#endif
3548 ); 3736 );
3549 3737
3550 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 3738 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
3739 | (1 << VCPU_EXREG_PDPTR));
3551 vcpu->arch.regs_dirty = 0; 3740 vcpu->arch.regs_dirty = 0;
3552 3741
3553 get_debugreg(vcpu->arch.dr6, 6); 3742 if (vcpu->arch.switch_db_regs)
3743 get_debugreg(vcpu->arch.dr6, 6);
3554 3744
3555 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 3745 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
3556 if (vmx->rmode.irq.pending) 3746 if (vmx->rmode.irq.pending)
@@ -3633,9 +3823,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
3633 if (alloc_apic_access_page(kvm) != 0) 3823 if (alloc_apic_access_page(kvm) != 0)
3634 goto free_vmcs; 3824 goto free_vmcs;
3635 3825
3636 if (enable_ept) 3826 if (enable_ept) {
3827 if (!kvm->arch.ept_identity_map_addr)
3828 kvm->arch.ept_identity_map_addr =
3829 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
3637 if (alloc_identity_pagetable(kvm) != 0) 3830 if (alloc_identity_pagetable(kvm) != 0)
3638 goto free_vmcs; 3831 goto free_vmcs;
3832 }
3639 3833
3640 return &vmx->vcpu; 3834 return &vmx->vcpu;
3641 3835
@@ -3699,6 +3893,34 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3699 return ret; 3893 return ret;
3700} 3894}
3701 3895
3896static const struct trace_print_flags vmx_exit_reasons_str[] = {
3897 { EXIT_REASON_EXCEPTION_NMI, "exception" },
3898 { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" },
3899 { EXIT_REASON_TRIPLE_FAULT, "triple_fault" },
3900 { EXIT_REASON_NMI_WINDOW, "nmi_window" },
3901 { EXIT_REASON_IO_INSTRUCTION, "io_instruction" },
3902 { EXIT_REASON_CR_ACCESS, "cr_access" },
3903 { EXIT_REASON_DR_ACCESS, "dr_access" },
3904 { EXIT_REASON_CPUID, "cpuid" },
3905 { EXIT_REASON_MSR_READ, "rdmsr" },
3906 { EXIT_REASON_MSR_WRITE, "wrmsr" },
3907 { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" },
3908 { EXIT_REASON_HLT, "halt" },
3909 { EXIT_REASON_INVLPG, "invlpg" },
3910 { EXIT_REASON_VMCALL, "hypercall" },
3911 { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" },
3912 { EXIT_REASON_APIC_ACCESS, "apic_access" },
3913 { EXIT_REASON_WBINVD, "wbinvd" },
3914 { EXIT_REASON_TASK_SWITCH, "task_switch" },
3915 { EXIT_REASON_EPT_VIOLATION, "ept_violation" },
3916 { -1, NULL }
3917};
3918
3919static bool vmx_gb_page_enable(void)
3920{
3921 return false;
3922}
3923
3702static struct kvm_x86_ops vmx_x86_ops = { 3924static struct kvm_x86_ops vmx_x86_ops = {
3703 .cpu_has_kvm_support = cpu_has_kvm_support, 3925 .cpu_has_kvm_support = cpu_has_kvm_support,
3704 .disabled_by_bios = vmx_disabled_by_bios, 3926 .disabled_by_bios = vmx_disabled_by_bios,
@@ -3758,6 +3980,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
3758 .set_tss_addr = vmx_set_tss_addr, 3980 .set_tss_addr = vmx_set_tss_addr,
3759 .get_tdp_level = get_ept_level, 3981 .get_tdp_level = get_ept_level,
3760 .get_mt_mask = vmx_get_mt_mask, 3982 .get_mt_mask = vmx_get_mt_mask,
3983
3984 .exit_reasons_str = vmx_exit_reasons_str,
3985 .gb_page_enable = vmx_gb_page_enable,
3761}; 3986};
3762 3987
3763static int __init vmx_init(void) 3988static int __init vmx_init(void)