diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-14 20:43:43 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-14 20:43:43 -0400 |
commit | 69def9f05dfce3281bb06599057e6b8097385d39 (patch) | |
tree | 7d826b22924268ddbfad101993b248996d40e2ec /arch/x86/kvm/vmx.c | |
parent | 353f6dd2dec992ddd34620a94b051b0f76227379 (diff) | |
parent | 8e616fc8d343bd7f0f0a0c22407fdcb77f6d22b1 (diff) |
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits)
MAINTAINERS: update KVM entry
KVM: correct error-handling code
KVM: fix compile warnings on s390
KVM: VMX: Check cpl before emulating debug register access
KVM: fix misreporting of coalesced interrupts by kvm tracer
KVM: x86: drop duplicate kvm_flush_remote_tlb calls
KVM: VMX: call vmx_load_host_state() only if msr is cached
KVM: VMX: Conditionally reload debug register 6
KVM: Use thread debug register storage instead of kvm specific data
KVM guest: do not batch pte updates from interrupt context
KVM: Fix coalesced interrupt reporting in IOAPIC
KVM guest: fix bogus wallclock physical address calculation
KVM: VMX: Fix cr8 exiting control clobbering by EPT
KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp
KVM: Document KVM_CAP_IRQCHIP
KVM: Protect update_cr8_intercept() when running without an apic
KVM: VMX: Fix EPT with WP bit change during paging
KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors
KVM: x86 emulator: Add adc and sbb missing decoder flags
KVM: Add missing #include
...
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx.c | 497 |
1 files changed, 361 insertions, 136 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 29f912927a58..f3812014bd0b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/highmem.h> | 25 | #include <linux/highmem.h> |
26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
28 | #include <linux/ftrace_event.h> | ||
28 | #include "kvm_cache_regs.h" | 29 | #include "kvm_cache_regs.h" |
29 | #include "x86.h" | 30 | #include "x86.h" |
30 | 31 | ||
@@ -34,6 +35,8 @@ | |||
34 | #include <asm/virtext.h> | 35 | #include <asm/virtext.h> |
35 | #include <asm/mce.h> | 36 | #include <asm/mce.h> |
36 | 37 | ||
38 | #include "trace.h" | ||
39 | |||
37 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 40 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
38 | 41 | ||
39 | MODULE_AUTHOR("Qumranet"); | 42 | MODULE_AUTHOR("Qumranet"); |
@@ -51,6 +54,10 @@ module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); | |||
51 | static int __read_mostly enable_ept = 1; | 54 | static int __read_mostly enable_ept = 1; |
52 | module_param_named(ept, enable_ept, bool, S_IRUGO); | 55 | module_param_named(ept, enable_ept, bool, S_IRUGO); |
53 | 56 | ||
57 | static int __read_mostly enable_unrestricted_guest = 1; | ||
58 | module_param_named(unrestricted_guest, | ||
59 | enable_unrestricted_guest, bool, S_IRUGO); | ||
60 | |||
54 | static int __read_mostly emulate_invalid_guest_state = 0; | 61 | static int __read_mostly emulate_invalid_guest_state = 0; |
55 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
56 | 63 | ||
@@ -84,6 +91,14 @@ struct vcpu_vmx { | |||
84 | int guest_efer_loaded; | 91 | int guest_efer_loaded; |
85 | } host_state; | 92 | } host_state; |
86 | struct { | 93 | struct { |
94 | int vm86_active; | ||
95 | u8 save_iopl; | ||
96 | struct kvm_save_segment { | ||
97 | u16 selector; | ||
98 | unsigned long base; | ||
99 | u32 limit; | ||
100 | u32 ar; | ||
101 | } tr, es, ds, fs, gs; | ||
87 | struct { | 102 | struct { |
88 | bool pending; | 103 | bool pending; |
89 | u8 vector; | 104 | u8 vector; |
@@ -161,6 +176,8 @@ static struct kvm_vmx_segment_field { | |||
161 | VMX_SEGMENT_FIELD(LDTR), | 176 | VMX_SEGMENT_FIELD(LDTR), |
162 | }; | 177 | }; |
163 | 178 | ||
179 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | ||
180 | |||
164 | /* | 181 | /* |
165 | * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it | 182 | * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it |
166 | * away by decrementing the array size. | 183 | * away by decrementing the array size. |
@@ -256,6 +273,26 @@ static inline bool cpu_has_vmx_flexpriority(void) | |||
256 | cpu_has_vmx_virtualize_apic_accesses(); | 273 | cpu_has_vmx_virtualize_apic_accesses(); |
257 | } | 274 | } |
258 | 275 | ||
276 | static inline bool cpu_has_vmx_ept_execute_only(void) | ||
277 | { | ||
278 | return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); | ||
279 | } | ||
280 | |||
281 | static inline bool cpu_has_vmx_eptp_uncacheable(void) | ||
282 | { | ||
283 | return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); | ||
284 | } | ||
285 | |||
286 | static inline bool cpu_has_vmx_eptp_writeback(void) | ||
287 | { | ||
288 | return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); | ||
289 | } | ||
290 | |||
291 | static inline bool cpu_has_vmx_ept_2m_page(void) | ||
292 | { | ||
293 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | ||
294 | } | ||
295 | |||
259 | static inline int cpu_has_vmx_invept_individual_addr(void) | 296 | static inline int cpu_has_vmx_invept_individual_addr(void) |
260 | { | 297 | { |
261 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 298 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); |
@@ -277,6 +314,12 @@ static inline int cpu_has_vmx_ept(void) | |||
277 | SECONDARY_EXEC_ENABLE_EPT; | 314 | SECONDARY_EXEC_ENABLE_EPT; |
278 | } | 315 | } |
279 | 316 | ||
317 | static inline int cpu_has_vmx_unrestricted_guest(void) | ||
318 | { | ||
319 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
320 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | ||
321 | } | ||
322 | |||
280 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 323 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
281 | { | 324 | { |
282 | return flexpriority_enabled && | 325 | return flexpriority_enabled && |
@@ -497,14 +540,16 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
497 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); | 540 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); |
498 | if (!vcpu->fpu_active) | 541 | if (!vcpu->fpu_active) |
499 | eb |= 1u << NM_VECTOR; | 542 | eb |= 1u << NM_VECTOR; |
543 | /* | ||
544 | * Unconditionally intercept #DB so we can maintain dr6 without | ||
545 | * reading it every exit. | ||
546 | */ | ||
547 | eb |= 1u << DB_VECTOR; | ||
500 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 548 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
501 | if (vcpu->guest_debug & | ||
502 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | ||
503 | eb |= 1u << DB_VECTOR; | ||
504 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 549 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
505 | eb |= 1u << BP_VECTOR; | 550 | eb |= 1u << BP_VECTOR; |
506 | } | 551 | } |
507 | if (vcpu->arch.rmode.vm86_active) | 552 | if (to_vmx(vcpu)->rmode.vm86_active) |
508 | eb = ~0; | 553 | eb = ~0; |
509 | if (enable_ept) | 554 | if (enable_ept) |
510 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 555 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
@@ -528,12 +573,15 @@ static void reload_tss(void) | |||
528 | static void load_transition_efer(struct vcpu_vmx *vmx) | 573 | static void load_transition_efer(struct vcpu_vmx *vmx) |
529 | { | 574 | { |
530 | int efer_offset = vmx->msr_offset_efer; | 575 | int efer_offset = vmx->msr_offset_efer; |
531 | u64 host_efer = vmx->host_msrs[efer_offset].data; | 576 | u64 host_efer; |
532 | u64 guest_efer = vmx->guest_msrs[efer_offset].data; | 577 | u64 guest_efer; |
533 | u64 ignore_bits; | 578 | u64 ignore_bits; |
534 | 579 | ||
535 | if (efer_offset < 0) | 580 | if (efer_offset < 0) |
536 | return; | 581 | return; |
582 | host_efer = vmx->host_msrs[efer_offset].data; | ||
583 | guest_efer = vmx->guest_msrs[efer_offset].data; | ||
584 | |||
537 | /* | 585 | /* |
538 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless | 586 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless |
539 | * outside long mode | 587 | * outside long mode |
@@ -735,12 +783,17 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
735 | 783 | ||
736 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 784 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
737 | { | 785 | { |
738 | return vmcs_readl(GUEST_RFLAGS); | 786 | unsigned long rflags; |
787 | |||
788 | rflags = vmcs_readl(GUEST_RFLAGS); | ||
789 | if (to_vmx(vcpu)->rmode.vm86_active) | ||
790 | rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | ||
791 | return rflags; | ||
739 | } | 792 | } |
740 | 793 | ||
741 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 794 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
742 | { | 795 | { |
743 | if (vcpu->arch.rmode.vm86_active) | 796 | if (to_vmx(vcpu)->rmode.vm86_active) |
744 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 797 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
745 | vmcs_writel(GUEST_RFLAGS, rflags); | 798 | vmcs_writel(GUEST_RFLAGS, rflags); |
746 | } | 799 | } |
@@ -797,12 +850,13 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
797 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; | 850 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; |
798 | } | 851 | } |
799 | 852 | ||
800 | if (vcpu->arch.rmode.vm86_active) { | 853 | if (vmx->rmode.vm86_active) { |
801 | vmx->rmode.irq.pending = true; | 854 | vmx->rmode.irq.pending = true; |
802 | vmx->rmode.irq.vector = nr; | 855 | vmx->rmode.irq.vector = nr; |
803 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 856 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
804 | if (nr == BP_VECTOR || nr == OF_VECTOR) | 857 | if (kvm_exception_is_soft(nr)) |
805 | vmx->rmode.irq.rip++; | 858 | vmx->rmode.irq.rip += |
859 | vmx->vcpu.arch.event_exit_inst_len; | ||
806 | intr_info |= INTR_TYPE_SOFT_INTR; | 860 | intr_info |= INTR_TYPE_SOFT_INTR; |
807 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 861 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
808 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | 862 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); |
@@ -940,7 +994,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
940 | case MSR_EFER: | 994 | case MSR_EFER: |
941 | return kvm_get_msr_common(vcpu, msr_index, pdata); | 995 | return kvm_get_msr_common(vcpu, msr_index, pdata); |
942 | #endif | 996 | #endif |
943 | case MSR_IA32_TIME_STAMP_COUNTER: | 997 | case MSR_IA32_TSC: |
944 | data = guest_read_tsc(); | 998 | data = guest_read_tsc(); |
945 | break; | 999 | break; |
946 | case MSR_IA32_SYSENTER_CS: | 1000 | case MSR_IA32_SYSENTER_CS: |
@@ -953,9 +1007,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
953 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 1007 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
954 | break; | 1008 | break; |
955 | default: | 1009 | default: |
956 | vmx_load_host_state(to_vmx(vcpu)); | ||
957 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 1010 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
958 | if (msr) { | 1011 | if (msr) { |
1012 | vmx_load_host_state(to_vmx(vcpu)); | ||
959 | data = msr->data; | 1013 | data = msr->data; |
960 | break; | 1014 | break; |
961 | } | 1015 | } |
@@ -1000,22 +1054,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1000 | case MSR_IA32_SYSENTER_ESP: | 1054 | case MSR_IA32_SYSENTER_ESP: |
1001 | vmcs_writel(GUEST_SYSENTER_ESP, data); | 1055 | vmcs_writel(GUEST_SYSENTER_ESP, data); |
1002 | break; | 1056 | break; |
1003 | case MSR_IA32_TIME_STAMP_COUNTER: | 1057 | case MSR_IA32_TSC: |
1004 | rdtscll(host_tsc); | 1058 | rdtscll(host_tsc); |
1005 | guest_write_tsc(data, host_tsc); | 1059 | guest_write_tsc(data, host_tsc); |
1006 | break; | 1060 | break; |
1007 | case MSR_P6_PERFCTR0: | ||
1008 | case MSR_P6_PERFCTR1: | ||
1009 | case MSR_P6_EVNTSEL0: | ||
1010 | case MSR_P6_EVNTSEL1: | ||
1011 | /* | ||
1012 | * Just discard all writes to the performance counters; this | ||
1013 | * should keep both older linux and windows 64-bit guests | ||
1014 | * happy | ||
1015 | */ | ||
1016 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); | ||
1017 | |||
1018 | break; | ||
1019 | case MSR_IA32_CR_PAT: | 1061 | case MSR_IA32_CR_PAT: |
1020 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | 1062 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
1021 | vmcs_write64(GUEST_IA32_PAT, data); | 1063 | vmcs_write64(GUEST_IA32_PAT, data); |
@@ -1024,9 +1066,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1024 | } | 1066 | } |
1025 | /* Otherwise falls through to kvm_set_msr_common */ | 1067 | /* Otherwise falls through to kvm_set_msr_common */ |
1026 | default: | 1068 | default: |
1027 | vmx_load_host_state(vmx); | ||
1028 | msr = find_msr_entry(vmx, msr_index); | 1069 | msr = find_msr_entry(vmx, msr_index); |
1029 | if (msr) { | 1070 | if (msr) { |
1071 | vmx_load_host_state(vmx); | ||
1030 | msr->data = data; | 1072 | msr->data = data; |
1031 | break; | 1073 | break; |
1032 | } | 1074 | } |
@@ -1046,6 +1088,10 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
1046 | case VCPU_REGS_RIP: | 1088 | case VCPU_REGS_RIP: |
1047 | vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); | 1089 | vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); |
1048 | break; | 1090 | break; |
1091 | case VCPU_EXREG_PDPTR: | ||
1092 | if (enable_ept) | ||
1093 | ept_save_pdptrs(vcpu); | ||
1094 | break; | ||
1049 | default: | 1095 | default: |
1050 | break; | 1096 | break; |
1051 | } | 1097 | } |
@@ -1203,7 +1249,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1203 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 1249 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
1204 | SECONDARY_EXEC_WBINVD_EXITING | | 1250 | SECONDARY_EXEC_WBINVD_EXITING | |
1205 | SECONDARY_EXEC_ENABLE_VPID | | 1251 | SECONDARY_EXEC_ENABLE_VPID | |
1206 | SECONDARY_EXEC_ENABLE_EPT; | 1252 | SECONDARY_EXEC_ENABLE_EPT | |
1253 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | ||
1207 | if (adjust_vmx_controls(min2, opt2, | 1254 | if (adjust_vmx_controls(min2, opt2, |
1208 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1255 | MSR_IA32_VMX_PROCBASED_CTLS2, |
1209 | &_cpu_based_2nd_exec_control) < 0) | 1256 | &_cpu_based_2nd_exec_control) < 0) |
@@ -1217,12 +1264,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1217 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { | 1264 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { |
1218 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT | 1265 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT |
1219 | enabled */ | 1266 | enabled */ |
1220 | min &= ~(CPU_BASED_CR3_LOAD_EXITING | | 1267 | _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | |
1221 | CPU_BASED_CR3_STORE_EXITING | | 1268 | CPU_BASED_CR3_STORE_EXITING | |
1222 | CPU_BASED_INVLPG_EXITING); | 1269 | CPU_BASED_INVLPG_EXITING); |
1223 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, | ||
1224 | &_cpu_based_exec_control) < 0) | ||
1225 | return -EIO; | ||
1226 | rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, | 1270 | rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, |
1227 | vmx_capability.ept, vmx_capability.vpid); | 1271 | vmx_capability.ept, vmx_capability.vpid); |
1228 | } | 1272 | } |
@@ -1333,8 +1377,13 @@ static __init int hardware_setup(void) | |||
1333 | if (!cpu_has_vmx_vpid()) | 1377 | if (!cpu_has_vmx_vpid()) |
1334 | enable_vpid = 0; | 1378 | enable_vpid = 0; |
1335 | 1379 | ||
1336 | if (!cpu_has_vmx_ept()) | 1380 | if (!cpu_has_vmx_ept()) { |
1337 | enable_ept = 0; | 1381 | enable_ept = 0; |
1382 | enable_unrestricted_guest = 0; | ||
1383 | } | ||
1384 | |||
1385 | if (!cpu_has_vmx_unrestricted_guest()) | ||
1386 | enable_unrestricted_guest = 0; | ||
1338 | 1387 | ||
1339 | if (!cpu_has_vmx_flexpriority()) | 1388 | if (!cpu_has_vmx_flexpriority()) |
1340 | flexpriority_enabled = 0; | 1389 | flexpriority_enabled = 0; |
@@ -1342,6 +1391,9 @@ static __init int hardware_setup(void) | |||
1342 | if (!cpu_has_vmx_tpr_shadow()) | 1391 | if (!cpu_has_vmx_tpr_shadow()) |
1343 | kvm_x86_ops->update_cr8_intercept = NULL; | 1392 | kvm_x86_ops->update_cr8_intercept = NULL; |
1344 | 1393 | ||
1394 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) | ||
1395 | kvm_disable_largepages(); | ||
1396 | |||
1345 | return alloc_kvm_area(); | 1397 | return alloc_kvm_area(); |
1346 | } | 1398 | } |
1347 | 1399 | ||
@@ -1372,15 +1424,15 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1372 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1424 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1373 | 1425 | ||
1374 | vmx->emulation_required = 1; | 1426 | vmx->emulation_required = 1; |
1375 | vcpu->arch.rmode.vm86_active = 0; | 1427 | vmx->rmode.vm86_active = 0; |
1376 | 1428 | ||
1377 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); | 1429 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); |
1378 | vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit); | 1430 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); |
1379 | vmcs_write32(GUEST_TR_AR_BYTES, vcpu->arch.rmode.tr.ar); | 1431 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); |
1380 | 1432 | ||
1381 | flags = vmcs_readl(GUEST_RFLAGS); | 1433 | flags = vmcs_readl(GUEST_RFLAGS); |
1382 | flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 1434 | flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); |
1383 | flags |= (vcpu->arch.rmode.save_iopl << IOPL_SHIFT); | 1435 | flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); |
1384 | vmcs_writel(GUEST_RFLAGS, flags); | 1436 | vmcs_writel(GUEST_RFLAGS, flags); |
1385 | 1437 | ||
1386 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | | 1438 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | |
@@ -1391,10 +1443,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1391 | if (emulate_invalid_guest_state) | 1443 | if (emulate_invalid_guest_state) |
1392 | return; | 1444 | return; |
1393 | 1445 | ||
1394 | fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es); | 1446 | fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es); |
1395 | fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); | 1447 | fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds); |
1396 | fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); | 1448 | fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); |
1397 | fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); | 1449 | fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); |
1398 | 1450 | ||
1399 | vmcs_write16(GUEST_SS_SELECTOR, 0); | 1451 | vmcs_write16(GUEST_SS_SELECTOR, 0); |
1400 | vmcs_write32(GUEST_SS_AR_BYTES, 0x93); | 1452 | vmcs_write32(GUEST_SS_AR_BYTES, 0x93); |
@@ -1433,20 +1485,23 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1433 | unsigned long flags; | 1485 | unsigned long flags; |
1434 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1486 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1435 | 1487 | ||
1488 | if (enable_unrestricted_guest) | ||
1489 | return; | ||
1490 | |||
1436 | vmx->emulation_required = 1; | 1491 | vmx->emulation_required = 1; |
1437 | vcpu->arch.rmode.vm86_active = 1; | 1492 | vmx->rmode.vm86_active = 1; |
1438 | 1493 | ||
1439 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1494 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
1440 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 1495 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
1441 | 1496 | ||
1442 | vcpu->arch.rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); | 1497 | vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); |
1443 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); | 1498 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); |
1444 | 1499 | ||
1445 | vcpu->arch.rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); | 1500 | vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); |
1446 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 1501 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
1447 | 1502 | ||
1448 | flags = vmcs_readl(GUEST_RFLAGS); | 1503 | flags = vmcs_readl(GUEST_RFLAGS); |
1449 | vcpu->arch.rmode.save_iopl | 1504 | vmx->rmode.save_iopl |
1450 | = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1505 | = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1451 | 1506 | ||
1452 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 1507 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
@@ -1468,10 +1523,10 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1468 | vmcs_writel(GUEST_CS_BASE, 0xf0000); | 1523 | vmcs_writel(GUEST_CS_BASE, 0xf0000); |
1469 | vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); | 1524 | vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); |
1470 | 1525 | ||
1471 | fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es); | 1526 | fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); |
1472 | fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); | 1527 | fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); |
1473 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); | 1528 | fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); |
1474 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); | 1529 | fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); |
1475 | 1530 | ||
1476 | continue_rmode: | 1531 | continue_rmode: |
1477 | kvm_mmu_reset_context(vcpu); | 1532 | kvm_mmu_reset_context(vcpu); |
@@ -1545,11 +1600,11 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
1545 | 1600 | ||
1546 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | 1601 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) |
1547 | { | 1602 | { |
1603 | if (!test_bit(VCPU_EXREG_PDPTR, | ||
1604 | (unsigned long *)&vcpu->arch.regs_dirty)) | ||
1605 | return; | ||
1606 | |||
1548 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | 1607 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { |
1549 | if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { | ||
1550 | printk(KERN_ERR "EPT: Fail to load pdptrs!\n"); | ||
1551 | return; | ||
1552 | } | ||
1553 | vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); | 1608 | vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); |
1554 | vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); | 1609 | vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); |
1555 | vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); | 1610 | vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); |
@@ -1557,6 +1612,21 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | |||
1557 | } | 1612 | } |
1558 | } | 1613 | } |
1559 | 1614 | ||
1615 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu) | ||
1616 | { | ||
1617 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | ||
1618 | vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0); | ||
1619 | vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1); | ||
1620 | vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2); | ||
1621 | vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3); | ||
1622 | } | ||
1623 | |||
1624 | __set_bit(VCPU_EXREG_PDPTR, | ||
1625 | (unsigned long *)&vcpu->arch.regs_avail); | ||
1626 | __set_bit(VCPU_EXREG_PDPTR, | ||
1627 | (unsigned long *)&vcpu->arch.regs_dirty); | ||
1628 | } | ||
1629 | |||
1560 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 1630 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
1561 | 1631 | ||
1562 | static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | 1632 | static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, |
@@ -1571,8 +1641,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1571 | CPU_BASED_CR3_STORE_EXITING)); | 1641 | CPU_BASED_CR3_STORE_EXITING)); |
1572 | vcpu->arch.cr0 = cr0; | 1642 | vcpu->arch.cr0 = cr0; |
1573 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1643 | vmx_set_cr4(vcpu, vcpu->arch.cr4); |
1574 | *hw_cr0 |= X86_CR0_PE | X86_CR0_PG; | ||
1575 | *hw_cr0 &= ~X86_CR0_WP; | ||
1576 | } else if (!is_paging(vcpu)) { | 1644 | } else if (!is_paging(vcpu)) { |
1577 | /* From nonpaging to paging */ | 1645 | /* From nonpaging to paging */ |
1578 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1646 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
@@ -1581,9 +1649,10 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1581 | CPU_BASED_CR3_STORE_EXITING)); | 1649 | CPU_BASED_CR3_STORE_EXITING)); |
1582 | vcpu->arch.cr0 = cr0; | 1650 | vcpu->arch.cr0 = cr0; |
1583 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1651 | vmx_set_cr4(vcpu, vcpu->arch.cr4); |
1584 | if (!(vcpu->arch.cr0 & X86_CR0_WP)) | ||
1585 | *hw_cr0 &= ~X86_CR0_WP; | ||
1586 | } | 1652 | } |
1653 | |||
1654 | if (!(cr0 & X86_CR0_WP)) | ||
1655 | *hw_cr0 &= ~X86_CR0_WP; | ||
1587 | } | 1656 | } |
1588 | 1657 | ||
1589 | static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, | 1658 | static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, |
@@ -1598,15 +1667,21 @@ static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, | |||
1598 | 1667 | ||
1599 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1668 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
1600 | { | 1669 | { |
1601 | unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | | 1670 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1602 | KVM_VM_CR0_ALWAYS_ON; | 1671 | unsigned long hw_cr0; |
1672 | |||
1673 | if (enable_unrestricted_guest) | ||
1674 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) | ||
1675 | | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; | ||
1676 | else | ||
1677 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; | ||
1603 | 1678 | ||
1604 | vmx_fpu_deactivate(vcpu); | 1679 | vmx_fpu_deactivate(vcpu); |
1605 | 1680 | ||
1606 | if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE)) | 1681 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) |
1607 | enter_pmode(vcpu); | 1682 | enter_pmode(vcpu); |
1608 | 1683 | ||
1609 | if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE)) | 1684 | if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) |
1610 | enter_rmode(vcpu); | 1685 | enter_rmode(vcpu); |
1611 | 1686 | ||
1612 | #ifdef CONFIG_X86_64 | 1687 | #ifdef CONFIG_X86_64 |
@@ -1650,10 +1725,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1650 | if (enable_ept) { | 1725 | if (enable_ept) { |
1651 | eptp = construct_eptp(cr3); | 1726 | eptp = construct_eptp(cr3); |
1652 | vmcs_write64(EPT_POINTER, eptp); | 1727 | vmcs_write64(EPT_POINTER, eptp); |
1653 | ept_sync_context(eptp); | ||
1654 | ept_load_pdptrs(vcpu); | ||
1655 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | 1728 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : |
1656 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | 1729 | vcpu->kvm->arch.ept_identity_map_addr; |
1657 | } | 1730 | } |
1658 | 1731 | ||
1659 | vmx_flush_tlb(vcpu); | 1732 | vmx_flush_tlb(vcpu); |
@@ -1664,7 +1737,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1664 | 1737 | ||
1665 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1738 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
1666 | { | 1739 | { |
1667 | unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ? | 1740 | unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ? |
1668 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 1741 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
1669 | 1742 | ||
1670 | vcpu->arch.cr4 = cr4; | 1743 | vcpu->arch.cr4 = cr4; |
@@ -1707,16 +1780,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
1707 | 1780 | ||
1708 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 1781 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
1709 | { | 1782 | { |
1710 | struct kvm_segment kvm_seg; | ||
1711 | |||
1712 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ | 1783 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ |
1713 | return 0; | 1784 | return 0; |
1714 | 1785 | ||
1715 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ | 1786 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ |
1716 | return 3; | 1787 | return 3; |
1717 | 1788 | ||
1718 | vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS); | 1789 | return vmcs_read16(GUEST_CS_SELECTOR) & 3; |
1719 | return kvm_seg.selector & 3; | ||
1720 | } | 1790 | } |
1721 | 1791 | ||
1722 | static u32 vmx_segment_access_rights(struct kvm_segment *var) | 1792 | static u32 vmx_segment_access_rights(struct kvm_segment *var) |
@@ -1744,20 +1814,21 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) | |||
1744 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 1814 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
1745 | struct kvm_segment *var, int seg) | 1815 | struct kvm_segment *var, int seg) |
1746 | { | 1816 | { |
1817 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
1747 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 1818 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
1748 | u32 ar; | 1819 | u32 ar; |
1749 | 1820 | ||
1750 | if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) { | 1821 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { |
1751 | vcpu->arch.rmode.tr.selector = var->selector; | 1822 | vmx->rmode.tr.selector = var->selector; |
1752 | vcpu->arch.rmode.tr.base = var->base; | 1823 | vmx->rmode.tr.base = var->base; |
1753 | vcpu->arch.rmode.tr.limit = var->limit; | 1824 | vmx->rmode.tr.limit = var->limit; |
1754 | vcpu->arch.rmode.tr.ar = vmx_segment_access_rights(var); | 1825 | vmx->rmode.tr.ar = vmx_segment_access_rights(var); |
1755 | return; | 1826 | return; |
1756 | } | 1827 | } |
1757 | vmcs_writel(sf->base, var->base); | 1828 | vmcs_writel(sf->base, var->base); |
1758 | vmcs_write32(sf->limit, var->limit); | 1829 | vmcs_write32(sf->limit, var->limit); |
1759 | vmcs_write16(sf->selector, var->selector); | 1830 | vmcs_write16(sf->selector, var->selector); |
1760 | if (vcpu->arch.rmode.vm86_active && var->s) { | 1831 | if (vmx->rmode.vm86_active && var->s) { |
1761 | /* | 1832 | /* |
1762 | * Hack real-mode segments into vm86 compatibility. | 1833 | * Hack real-mode segments into vm86 compatibility. |
1763 | */ | 1834 | */ |
@@ -1766,6 +1837,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
1766 | ar = 0xf3; | 1837 | ar = 0xf3; |
1767 | } else | 1838 | } else |
1768 | ar = vmx_segment_access_rights(var); | 1839 | ar = vmx_segment_access_rights(var); |
1840 | |||
1841 | /* | ||
1842 | * Fix the "Accessed" bit in AR field of segment registers for older | ||
1843 | * qemu binaries. | ||
1844 | * IA32 arch specifies that at the time of processor reset the | ||
1845 | * "Accessed" bit in the AR field of segment registers is 1. And qemu | ||
1846 | * is setting it to 0 in the userland code. This causes invalid guest | ||
1847 | * state vmexit when "unrestricted guest" mode is turned on. | ||
1848 | * Fix for this setup issue in cpu_reset is being pushed in the qemu | ||
1849 | * tree. Newer qemu binaries with that qemu fix would not need this | ||
1850 | * kvm hack. | ||
1851 | */ | ||
1852 | if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) | ||
1853 | ar |= 0x1; /* Accessed */ | ||
1854 | |||
1769 | vmcs_write32(sf->ar_bytes, ar); | 1855 | vmcs_write32(sf->ar_bytes, ar); |
1770 | } | 1856 | } |
1771 | 1857 | ||
@@ -2040,7 +2126,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
2040 | if (likely(kvm->arch.ept_identity_pagetable_done)) | 2126 | if (likely(kvm->arch.ept_identity_pagetable_done)) |
2041 | return 1; | 2127 | return 1; |
2042 | ret = 0; | 2128 | ret = 0; |
2043 | identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT; | 2129 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; |
2044 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); | 2130 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); |
2045 | if (r < 0) | 2131 | if (r < 0) |
2046 | goto out; | 2132 | goto out; |
@@ -2062,11 +2148,19 @@ out: | |||
2062 | static void seg_setup(int seg) | 2148 | static void seg_setup(int seg) |
2063 | { | 2149 | { |
2064 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2150 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2151 | unsigned int ar; | ||
2065 | 2152 | ||
2066 | vmcs_write16(sf->selector, 0); | 2153 | vmcs_write16(sf->selector, 0); |
2067 | vmcs_writel(sf->base, 0); | 2154 | vmcs_writel(sf->base, 0); |
2068 | vmcs_write32(sf->limit, 0xffff); | 2155 | vmcs_write32(sf->limit, 0xffff); |
2069 | vmcs_write32(sf->ar_bytes, 0xf3); | 2156 | if (enable_unrestricted_guest) { |
2157 | ar = 0x93; | ||
2158 | if (seg == VCPU_SREG_CS) | ||
2159 | ar |= 0x08; /* code segment */ | ||
2160 | } else | ||
2161 | ar = 0xf3; | ||
2162 | |||
2163 | vmcs_write32(sf->ar_bytes, ar); | ||
2070 | } | 2164 | } |
2071 | 2165 | ||
2072 | static int alloc_apic_access_page(struct kvm *kvm) | 2166 | static int alloc_apic_access_page(struct kvm *kvm) |
@@ -2101,14 +2195,15 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
2101 | goto out; | 2195 | goto out; |
2102 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | 2196 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; |
2103 | kvm_userspace_mem.flags = 0; | 2197 | kvm_userspace_mem.flags = 0; |
2104 | kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; | 2198 | kvm_userspace_mem.guest_phys_addr = |
2199 | kvm->arch.ept_identity_map_addr; | ||
2105 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 2200 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
2106 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); | 2201 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); |
2107 | if (r) | 2202 | if (r) |
2108 | goto out; | 2203 | goto out; |
2109 | 2204 | ||
2110 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | 2205 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, |
2111 | VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); | 2206 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); |
2112 | out: | 2207 | out: |
2113 | up_write(&kvm->slots_lock); | 2208 | up_write(&kvm->slots_lock); |
2114 | return r; | 2209 | return r; |
@@ -2209,6 +2304,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2209 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 2304 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
2210 | if (!enable_ept) | 2305 | if (!enable_ept) |
2211 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2306 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
2307 | if (!enable_unrestricted_guest) | ||
2308 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | ||
2212 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2309 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
2213 | } | 2310 | } |
2214 | 2311 | ||
@@ -2326,14 +2423,14 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2326 | goto out; | 2423 | goto out; |
2327 | } | 2424 | } |
2328 | 2425 | ||
2329 | vmx->vcpu.arch.rmode.vm86_active = 0; | 2426 | vmx->rmode.vm86_active = 0; |
2330 | 2427 | ||
2331 | vmx->soft_vnmi_blocked = 0; | 2428 | vmx->soft_vnmi_blocked = 0; |
2332 | 2429 | ||
2333 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 2430 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
2334 | kvm_set_cr8(&vmx->vcpu, 0); | 2431 | kvm_set_cr8(&vmx->vcpu, 0); |
2335 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 2432 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
2336 | if (vmx->vcpu.vcpu_id == 0) | 2433 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) |
2337 | msr |= MSR_IA32_APICBASE_BSP; | 2434 | msr |= MSR_IA32_APICBASE_BSP; |
2338 | kvm_set_apic_base(&vmx->vcpu, msr); | 2435 | kvm_set_apic_base(&vmx->vcpu, msr); |
2339 | 2436 | ||
@@ -2344,7 +2441,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2344 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode | 2441 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode |
2345 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. | 2442 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. |
2346 | */ | 2443 | */ |
2347 | if (vmx->vcpu.vcpu_id == 0) { | 2444 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) { |
2348 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); | 2445 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); |
2349 | vmcs_writel(GUEST_CS_BASE, 0x000f0000); | 2446 | vmcs_writel(GUEST_CS_BASE, 0x000f0000); |
2350 | } else { | 2447 | } else { |
@@ -2373,7 +2470,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2373 | vmcs_writel(GUEST_SYSENTER_EIP, 0); | 2470 | vmcs_writel(GUEST_SYSENTER_EIP, 0); |
2374 | 2471 | ||
2375 | vmcs_writel(GUEST_RFLAGS, 0x02); | 2472 | vmcs_writel(GUEST_RFLAGS, 0x02); |
2376 | if (vmx->vcpu.vcpu_id == 0) | 2473 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) |
2377 | kvm_rip_write(vcpu, 0xfff0); | 2474 | kvm_rip_write(vcpu, 0xfff0); |
2378 | else | 2475 | else |
2379 | kvm_rip_write(vcpu, 0); | 2476 | kvm_rip_write(vcpu, 0); |
@@ -2461,13 +2558,16 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) | |||
2461 | uint32_t intr; | 2558 | uint32_t intr; |
2462 | int irq = vcpu->arch.interrupt.nr; | 2559 | int irq = vcpu->arch.interrupt.nr; |
2463 | 2560 | ||
2464 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); | 2561 | trace_kvm_inj_virq(irq); |
2465 | 2562 | ||
2466 | ++vcpu->stat.irq_injections; | 2563 | ++vcpu->stat.irq_injections; |
2467 | if (vcpu->arch.rmode.vm86_active) { | 2564 | if (vmx->rmode.vm86_active) { |
2468 | vmx->rmode.irq.pending = true; | 2565 | vmx->rmode.irq.pending = true; |
2469 | vmx->rmode.irq.vector = irq; | 2566 | vmx->rmode.irq.vector = irq; |
2470 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 2567 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
2568 | if (vcpu->arch.interrupt.soft) | ||
2569 | vmx->rmode.irq.rip += | ||
2570 | vmx->vcpu.arch.event_exit_inst_len; | ||
2471 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2571 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
2472 | irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); | 2572 | irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); |
2473 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | 2573 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); |
@@ -2502,7 +2602,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
2502 | } | 2602 | } |
2503 | 2603 | ||
2504 | ++vcpu->stat.nmi_injections; | 2604 | ++vcpu->stat.nmi_injections; |
2505 | if (vcpu->arch.rmode.vm86_active) { | 2605 | if (vmx->rmode.vm86_active) { |
2506 | vmx->rmode.irq.pending = true; | 2606 | vmx->rmode.irq.pending = true; |
2507 | vmx->rmode.irq.vector = NMI_VECTOR; | 2607 | vmx->rmode.irq.vector = NMI_VECTOR; |
2508 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 2608 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
@@ -2659,14 +2759,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2659 | if (enable_ept) | 2759 | if (enable_ept) |
2660 | BUG(); | 2760 | BUG(); |
2661 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2761 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
2662 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2762 | trace_kvm_page_fault(cr2, error_code); |
2663 | (u32)((u64)cr2 >> 32), handler); | 2763 | |
2664 | if (kvm_event_needs_reinjection(vcpu)) | 2764 | if (kvm_event_needs_reinjection(vcpu)) |
2665 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | 2765 | kvm_mmu_unprotect_page_virt(vcpu, cr2); |
2666 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2766 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
2667 | } | 2767 | } |
2668 | 2768 | ||
2669 | if (vcpu->arch.rmode.vm86_active && | 2769 | if (vmx->rmode.vm86_active && |
2670 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, | 2770 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, |
2671 | error_code)) { | 2771 | error_code)) { |
2672 | if (vcpu->arch.halt_request) { | 2772 | if (vcpu->arch.halt_request) { |
@@ -2707,7 +2807,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu, | |||
2707 | struct kvm_run *kvm_run) | 2807 | struct kvm_run *kvm_run) |
2708 | { | 2808 | { |
2709 | ++vcpu->stat.irq_exits; | 2809 | ++vcpu->stat.irq_exits; |
2710 | KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler); | ||
2711 | return 1; | 2810 | return 1; |
2712 | } | 2811 | } |
2713 | 2812 | ||
@@ -2755,7 +2854,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
2755 | 2854 | ||
2756 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2855 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
2757 | { | 2856 | { |
2758 | unsigned long exit_qualification; | 2857 | unsigned long exit_qualification, val; |
2759 | int cr; | 2858 | int cr; |
2760 | int reg; | 2859 | int reg; |
2761 | 2860 | ||
@@ -2764,21 +2863,19 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2764 | reg = (exit_qualification >> 8) & 15; | 2863 | reg = (exit_qualification >> 8) & 15; |
2765 | switch ((exit_qualification >> 4) & 3) { | 2864 | switch ((exit_qualification >> 4) & 3) { |
2766 | case 0: /* mov to cr */ | 2865 | case 0: /* mov to cr */ |
2767 | KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, | 2866 | val = kvm_register_read(vcpu, reg); |
2768 | (u32)kvm_register_read(vcpu, reg), | 2867 | trace_kvm_cr_write(cr, val); |
2769 | (u32)((u64)kvm_register_read(vcpu, reg) >> 32), | ||
2770 | handler); | ||
2771 | switch (cr) { | 2868 | switch (cr) { |
2772 | case 0: | 2869 | case 0: |
2773 | kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg)); | 2870 | kvm_set_cr0(vcpu, val); |
2774 | skip_emulated_instruction(vcpu); | 2871 | skip_emulated_instruction(vcpu); |
2775 | return 1; | 2872 | return 1; |
2776 | case 3: | 2873 | case 3: |
2777 | kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg)); | 2874 | kvm_set_cr3(vcpu, val); |
2778 | skip_emulated_instruction(vcpu); | 2875 | skip_emulated_instruction(vcpu); |
2779 | return 1; | 2876 | return 1; |
2780 | case 4: | 2877 | case 4: |
2781 | kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); | 2878 | kvm_set_cr4(vcpu, val); |
2782 | skip_emulated_instruction(vcpu); | 2879 | skip_emulated_instruction(vcpu); |
2783 | return 1; | 2880 | return 1; |
2784 | case 8: { | 2881 | case 8: { |
@@ -2800,23 +2897,19 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2800 | vcpu->arch.cr0 &= ~X86_CR0_TS; | 2897 | vcpu->arch.cr0 &= ~X86_CR0_TS; |
2801 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | 2898 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); |
2802 | vmx_fpu_activate(vcpu); | 2899 | vmx_fpu_activate(vcpu); |
2803 | KVMTRACE_0D(CLTS, vcpu, handler); | ||
2804 | skip_emulated_instruction(vcpu); | 2900 | skip_emulated_instruction(vcpu); |
2805 | return 1; | 2901 | return 1; |
2806 | case 1: /*mov from cr*/ | 2902 | case 1: /*mov from cr*/ |
2807 | switch (cr) { | 2903 | switch (cr) { |
2808 | case 3: | 2904 | case 3: |
2809 | kvm_register_write(vcpu, reg, vcpu->arch.cr3); | 2905 | kvm_register_write(vcpu, reg, vcpu->arch.cr3); |
2810 | KVMTRACE_3D(CR_READ, vcpu, (u32)cr, | 2906 | trace_kvm_cr_read(cr, vcpu->arch.cr3); |
2811 | (u32)kvm_register_read(vcpu, reg), | ||
2812 | (u32)((u64)kvm_register_read(vcpu, reg) >> 32), | ||
2813 | handler); | ||
2814 | skip_emulated_instruction(vcpu); | 2907 | skip_emulated_instruction(vcpu); |
2815 | return 1; | 2908 | return 1; |
2816 | case 8: | 2909 | case 8: |
2817 | kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu)); | 2910 | val = kvm_get_cr8(vcpu); |
2818 | KVMTRACE_2D(CR_READ, vcpu, (u32)cr, | 2911 | kvm_register_write(vcpu, reg, val); |
2819 | (u32)kvm_register_read(vcpu, reg), handler); | 2912 | trace_kvm_cr_read(cr, val); |
2820 | skip_emulated_instruction(vcpu); | 2913 | skip_emulated_instruction(vcpu); |
2821 | return 1; | 2914 | return 1; |
2822 | } | 2915 | } |
@@ -2841,6 +2934,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2841 | unsigned long val; | 2934 | unsigned long val; |
2842 | int dr, reg; | 2935 | int dr, reg; |
2843 | 2936 | ||
2937 | if (!kvm_require_cpl(vcpu, 0)) | ||
2938 | return 1; | ||
2844 | dr = vmcs_readl(GUEST_DR7); | 2939 | dr = vmcs_readl(GUEST_DR7); |
2845 | if (dr & DR7_GD) { | 2940 | if (dr & DR7_GD) { |
2846 | /* | 2941 | /* |
@@ -2884,7 +2979,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2884 | val = 0; | 2979 | val = 0; |
2885 | } | 2980 | } |
2886 | kvm_register_write(vcpu, reg, val); | 2981 | kvm_register_write(vcpu, reg, val); |
2887 | KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); | ||
2888 | } else { | 2982 | } else { |
2889 | val = vcpu->arch.regs[reg]; | 2983 | val = vcpu->arch.regs[reg]; |
2890 | switch (dr) { | 2984 | switch (dr) { |
@@ -2917,7 +3011,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2917 | } | 3011 | } |
2918 | break; | 3012 | break; |
2919 | } | 3013 | } |
2920 | KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler); | ||
2921 | } | 3014 | } |
2922 | skip_emulated_instruction(vcpu); | 3015 | skip_emulated_instruction(vcpu); |
2923 | return 1; | 3016 | return 1; |
@@ -2939,8 +3032,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2939 | return 1; | 3032 | return 1; |
2940 | } | 3033 | } |
2941 | 3034 | ||
2942 | KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32), | 3035 | trace_kvm_msr_read(ecx, data); |
2943 | handler); | ||
2944 | 3036 | ||
2945 | /* FIXME: handling of bits 32:63 of rax, rdx */ | 3037 | /* FIXME: handling of bits 32:63 of rax, rdx */ |
2946 | vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; | 3038 | vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; |
@@ -2955,8 +3047,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2955 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 3047 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
2956 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 3048 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
2957 | 3049 | ||
2958 | KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32), | 3050 | trace_kvm_msr_write(ecx, data); |
2959 | handler); | ||
2960 | 3051 | ||
2961 | if (vmx_set_msr(vcpu, ecx, data) != 0) { | 3052 | if (vmx_set_msr(vcpu, ecx, data) != 0) { |
2962 | kvm_inject_gp(vcpu, 0); | 3053 | kvm_inject_gp(vcpu, 0); |
@@ -2983,7 +3074,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
2983 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | 3074 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; |
2984 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 3075 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
2985 | 3076 | ||
2986 | KVMTRACE_0D(PEND_INTR, vcpu, handler); | ||
2987 | ++vcpu->stat.irq_window_exits; | 3077 | ++vcpu->stat.irq_window_exits; |
2988 | 3078 | ||
2989 | /* | 3079 | /* |
@@ -3049,7 +3139,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3049 | printk(KERN_ERR | 3139 | printk(KERN_ERR |
3050 | "Fail to handle apic access vmexit! Offset is 0x%lx\n", | 3140 | "Fail to handle apic access vmexit! Offset is 0x%lx\n", |
3051 | offset); | 3141 | offset); |
3052 | return -ENOTSUPP; | 3142 | return -ENOEXEC; |
3053 | } | 3143 | } |
3054 | return 1; | 3144 | return 1; |
3055 | } | 3145 | } |
@@ -3118,7 +3208,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3118 | 3208 | ||
3119 | if (exit_qualification & (1 << 6)) { | 3209 | if (exit_qualification & (1 << 6)) { |
3120 | printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); | 3210 | printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); |
3121 | return -ENOTSUPP; | 3211 | return -EINVAL; |
3122 | } | 3212 | } |
3123 | 3213 | ||
3124 | gla_validity = (exit_qualification >> 7) & 0x3; | 3214 | gla_validity = (exit_qualification >> 7) & 0x3; |
@@ -3130,14 +3220,98 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3130 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 3220 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", |
3131 | (long unsigned int)exit_qualification); | 3221 | (long unsigned int)exit_qualification); |
3132 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3222 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; |
3133 | kvm_run->hw.hardware_exit_reason = 0; | 3223 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; |
3134 | return -ENOTSUPP; | 3224 | return 0; |
3135 | } | 3225 | } |
3136 | 3226 | ||
3137 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 3227 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
3228 | trace_kvm_page_fault(gpa, exit_qualification); | ||
3138 | return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); | 3229 | return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); |
3139 | } | 3230 | } |
3140 | 3231 | ||
3232 | static u64 ept_rsvd_mask(u64 spte, int level) | ||
3233 | { | ||
3234 | int i; | ||
3235 | u64 mask = 0; | ||
3236 | |||
3237 | for (i = 51; i > boot_cpu_data.x86_phys_bits; i--) | ||
3238 | mask |= (1ULL << i); | ||
3239 | |||
3240 | if (level > 2) | ||
3241 | /* bits 7:3 reserved */ | ||
3242 | mask |= 0xf8; | ||
3243 | else if (level == 2) { | ||
3244 | if (spte & (1ULL << 7)) | ||
3245 | /* 2MB ref, bits 20:12 reserved */ | ||
3246 | mask |= 0x1ff000; | ||
3247 | else | ||
3248 | /* bits 6:3 reserved */ | ||
3249 | mask |= 0x78; | ||
3250 | } | ||
3251 | |||
3252 | return mask; | ||
3253 | } | ||
3254 | |||
3255 | static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, | ||
3256 | int level) | ||
3257 | { | ||
3258 | printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level); | ||
3259 | |||
3260 | /* 010b (write-only) */ | ||
3261 | WARN_ON((spte & 0x7) == 0x2); | ||
3262 | |||
3263 | /* 110b (write/execute) */ | ||
3264 | WARN_ON((spte & 0x7) == 0x6); | ||
3265 | |||
3266 | /* 100b (execute-only) and value not supported by logical processor */ | ||
3267 | if (!cpu_has_vmx_ept_execute_only()) | ||
3268 | WARN_ON((spte & 0x7) == 0x4); | ||
3269 | |||
3270 | /* not 000b */ | ||
3271 | if ((spte & 0x7)) { | ||
3272 | u64 rsvd_bits = spte & ept_rsvd_mask(spte, level); | ||
3273 | |||
3274 | if (rsvd_bits != 0) { | ||
3275 | printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n", | ||
3276 | __func__, rsvd_bits); | ||
3277 | WARN_ON(1); | ||
3278 | } | ||
3279 | |||
3280 | if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) { | ||
3281 | u64 ept_mem_type = (spte & 0x38) >> 3; | ||
3282 | |||
3283 | if (ept_mem_type == 2 || ept_mem_type == 3 || | ||
3284 | ept_mem_type == 7) { | ||
3285 | printk(KERN_ERR "%s: ept_mem_type=0x%llx\n", | ||
3286 | __func__, ept_mem_type); | ||
3287 | WARN_ON(1); | ||
3288 | } | ||
3289 | } | ||
3290 | } | ||
3291 | } | ||
3292 | |||
3293 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
3294 | { | ||
3295 | u64 sptes[4]; | ||
3296 | int nr_sptes, i; | ||
3297 | gpa_t gpa; | ||
3298 | |||
3299 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | ||
3300 | |||
3301 | printk(KERN_ERR "EPT: Misconfiguration.\n"); | ||
3302 | printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa); | ||
3303 | |||
3304 | nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes); | ||
3305 | |||
3306 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) | ||
3307 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); | ||
3308 | |||
3309 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | ||
3310 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; | ||
3311 | |||
3312 | return 0; | ||
3313 | } | ||
3314 | |||
3141 | static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3315 | static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
3142 | { | 3316 | { |
3143 | u32 cpu_based_vm_exec_control; | 3317 | u32 cpu_based_vm_exec_control; |
@@ -3217,8 +3391,9 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
3217 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 3391 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
3218 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 3392 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
3219 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 3393 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
3220 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | ||
3221 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3394 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
3395 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | ||
3396 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | ||
3222 | }; | 3397 | }; |
3223 | 3398 | ||
3224 | static const int kvm_vmx_max_exit_handlers = | 3399 | static const int kvm_vmx_max_exit_handlers = |
@@ -3234,8 +3409,7 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3234 | u32 exit_reason = vmx->exit_reason; | 3409 | u32 exit_reason = vmx->exit_reason; |
3235 | u32 vectoring_info = vmx->idt_vectoring_info; | 3410 | u32 vectoring_info = vmx->idt_vectoring_info; |
3236 | 3411 | ||
3237 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), | 3412 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); |
3238 | (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); | ||
3239 | 3413 | ||
3240 | /* If we need to emulate an MMIO from handle_invalid_guest_state | 3414 | /* If we need to emulate an MMIO from handle_invalid_guest_state |
3241 | * we just return 0 */ | 3415 | * we just return 0 */ |
@@ -3247,10 +3421,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3247 | 3421 | ||
3248 | /* Accesses to CR3 don't cause a VMExit in paging mode, so we need | 3422 | /* Accesses to CR3 don't cause a VMExit in paging mode, so we need |
3249 | * to sync with guest real CR3. */ | 3423 | * to sync with guest real CR3. */ |
3250 | if (enable_ept && is_paging(vcpu)) { | 3424 | if (enable_ept && is_paging(vcpu)) |
3251 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3425 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
3252 | ept_load_pdptrs(vcpu); | ||
3253 | } | ||
3254 | 3426 | ||
3255 | if (unlikely(vmx->fail)) { | 3427 | if (unlikely(vmx->fail)) { |
3256 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3428 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
@@ -3326,10 +3498,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3326 | 3498 | ||
3327 | /* We need to handle NMIs before interrupts are enabled */ | 3499 | /* We need to handle NMIs before interrupts are enabled */ |
3328 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | 3500 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && |
3329 | (exit_intr_info & INTR_INFO_VALID_MASK)) { | 3501 | (exit_intr_info & INTR_INFO_VALID_MASK)) |
3330 | KVMTRACE_0D(NMI, &vmx->vcpu, handler); | ||
3331 | asm("int $2"); | 3502 | asm("int $2"); |
3332 | } | ||
3333 | 3503 | ||
3334 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3504 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
3335 | 3505 | ||
@@ -3434,6 +3604,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3434 | { | 3604 | { |
3435 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3605 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3436 | 3606 | ||
3607 | if (enable_ept && is_paging(vcpu)) { | ||
3608 | vmcs_writel(GUEST_CR3, vcpu->arch.cr3); | ||
3609 | ept_load_pdptrs(vcpu); | ||
3610 | } | ||
3437 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 3611 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
3438 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 3612 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
3439 | vmx->entry_time = ktime_get(); | 3613 | vmx->entry_time = ktime_get(); |
@@ -3449,12 +3623,21 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3449 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) | 3623 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) |
3450 | vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); | 3624 | vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); |
3451 | 3625 | ||
3626 | /* When single-stepping over STI and MOV SS, we must clear the | ||
3627 | * corresponding interruptibility bits in the guest state. Otherwise | ||
3628 | * vmentry fails as it then expects bit 14 (BS) in pending debug | ||
3629 | * exceptions being set, but that's not correct for the guest debugging | ||
3630 | * case. */ | ||
3631 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
3632 | vmx_set_interrupt_shadow(vcpu, 0); | ||
3633 | |||
3452 | /* | 3634 | /* |
3453 | * Loading guest fpu may have cleared host cr0.ts | 3635 | * Loading guest fpu may have cleared host cr0.ts |
3454 | */ | 3636 | */ |
3455 | vmcs_writel(HOST_CR0, read_cr0()); | 3637 | vmcs_writel(HOST_CR0, read_cr0()); |
3456 | 3638 | ||
3457 | set_debugreg(vcpu->arch.dr6, 6); | 3639 | if (vcpu->arch.switch_db_regs) |
3640 | set_debugreg(vcpu->arch.dr6, 6); | ||
3458 | 3641 | ||
3459 | asm( | 3642 | asm( |
3460 | /* Store host registers */ | 3643 | /* Store host registers */ |
@@ -3465,11 +3648,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3465 | "mov %%"R"sp, %c[host_rsp](%0) \n\t" | 3648 | "mov %%"R"sp, %c[host_rsp](%0) \n\t" |
3466 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" | 3649 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" |
3467 | "1: \n\t" | 3650 | "1: \n\t" |
3651 | /* Reload cr2 if changed */ | ||
3652 | "mov %c[cr2](%0), %%"R"ax \n\t" | ||
3653 | "mov %%cr2, %%"R"dx \n\t" | ||
3654 | "cmp %%"R"ax, %%"R"dx \n\t" | ||
3655 | "je 2f \n\t" | ||
3656 | "mov %%"R"ax, %%cr2 \n\t" | ||
3657 | "2: \n\t" | ||
3468 | /* Check if vmlaunch or vmresume is needed */ | 3658 | /* Check if vmlaunch or vmresume is needed */ |
3469 | "cmpl $0, %c[launched](%0) \n\t" | 3659 | "cmpl $0, %c[launched](%0) \n\t" |
3470 | /* Load guest registers. Don't clobber flags. */ | 3660 | /* Load guest registers. Don't clobber flags. */ |
3471 | "mov %c[cr2](%0), %%"R"ax \n\t" | ||
3472 | "mov %%"R"ax, %%cr2 \n\t" | ||
3473 | "mov %c[rax](%0), %%"R"ax \n\t" | 3661 | "mov %c[rax](%0), %%"R"ax \n\t" |
3474 | "mov %c[rbx](%0), %%"R"bx \n\t" | 3662 | "mov %c[rbx](%0), %%"R"bx \n\t" |
3475 | "mov %c[rdx](%0), %%"R"dx \n\t" | 3663 | "mov %c[rdx](%0), %%"R"dx \n\t" |
@@ -3547,10 +3735,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3547 | #endif | 3735 | #endif |
3548 | ); | 3736 | ); |
3549 | 3737 | ||
3550 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 3738 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) |
3739 | | (1 << VCPU_EXREG_PDPTR)); | ||
3551 | vcpu->arch.regs_dirty = 0; | 3740 | vcpu->arch.regs_dirty = 0; |
3552 | 3741 | ||
3553 | get_debugreg(vcpu->arch.dr6, 6); | 3742 | if (vcpu->arch.switch_db_regs) |
3743 | get_debugreg(vcpu->arch.dr6, 6); | ||
3554 | 3744 | ||
3555 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 3745 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
3556 | if (vmx->rmode.irq.pending) | 3746 | if (vmx->rmode.irq.pending) |
@@ -3633,9 +3823,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3633 | if (alloc_apic_access_page(kvm) != 0) | 3823 | if (alloc_apic_access_page(kvm) != 0) |
3634 | goto free_vmcs; | 3824 | goto free_vmcs; |
3635 | 3825 | ||
3636 | if (enable_ept) | 3826 | if (enable_ept) { |
3827 | if (!kvm->arch.ept_identity_map_addr) | ||
3828 | kvm->arch.ept_identity_map_addr = | ||
3829 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | ||
3637 | if (alloc_identity_pagetable(kvm) != 0) | 3830 | if (alloc_identity_pagetable(kvm) != 0) |
3638 | goto free_vmcs; | 3831 | goto free_vmcs; |
3832 | } | ||
3639 | 3833 | ||
3640 | return &vmx->vcpu; | 3834 | return &vmx->vcpu; |
3641 | 3835 | ||
@@ -3699,6 +3893,34 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3699 | return ret; | 3893 | return ret; |
3700 | } | 3894 | } |
3701 | 3895 | ||
3896 | static const struct trace_print_flags vmx_exit_reasons_str[] = { | ||
3897 | { EXIT_REASON_EXCEPTION_NMI, "exception" }, | ||
3898 | { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, | ||
3899 | { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, | ||
3900 | { EXIT_REASON_NMI_WINDOW, "nmi_window" }, | ||
3901 | { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, | ||
3902 | { EXIT_REASON_CR_ACCESS, "cr_access" }, | ||
3903 | { EXIT_REASON_DR_ACCESS, "dr_access" }, | ||
3904 | { EXIT_REASON_CPUID, "cpuid" }, | ||
3905 | { EXIT_REASON_MSR_READ, "rdmsr" }, | ||
3906 | { EXIT_REASON_MSR_WRITE, "wrmsr" }, | ||
3907 | { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, | ||
3908 | { EXIT_REASON_HLT, "halt" }, | ||
3909 | { EXIT_REASON_INVLPG, "invlpg" }, | ||
3910 | { EXIT_REASON_VMCALL, "hypercall" }, | ||
3911 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, | ||
3912 | { EXIT_REASON_APIC_ACCESS, "apic_access" }, | ||
3913 | { EXIT_REASON_WBINVD, "wbinvd" }, | ||
3914 | { EXIT_REASON_TASK_SWITCH, "task_switch" }, | ||
3915 | { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, | ||
3916 | { -1, NULL } | ||
3917 | }; | ||
3918 | |||
3919 | static bool vmx_gb_page_enable(void) | ||
3920 | { | ||
3921 | return false; | ||
3922 | } | ||
3923 | |||
3702 | static struct kvm_x86_ops vmx_x86_ops = { | 3924 | static struct kvm_x86_ops vmx_x86_ops = { |
3703 | .cpu_has_kvm_support = cpu_has_kvm_support, | 3925 | .cpu_has_kvm_support = cpu_has_kvm_support, |
3704 | .disabled_by_bios = vmx_disabled_by_bios, | 3926 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -3758,6 +3980,9 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3758 | .set_tss_addr = vmx_set_tss_addr, | 3980 | .set_tss_addr = vmx_set_tss_addr, |
3759 | .get_tdp_level = get_ept_level, | 3981 | .get_tdp_level = get_ept_level, |
3760 | .get_mt_mask = vmx_get_mt_mask, | 3982 | .get_mt_mask = vmx_get_mt_mask, |
3983 | |||
3984 | .exit_reasons_str = vmx_exit_reasons_str, | ||
3985 | .gb_page_enable = vmx_gb_page_enable, | ||
3761 | }; | 3986 | }; |
3762 | 3987 | ||
3763 | static int __init vmx_init(void) | 3988 | static int __init vmx_init(void) |