about summary refs log tree commit diff stats
path: root/arch/x86/kvm/svm.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-09-14 20:43:43 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-09-14 20:43:43 -0400
commit 69def9f05dfce3281bb06599057e6b8097385d39 (patch)
tree 7d826b22924268ddbfad101993b248996d40e2ec /arch/x86/kvm/svm.c
parent 353f6dd2dec992ddd34620a94b051b0f76227379 (diff)
parent 8e616fc8d343bd7f0f0a0c22407fdcb77f6d22b1 (diff)
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits)
  MAINTAINERS: update KVM entry
  KVM: correct error-handling code
  KVM: fix compile warnings on s390
  KVM: VMX: Check cpl before emulating debug register access
  KVM: fix misreporting of coalesced interrupts by kvm tracer
  KVM: x86: drop duplicate kvm_flush_remote_tlb calls
  KVM: VMX: call vmx_load_host_state() only if msr is cached
  KVM: VMX: Conditionally reload debug register 6
  KVM: Use thread debug register storage instead of kvm specific data
  KVM guest: do not batch pte updates from interrupt context
  KVM: Fix coalesced interrupt reporting in IOAPIC
  KVM guest: fix bogus wallclock physical address calculation
  KVM: VMX: Fix cr8 exiting control clobbering by EPT
  KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp
  KVM: Document KVM_CAP_IRQCHIP
  KVM: Protect update_cr8_intercept() when running without an apic
  KVM: VMX: Fix EPT with WP bit change during paging
  KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors
  KVM: x86 emulator: Add adc and sbb missing decoder flags
  KVM: Add missing #include
  ...
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- arch/x86/kvm/svm.c 889
1 files changed, 523 insertions, 366 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b1f658ad2f06..944cc9c04b3c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -15,7 +15,6 @@
15 */ 15 */
16#include <linux/kvm_host.h> 16#include <linux/kvm_host.h>
17 17
18#include "kvm_svm.h"
19#include "irq.h" 18#include "irq.h"
20#include "mmu.h" 19#include "mmu.h"
21#include "kvm_cache_regs.h" 20#include "kvm_cache_regs.h"
@@ -26,10 +25,12 @@
26#include <linux/vmalloc.h> 25#include <linux/vmalloc.h>
27#include <linux/highmem.h> 26#include <linux/highmem.h>
28#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/ftrace_event.h>
29 29
30#include <asm/desc.h> 30#include <asm/desc.h>
31 31
32#include <asm/virtext.h> 32#include <asm/virtext.h>
33#include "trace.h"
33 34
34#define __ex(x) __kvm_handle_fault_on_reboot(x) 35#define __ex(x) __kvm_handle_fault_on_reboot(x)
35 36
@@ -46,6 +47,10 @@ MODULE_LICENSE("GPL");
46#define SVM_FEATURE_LBRV (1 << 1) 47#define SVM_FEATURE_LBRV (1 << 1)
47#define SVM_FEATURE_SVML (1 << 2) 48#define SVM_FEATURE_SVML (1 << 2)
48 49
50#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
51#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
52#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
53
49#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) 54#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
50 55
51/* Turn on to get debugging output*/ 56/* Turn on to get debugging output*/
@@ -57,6 +62,58 @@ MODULE_LICENSE("GPL");
57#define nsvm_printk(fmt, args...) do {} while(0) 62#define nsvm_printk(fmt, args...) do {} while(0)
58#endif 63#endif
59 64
65static const u32 host_save_user_msrs[] = {
66#ifdef CONFIG_X86_64
67 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
68 MSR_FS_BASE,
69#endif
70 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
71};
72
73#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
74
75struct kvm_vcpu;
76
77struct nested_state {
78 struct vmcb *hsave;
79 u64 hsave_msr;
80 u64 vmcb;
81
82 /* These are the merged vectors */
83 u32 *msrpm;
84
85 /* gpa pointers to the real vectors */
86 u64 vmcb_msrpm;
87
88 /* cache for intercepts of the guest */
89 u16 intercept_cr_read;
90 u16 intercept_cr_write;
91 u16 intercept_dr_read;
92 u16 intercept_dr_write;
93 u32 intercept_exceptions;
94 u64 intercept;
95
96};
97
98struct vcpu_svm {
99 struct kvm_vcpu vcpu;
100 struct vmcb *vmcb;
101 unsigned long vmcb_pa;
102 struct svm_cpu_data *svm_data;
103 uint64_t asid_generation;
104 uint64_t sysenter_esp;
105 uint64_t sysenter_eip;
106
107 u64 next_rip;
108
109 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
110 u64 host_gs_base;
111
112 u32 *msrpm;
113
114 struct nested_state nested;
115};
116
60/* enable NPT for AMD64 and X86 with PAE */ 117/* enable NPT for AMD64 and X86 with PAE */
61#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 118#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
62static bool npt_enabled = true; 119static bool npt_enabled = true;
@@ -67,15 +124,14 @@ static int npt = 1;
67 124
68module_param(npt, int, S_IRUGO); 125module_param(npt, int, S_IRUGO);
69 126
70static int nested = 0; 127static int nested = 1;
71module_param(nested, int, S_IRUGO); 128module_param(nested, int, S_IRUGO);
72 129
73static void svm_flush_tlb(struct kvm_vcpu *vcpu); 130static void svm_flush_tlb(struct kvm_vcpu *vcpu);
131static void svm_complete_interrupts(struct vcpu_svm *svm);
74 132
75static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); 133static int nested_svm_exit_handled(struct vcpu_svm *svm);
76static int nested_svm_vmexit(struct vcpu_svm *svm); 134static int nested_svm_vmexit(struct vcpu_svm *svm);
77static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
78 void *arg2, void *opaque);
79static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 135static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
80 bool has_error_code, u32 error_code); 136 bool has_error_code, u32 error_code);
81 137
@@ -86,7 +142,22 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
86 142
87static inline bool is_nested(struct vcpu_svm *svm) 143static inline bool is_nested(struct vcpu_svm *svm)
88{ 144{
89 return svm->nested_vmcb; 145 return svm->nested.vmcb;
146}
147
148static inline void enable_gif(struct vcpu_svm *svm)
149{
150 svm->vcpu.arch.hflags |= HF_GIF_MASK;
151}
152
153static inline void disable_gif(struct vcpu_svm *svm)
154{
155 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
156}
157
158static inline bool gif_set(struct vcpu_svm *svm)
159{
160 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
90} 161}
91 162
92static unsigned long iopm_base; 163static unsigned long iopm_base;
@@ -147,19 +218,6 @@ static inline void invlpga(unsigned long addr, u32 asid)
147 asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); 218 asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid));
148} 219}
149 220
150static inline unsigned long kvm_read_cr2(void)
151{
152 unsigned long cr2;
153
154 asm volatile ("mov %%cr2, %0" : "=r" (cr2));
155 return cr2;
156}
157
158static inline void kvm_write_cr2(unsigned long val)
159{
160 asm volatile ("mov %0, %%cr2" :: "r" (val));
161}
162
163static inline void force_new_asid(struct kvm_vcpu *vcpu) 221static inline void force_new_asid(struct kvm_vcpu *vcpu)
164{ 222{
165 to_svm(vcpu)->asid_generation--; 223 to_svm(vcpu)->asid_generation--;
@@ -263,7 +321,7 @@ static void svm_hardware_enable(void *garbage)
263 321
264 struct svm_cpu_data *svm_data; 322 struct svm_cpu_data *svm_data;
265 uint64_t efer; 323 uint64_t efer;
266 struct desc_ptr gdt_descr; 324 struct descriptor_table gdt_descr;
267 struct desc_struct *gdt; 325 struct desc_struct *gdt;
268 int me = raw_smp_processor_id(); 326 int me = raw_smp_processor_id();
269 327
@@ -283,8 +341,8 @@ static void svm_hardware_enable(void *garbage)
283 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 341 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
284 svm_data->next_asid = svm_data->max_asid + 1; 342 svm_data->next_asid = svm_data->max_asid + 1;
285 343
286 asm volatile ("sgdt %0" : "=m"(gdt_descr)); 344 kvm_get_gdt(&gdt_descr);
287 gdt = (struct desc_struct *)gdt_descr.address; 345 gdt = (struct desc_struct *)gdt_descr.base;
288 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 346 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
289 347
290 rdmsrl(MSR_EFER, efer); 348 rdmsrl(MSR_EFER, efer);
@@ -367,8 +425,6 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
367#endif 425#endif
368 set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); 426 set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
369 set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); 427 set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
370 set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
371 set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
372} 428}
373 429
374static void svm_enable_lbrv(struct vcpu_svm *svm) 430static void svm_enable_lbrv(struct vcpu_svm *svm)
@@ -595,8 +651,10 @@ static void init_vmcb(struct vcpu_svm *svm)
595 } 651 }
596 force_new_asid(&svm->vcpu); 652 force_new_asid(&svm->vcpu);
597 653
598 svm->nested_vmcb = 0; 654 svm->nested.vmcb = 0;
599 svm->vcpu.arch.hflags = HF_GIF_MASK; 655 svm->vcpu.arch.hflags = 0;
656
657 enable_gif(svm);
600} 658}
601 659
602static int svm_vcpu_reset(struct kvm_vcpu *vcpu) 660static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -605,7 +663,7 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
605 663
606 init_vmcb(svm); 664 init_vmcb(svm);
607 665
608 if (vcpu->vcpu_id != 0) { 666 if (!kvm_vcpu_is_bsp(vcpu)) {
609 kvm_rip_write(vcpu, 0); 667 kvm_rip_write(vcpu, 0);
610 svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; 668 svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
611 svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; 669 svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
@@ -656,9 +714,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
656 hsave_page = alloc_page(GFP_KERNEL); 714 hsave_page = alloc_page(GFP_KERNEL);
657 if (!hsave_page) 715 if (!hsave_page)
658 goto uninit; 716 goto uninit;
659 svm->hsave = page_address(hsave_page); 717 svm->nested.hsave = page_address(hsave_page);
660 718
661 svm->nested_msrpm = page_address(nested_msrpm_pages); 719 svm->nested.msrpm = page_address(nested_msrpm_pages);
662 720
663 svm->vmcb = page_address(page); 721 svm->vmcb = page_address(page);
664 clear_page(svm->vmcb); 722 clear_page(svm->vmcb);
@@ -669,7 +727,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
669 fx_init(&svm->vcpu); 727 fx_init(&svm->vcpu);
670 svm->vcpu.fpu_active = 1; 728 svm->vcpu.fpu_active = 1;
671 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 729 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
672 if (svm->vcpu.vcpu_id == 0) 730 if (kvm_vcpu_is_bsp(&svm->vcpu))
673 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 731 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
674 732
675 return &svm->vcpu; 733 return &svm->vcpu;
@@ -688,8 +746,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
688 746
689 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); 747 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
690 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); 748 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
691 __free_page(virt_to_page(svm->hsave)); 749 __free_page(virt_to_page(svm->nested.hsave));
692 __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER); 750 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
693 kvm_vcpu_uninit(vcpu); 751 kvm_vcpu_uninit(vcpu);
694 kmem_cache_free(kvm_vcpu_cache, svm); 752 kmem_cache_free(kvm_vcpu_cache, svm);
695} 753}
@@ -740,6 +798,18 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
740 to_svm(vcpu)->vmcb->save.rflags = rflags; 798 to_svm(vcpu)->vmcb->save.rflags = rflags;
741} 799}
742 800
801static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
802{
803 switch (reg) {
804 case VCPU_EXREG_PDPTR:
805 BUG_ON(!npt_enabled);
806 load_pdptrs(vcpu, vcpu->arch.cr3);
807 break;
808 default:
809 BUG();
810 }
811}
812
743static void svm_set_vintr(struct vcpu_svm *svm) 813static void svm_set_vintr(struct vcpu_svm *svm)
744{ 814{
745 svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR; 815 svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
@@ -1061,7 +1131,6 @@ static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
1061 val = 0; 1131 val = 0;
1062 } 1132 }
1063 1133
1064 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
1065 return val; 1134 return val;
1066} 1135}
1067 1136
@@ -1070,8 +1139,6 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
1070{ 1139{
1071 struct vcpu_svm *svm = to_svm(vcpu); 1140 struct vcpu_svm *svm = to_svm(vcpu);
1072 1141
1073 KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
1074
1075 *exception = 0; 1142 *exception = 0;
1076 1143
1077 switch (dr) { 1144 switch (dr) {
@@ -1119,25 +1186,9 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1119 fault_address = svm->vmcb->control.exit_info_2; 1186 fault_address = svm->vmcb->control.exit_info_2;
1120 error_code = svm->vmcb->control.exit_info_1; 1187 error_code = svm->vmcb->control.exit_info_1;
1121 1188
1122 if (!npt_enabled) 1189 trace_kvm_page_fault(fault_address, error_code);
1123 KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code, 1190 if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1124 (u32)fault_address, (u32)(fault_address >> 32), 1191 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1125 handler);
1126 else
1127 KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
1128 (u32)fault_address, (u32)(fault_address >> 32),
1129 handler);
1130 /*
1131 * FIXME: Tis shouldn't be necessary here, but there is a flush
1132 * missing in the MMU code. Until we find this bug, flush the
1133 * complete TLB here on an NPF
1134 */
1135 if (npt_enabled)
1136 svm_flush_tlb(&svm->vcpu);
1137 else {
1138 if (kvm_event_needs_reinjection(&svm->vcpu))
1139 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1140 }
1141 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); 1192 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1142} 1193}
1143 1194
@@ -1253,14 +1304,12 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1253 1304
1254static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1305static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1255{ 1306{
1256 KVMTRACE_0D(NMI, &svm->vcpu, handler);
1257 return 1; 1307 return 1;
1258} 1308}
1259 1309
1260static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1310static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1261{ 1311{
1262 ++svm->vcpu.stat.irq_exits; 1312 ++svm->vcpu.stat.irq_exits;
1263 KVMTRACE_0D(INTR, &svm->vcpu, handler);
1264 return 1; 1313 return 1;
1265} 1314}
1266 1315
@@ -1303,44 +1352,39 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
1303static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 1352static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1304 bool has_error_code, u32 error_code) 1353 bool has_error_code, u32 error_code)
1305{ 1354{
1306 if (is_nested(svm)) { 1355 if (!is_nested(svm))
1307 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; 1356 return 0;
1308 svm->vmcb->control.exit_code_hi = 0;
1309 svm->vmcb->control.exit_info_1 = error_code;
1310 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1311 if (nested_svm_exit_handled(svm, false)) {
1312 nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
1313
1314 nested_svm_vmexit(svm);
1315 return 1;
1316 }
1317 }
1318 1357
1319 return 0; 1358 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1359 svm->vmcb->control.exit_code_hi = 0;
1360 svm->vmcb->control.exit_info_1 = error_code;
1361 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1362
1363 return nested_svm_exit_handled(svm);
1320} 1364}
1321 1365
1322static inline int nested_svm_intr(struct vcpu_svm *svm) 1366static inline int nested_svm_intr(struct vcpu_svm *svm)
1323{ 1367{
1324 if (is_nested(svm)) { 1368 if (!is_nested(svm))
1325 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) 1369 return 0;
1326 return 0;
1327 1370
1328 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) 1371 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1329 return 0; 1372 return 0;
1330 1373
1331 svm->vmcb->control.exit_code = SVM_EXIT_INTR; 1374 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1375 return 0;
1332 1376
1333 if (nested_svm_exit_handled(svm, false)) { 1377 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
1334 nsvm_printk("VMexit -> INTR\n"); 1378
1335 nested_svm_vmexit(svm); 1379 if (nested_svm_exit_handled(svm)) {
1336 return 1; 1380 nsvm_printk("VMexit -> INTR\n");
1337 } 1381 return 1;
1338 } 1382 }
1339 1383
1340 return 0; 1384 return 0;
1341} 1385}
1342 1386
1343static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa) 1387static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
1344{ 1388{
1345 struct page *page; 1389 struct page *page;
1346 1390
@@ -1348,236 +1392,246 @@ static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
1348 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); 1392 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1349 up_read(&current->mm->mmap_sem); 1393 up_read(&current->mm->mmap_sem);
1350 1394
1351 if (is_error_page(page)) { 1395 if (is_error_page(page))
1352 printk(KERN_INFO "%s: could not find page at 0x%llx\n", 1396 goto error;
1353 __func__, gpa); 1397
1354 kvm_release_page_clean(page); 1398 return kmap_atomic(page, idx);
1355 kvm_inject_gp(&svm->vcpu, 0); 1399
1356 return NULL; 1400error:
1357 } 1401 kvm_release_page_clean(page);
1358 return page; 1402 kvm_inject_gp(&svm->vcpu, 0);
1403
1404 return NULL;
1359} 1405}
1360 1406
1361static int nested_svm_do(struct vcpu_svm *svm, 1407static void nested_svm_unmap(void *addr, enum km_type idx)
1362 u64 arg1_gpa, u64 arg2_gpa, void *opaque,
1363 int (*handler)(struct vcpu_svm *svm,
1364 void *arg1,
1365 void *arg2,
1366 void *opaque))
1367{ 1408{
1368 struct page *arg1_page; 1409 struct page *page;
1369 struct page *arg2_page = NULL;
1370 void *arg1;
1371 void *arg2 = NULL;
1372 int retval;
1373 1410
1374 arg1_page = nested_svm_get_page(svm, arg1_gpa); 1411 if (!addr)
1375 if(arg1_page == NULL) 1412 return;
1376 return 1;
1377 1413
1378 if (arg2_gpa) { 1414 page = kmap_atomic_to_page(addr);
1379 arg2_page = nested_svm_get_page(svm, arg2_gpa); 1415
1380 if(arg2_page == NULL) { 1416 kunmap_atomic(addr, idx);
1381 kvm_release_page_clean(arg1_page); 1417 kvm_release_page_dirty(page);
1382 return 1; 1418}
1383 } 1419
1384 } 1420static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1421{
1422 u32 param = svm->vmcb->control.exit_info_1 & 1;
1423 u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1424 bool ret = false;
1425 u32 t0, t1;
1426 u8 *msrpm;
1385 1427
1386 arg1 = kmap_atomic(arg1_page, KM_USER0); 1428 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1387 if (arg2_gpa) 1429 return false;
1388 arg2 = kmap_atomic(arg2_page, KM_USER1);
1389 1430
1390 retval = handler(svm, arg1, arg2, opaque); 1431 msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
1432
1433 if (!msrpm)
1434 goto out;
1435
1436 switch (msr) {
1437 case 0 ... 0x1fff:
1438 t0 = (msr * 2) % 8;
1439 t1 = msr / 8;
1440 break;
1441 case 0xc0000000 ... 0xc0001fff:
1442 t0 = (8192 + msr - 0xc0000000) * 2;
1443 t1 = (t0 / 8);
1444 t0 %= 8;
1445 break;
1446 case 0xc0010000 ... 0xc0011fff:
1447 t0 = (16384 + msr - 0xc0010000) * 2;
1448 t1 = (t0 / 8);
1449 t0 %= 8;
1450 break;
1451 default:
1452 ret = true;
1453 goto out;
1454 }
1391 1455
1392 kunmap_atomic(arg1, KM_USER0); 1456 ret = msrpm[t1] & ((1 << param) << t0);
1393 if (arg2_gpa)
1394 kunmap_atomic(arg2, KM_USER1);
1395 1457
1396 kvm_release_page_dirty(arg1_page); 1458out:
1397 if (arg2_gpa) 1459 nested_svm_unmap(msrpm, KM_USER0);
1398 kvm_release_page_dirty(arg2_page);
1399 1460
1400 return retval; 1461 return ret;
1401} 1462}
1402 1463
1403static int nested_svm_exit_handled_real(struct vcpu_svm *svm, 1464static int nested_svm_exit_special(struct vcpu_svm *svm)
1404 void *arg1,
1405 void *arg2,
1406 void *opaque)
1407{ 1465{
1408 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1409 bool kvm_overrides = *(bool *)opaque;
1410 u32 exit_code = svm->vmcb->control.exit_code; 1466 u32 exit_code = svm->vmcb->control.exit_code;
1411 1467
1412 if (kvm_overrides) { 1468 switch (exit_code) {
1413 switch (exit_code) { 1469 case SVM_EXIT_INTR:
1414 case SVM_EXIT_INTR: 1470 case SVM_EXIT_NMI:
1415 case SVM_EXIT_NMI: 1471 return NESTED_EXIT_HOST;
1416 return 0;
1417 /* For now we are always handling NPFs when using them */ 1472 /* For now we are always handling NPFs when using them */
1418 case SVM_EXIT_NPF: 1473 case SVM_EXIT_NPF:
1419 if (npt_enabled) 1474 if (npt_enabled)
1420 return 0; 1475 return NESTED_EXIT_HOST;
1421 break; 1476 break;
1422 /* When we're shadowing, trap PFs */ 1477 /* When we're shadowing, trap PFs */
1423 case SVM_EXIT_EXCP_BASE + PF_VECTOR: 1478 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1424 if (!npt_enabled) 1479 if (!npt_enabled)
1425 return 0; 1480 return NESTED_EXIT_HOST;
1426 break; 1481 break;
1427 default: 1482 default:
1428 break; 1483 break;
1429 }
1430 } 1484 }
1431 1485
1486 return NESTED_EXIT_CONTINUE;
1487}
1488
1489/*
1490 * If this function returns true, this #vmexit was already handled
1491 */
1492static int nested_svm_exit_handled(struct vcpu_svm *svm)
1493{
1494 u32 exit_code = svm->vmcb->control.exit_code;
1495 int vmexit = NESTED_EXIT_HOST;
1496
1432 switch (exit_code) { 1497 switch (exit_code) {
1498 case SVM_EXIT_MSR:
1499 vmexit = nested_svm_exit_handled_msr(svm);
1500 break;
1433 case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { 1501 case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
1434 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); 1502 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
1435 if (nested_vmcb->control.intercept_cr_read & cr_bits) 1503 if (svm->nested.intercept_cr_read & cr_bits)
1436 return 1; 1504 vmexit = NESTED_EXIT_DONE;
1437 break; 1505 break;
1438 } 1506 }
1439 case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: { 1507 case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
1440 u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0); 1508 u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
1441 if (nested_vmcb->control.intercept_cr_write & cr_bits) 1509 if (svm->nested.intercept_cr_write & cr_bits)
1442 return 1; 1510 vmexit = NESTED_EXIT_DONE;
1443 break; 1511 break;
1444 } 1512 }
1445 case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: { 1513 case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
1446 u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0); 1514 u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
1447 if (nested_vmcb->control.intercept_dr_read & dr_bits) 1515 if (svm->nested.intercept_dr_read & dr_bits)
1448 return 1; 1516 vmexit = NESTED_EXIT_DONE;
1449 break; 1517 break;
1450 } 1518 }
1451 case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: { 1519 case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
1452 u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0); 1520 u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
1453 if (nested_vmcb->control.intercept_dr_write & dr_bits) 1521 if (svm->nested.intercept_dr_write & dr_bits)
1454 return 1; 1522 vmexit = NESTED_EXIT_DONE;
1455 break; 1523 break;
1456 } 1524 }
1457 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { 1525 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1458 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); 1526 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1459 if (nested_vmcb->control.intercept_exceptions & excp_bits) 1527 if (svm->nested.intercept_exceptions & excp_bits)
1460 return 1; 1528 vmexit = NESTED_EXIT_DONE;
1461 break; 1529 break;
1462 } 1530 }
1463 default: { 1531 default: {
1464 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); 1532 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
1465 nsvm_printk("exit code: 0x%x\n", exit_code); 1533 nsvm_printk("exit code: 0x%x\n", exit_code);
1466 if (nested_vmcb->control.intercept & exit_bits) 1534 if (svm->nested.intercept & exit_bits)
1467 return 1; 1535 vmexit = NESTED_EXIT_DONE;
1468 } 1536 }
1469 } 1537 }
1470 1538
1471 return 0; 1539 if (vmexit == NESTED_EXIT_DONE) {
1472} 1540 nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
1473 1541 nested_svm_vmexit(svm);
1474static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
1475 void *arg1, void *arg2,
1476 void *opaque)
1477{
1478 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1479 u8 *msrpm = (u8 *)arg2;
1480 u32 t0, t1;
1481 u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1482 u32 param = svm->vmcb->control.exit_info_1 & 1;
1483
1484 if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1485 return 0;
1486
1487 switch(msr) {
1488 case 0 ... 0x1fff:
1489 t0 = (msr * 2) % 8;
1490 t1 = msr / 8;
1491 break;
1492 case 0xc0000000 ... 0xc0001fff:
1493 t0 = (8192 + msr - 0xc0000000) * 2;
1494 t1 = (t0 / 8);
1495 t0 %= 8;
1496 break;
1497 case 0xc0010000 ... 0xc0011fff:
1498 t0 = (16384 + msr - 0xc0010000) * 2;
1499 t1 = (t0 / 8);
1500 t0 %= 8;
1501 break;
1502 default:
1503 return 1;
1504 break;
1505 } 1542 }
1506 if (msrpm[t1] & ((1 << param) << t0))
1507 return 1;
1508 1543
1509 return 0; 1544 return vmexit;
1545}
1546
1547static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
1548{
1549 struct vmcb_control_area *dst = &dst_vmcb->control;
1550 struct vmcb_control_area *from = &from_vmcb->control;
1551
1552 dst->intercept_cr_read = from->intercept_cr_read;
1553 dst->intercept_cr_write = from->intercept_cr_write;
1554 dst->intercept_dr_read = from->intercept_dr_read;
1555 dst->intercept_dr_write = from->intercept_dr_write;
1556 dst->intercept_exceptions = from->intercept_exceptions;
1557 dst->intercept = from->intercept;
1558 dst->iopm_base_pa = from->iopm_base_pa;
1559 dst->msrpm_base_pa = from->msrpm_base_pa;
1560 dst->tsc_offset = from->tsc_offset;
1561 dst->asid = from->asid;
1562 dst->tlb_ctl = from->tlb_ctl;
1563 dst->int_ctl = from->int_ctl;
1564 dst->int_vector = from->int_vector;
1565 dst->int_state = from->int_state;
1566 dst->exit_code = from->exit_code;
1567 dst->exit_code_hi = from->exit_code_hi;
1568 dst->exit_info_1 = from->exit_info_1;
1569 dst->exit_info_2 = from->exit_info_2;
1570 dst->exit_int_info = from->exit_int_info;
1571 dst->exit_int_info_err = from->exit_int_info_err;
1572 dst->nested_ctl = from->nested_ctl;
1573 dst->event_inj = from->event_inj;
1574 dst->event_inj_err = from->event_inj_err;
1575 dst->nested_cr3 = from->nested_cr3;
1576 dst->lbr_ctl = from->lbr_ctl;
1510} 1577}
1511 1578
1512static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override) 1579static int nested_svm_vmexit(struct vcpu_svm *svm)
1513{ 1580{
1514 bool k = kvm_override; 1581 struct vmcb *nested_vmcb;
1515 1582 struct vmcb *hsave = svm->nested.hsave;
1516 switch (svm->vmcb->control.exit_code) { 1583 struct vmcb *vmcb = svm->vmcb;
1517 case SVM_EXIT_MSR:
1518 return nested_svm_do(svm, svm->nested_vmcb,
1519 svm->nested_vmcb_msrpm, NULL,
1520 nested_svm_exit_handled_msr);
1521 default: break;
1522 }
1523 1584
1524 return nested_svm_do(svm, svm->nested_vmcb, 0, &k, 1585 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
1525 nested_svm_exit_handled_real); 1586 if (!nested_vmcb)
1526} 1587 return 1;
1527
1528static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
1529 void *arg2, void *opaque)
1530{
1531 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1532 struct vmcb *hsave = svm->hsave;
1533 u64 nested_save[] = { nested_vmcb->save.cr0,
1534 nested_vmcb->save.cr3,
1535 nested_vmcb->save.cr4,
1536 nested_vmcb->save.efer,
1537 nested_vmcb->control.intercept_cr_read,
1538 nested_vmcb->control.intercept_cr_write,
1539 nested_vmcb->control.intercept_dr_read,
1540 nested_vmcb->control.intercept_dr_write,
1541 nested_vmcb->control.intercept_exceptions,
1542 nested_vmcb->control.intercept,
1543 nested_vmcb->control.msrpm_base_pa,
1544 nested_vmcb->control.iopm_base_pa,
1545 nested_vmcb->control.tsc_offset };
1546 1588
1547 /* Give the current vmcb to the guest */ 1589 /* Give the current vmcb to the guest */
1548 memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb)); 1590 disable_gif(svm);
1549 nested_vmcb->save.cr0 = nested_save[0]; 1591
1550 if (!npt_enabled) 1592 nested_vmcb->save.es = vmcb->save.es;
1551 nested_vmcb->save.cr3 = nested_save[1]; 1593 nested_vmcb->save.cs = vmcb->save.cs;
1552 nested_vmcb->save.cr4 = nested_save[2]; 1594 nested_vmcb->save.ss = vmcb->save.ss;
1553 nested_vmcb->save.efer = nested_save[3]; 1595 nested_vmcb->save.ds = vmcb->save.ds;
1554 nested_vmcb->control.intercept_cr_read = nested_save[4]; 1596 nested_vmcb->save.gdtr = vmcb->save.gdtr;
1555 nested_vmcb->control.intercept_cr_write = nested_save[5]; 1597 nested_vmcb->save.idtr = vmcb->save.idtr;
1556 nested_vmcb->control.intercept_dr_read = nested_save[6]; 1598 if (npt_enabled)
1557 nested_vmcb->control.intercept_dr_write = nested_save[7]; 1599 nested_vmcb->save.cr3 = vmcb->save.cr3;
1558 nested_vmcb->control.intercept_exceptions = nested_save[8]; 1600 nested_vmcb->save.cr2 = vmcb->save.cr2;
1559 nested_vmcb->control.intercept = nested_save[9]; 1601 nested_vmcb->save.rflags = vmcb->save.rflags;
1560 nested_vmcb->control.msrpm_base_pa = nested_save[10]; 1602 nested_vmcb->save.rip = vmcb->save.rip;
1561 nested_vmcb->control.iopm_base_pa = nested_save[11]; 1603 nested_vmcb->save.rsp = vmcb->save.rsp;
1562 nested_vmcb->control.tsc_offset = nested_save[12]; 1604 nested_vmcb->save.rax = vmcb->save.rax;
1605 nested_vmcb->save.dr7 = vmcb->save.dr7;
1606 nested_vmcb->save.dr6 = vmcb->save.dr6;
1607 nested_vmcb->save.cpl = vmcb->save.cpl;
1608
1609 nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
1610 nested_vmcb->control.int_vector = vmcb->control.int_vector;
1611 nested_vmcb->control.int_state = vmcb->control.int_state;
1612 nested_vmcb->control.exit_code = vmcb->control.exit_code;
1613 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
1614 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
1615 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
1616 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
1617 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
1618 nested_vmcb->control.tlb_ctl = 0;
1619 nested_vmcb->control.event_inj = 0;
1620 nested_vmcb->control.event_inj_err = 0;
1563 1621
1564 /* We always set V_INTR_MASKING and remember the old value in hflags */ 1622 /* We always set V_INTR_MASKING and remember the old value in hflags */
1565 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) 1623 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1566 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; 1624 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
1567 1625
1568 if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
1569 (nested_vmcb->control.int_vector)) {
1570 nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
1571 nested_vmcb->control.int_vector);
1572 }
1573
1574 /* Restore the original control entries */ 1626 /* Restore the original control entries */
1575 svm->vmcb->control = hsave->control; 1627 copy_vmcb_control_area(vmcb, hsave);
1576 1628
1577 /* Kill any pending exceptions */ 1629 /* Kill any pending exceptions */
1578 if (svm->vcpu.arch.exception.pending == true) 1630 if (svm->vcpu.arch.exception.pending == true)
1579 nsvm_printk("WARNING: Pending Exception\n"); 1631 nsvm_printk("WARNING: Pending Exception\n");
1580 svm->vcpu.arch.exception.pending = false; 1632
1633 kvm_clear_exception_queue(&svm->vcpu);
1634 kvm_clear_interrupt_queue(&svm->vcpu);
1581 1635
1582 /* Restore selected save entries */ 1636 /* Restore selected save entries */
1583 svm->vmcb->save.es = hsave->save.es; 1637 svm->vmcb->save.es = hsave->save.es;
@@ -1603,19 +1657,10 @@ static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
1603 svm->vmcb->save.cpl = 0; 1657 svm->vmcb->save.cpl = 0;
1604 svm->vmcb->control.exit_int_info = 0; 1658 svm->vmcb->control.exit_int_info = 0;
1605 1659
1606 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1607 /* Exit nested SVM mode */ 1660 /* Exit nested SVM mode */
1608 svm->nested_vmcb = 0; 1661 svm->nested.vmcb = 0;
1609 1662
1610 return 0; 1663 nested_svm_unmap(nested_vmcb, KM_USER0);
1611}
1612
1613static int nested_svm_vmexit(struct vcpu_svm *svm)
1614{
1615 nsvm_printk("VMexit\n");
1616 if (nested_svm_do(svm, svm->nested_vmcb, 0,
1617 NULL, nested_svm_vmexit_real))
1618 return 1;
1619 1664
1620 kvm_mmu_reset_context(&svm->vcpu); 1665 kvm_mmu_reset_context(&svm->vcpu);
1621 kvm_mmu_load(&svm->vcpu); 1666 kvm_mmu_load(&svm->vcpu);
@@ -1623,38 +1668,63 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
1623 return 0; 1668 return 0;
1624} 1669}
1625 1670
1626static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1, 1671static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
1627 void *arg2, void *opaque)
1628{ 1672{
1673 u32 *nested_msrpm;
1629 int i; 1674 int i;
1630 u32 *nested_msrpm = (u32*)arg1; 1675
1676 nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
1677 if (!nested_msrpm)
1678 return false;
1679
1631 for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) 1680 for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
1632 svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; 1681 svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
1633 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
1634 1682
1635 return 0; 1683 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
1684
1685 nested_svm_unmap(nested_msrpm, KM_USER0);
1686
1687 return true;
1636} 1688}
1637 1689
1638static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1, 1690static bool nested_svm_vmrun(struct vcpu_svm *svm)
1639 void *arg2, void *opaque)
1640{ 1691{
1641 struct vmcb *nested_vmcb = (struct vmcb *)arg1; 1692 struct vmcb *nested_vmcb;
1642 struct vmcb *hsave = svm->hsave; 1693 struct vmcb *hsave = svm->nested.hsave;
1694 struct vmcb *vmcb = svm->vmcb;
1695
1696 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
1697 if (!nested_vmcb)
1698 return false;
1643 1699
1644 /* nested_vmcb is our indicator if nested SVM is activated */ 1700 /* nested_vmcb is our indicator if nested SVM is activated */
1645 svm->nested_vmcb = svm->vmcb->save.rax; 1701 svm->nested.vmcb = svm->vmcb->save.rax;
1646 1702
1647 /* Clear internal status */ 1703 /* Clear internal status */
1648 svm->vcpu.arch.exception.pending = false; 1704 kvm_clear_exception_queue(&svm->vcpu);
1705 kvm_clear_interrupt_queue(&svm->vcpu);
1649 1706
1650 /* Save the old vmcb, so we don't need to pick what we save, but 1707 /* Save the old vmcb, so we don't need to pick what we save, but
1651 can restore everything when a VMEXIT occurs */ 1708 can restore everything when a VMEXIT occurs */
1652 memcpy(hsave, svm->vmcb, sizeof(struct vmcb)); 1709 hsave->save.es = vmcb->save.es;
1653 /* We need to remember the original CR3 in the SPT case */ 1710 hsave->save.cs = vmcb->save.cs;
1654 if (!npt_enabled) 1711 hsave->save.ss = vmcb->save.ss;
1655 hsave->save.cr3 = svm->vcpu.arch.cr3; 1712 hsave->save.ds = vmcb->save.ds;
1656 hsave->save.cr4 = svm->vcpu.arch.cr4; 1713 hsave->save.gdtr = vmcb->save.gdtr;
1657 hsave->save.rip = svm->next_rip; 1714 hsave->save.idtr = vmcb->save.idtr;
1715 hsave->save.efer = svm->vcpu.arch.shadow_efer;
1716 hsave->save.cr0 = svm->vcpu.arch.cr0;
1717 hsave->save.cr4 = svm->vcpu.arch.cr4;
1718 hsave->save.rflags = vmcb->save.rflags;
1719 hsave->save.rip = svm->next_rip;
1720 hsave->save.rsp = vmcb->save.rsp;
1721 hsave->save.rax = vmcb->save.rax;
1722 if (npt_enabled)
1723 hsave->save.cr3 = vmcb->save.cr3;
1724 else
1725 hsave->save.cr3 = svm->vcpu.arch.cr3;
1726
1727 copy_vmcb_control_area(hsave, vmcb);
1658 1728
1659 if (svm->vmcb->save.rflags & X86_EFLAGS_IF) 1729 if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
1660 svm->vcpu.arch.hflags |= HF_HIF_MASK; 1730 svm->vcpu.arch.hflags |= HF_HIF_MASK;
@@ -1679,7 +1749,7 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
1679 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); 1749 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
1680 kvm_mmu_reset_context(&svm->vcpu); 1750 kvm_mmu_reset_context(&svm->vcpu);
1681 } 1751 }
1682 svm->vmcb->save.cr2 = nested_vmcb->save.cr2; 1752 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
1683 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); 1753 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
1684 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); 1754 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
1685 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); 1755 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
@@ -1706,7 +1776,15 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
1706 1776
1707 svm->vmcb->control.intercept |= nested_vmcb->control.intercept; 1777 svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
1708 1778
1709 svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; 1779 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
1780
1781 /* cache intercepts */
1782 svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read;
1783 svm->nested.intercept_cr_write = nested_vmcb->control.intercept_cr_write;
1784 svm->nested.intercept_dr_read = nested_vmcb->control.intercept_dr_read;
1785 svm->nested.intercept_dr_write = nested_vmcb->control.intercept_dr_write;
1786 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
1787 svm->nested.intercept = nested_vmcb->control.intercept;
1710 1788
1711 force_new_asid(&svm->vcpu); 1789 force_new_asid(&svm->vcpu);
1712 svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; 1790 svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
@@ -1734,12 +1812,14 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
1734 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; 1812 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
1735 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; 1813 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
1736 1814
1737 svm->vcpu.arch.hflags |= HF_GIF_MASK; 1815 nested_svm_unmap(nested_vmcb, KM_USER0);
1738 1816
1739 return 0; 1817 enable_gif(svm);
1818
1819 return true;
1740} 1820}
1741 1821
1742static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) 1822static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
1743{ 1823{
1744 to_vmcb->save.fs = from_vmcb->save.fs; 1824 to_vmcb->save.fs = from_vmcb->save.fs;
1745 to_vmcb->save.gs = from_vmcb->save.gs; 1825 to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1753,44 +1833,44 @@ static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
1753 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; 1833 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
1754 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; 1834 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
1755 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; 1835 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
1756
1757 return 1;
1758}
1759
1760static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
1761 void *arg2, void *opaque)
1762{
1763 return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
1764}
1765
1766static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
1767 void *arg2, void *opaque)
1768{
1769 return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
1770} 1836}
1771 1837
1772static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1838static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1773{ 1839{
1840 struct vmcb *nested_vmcb;
1841
1774 if (nested_svm_check_permissions(svm)) 1842 if (nested_svm_check_permissions(svm))
1775 return 1; 1843 return 1;
1776 1844
1777 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 1845 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1778 skip_emulated_instruction(&svm->vcpu); 1846 skip_emulated_instruction(&svm->vcpu);
1779 1847
1780 nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload); 1848 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
1849 if (!nested_vmcb)
1850 return 1;
1851
1852 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
1853 nested_svm_unmap(nested_vmcb, KM_USER0);
1781 1854
1782 return 1; 1855 return 1;
1783} 1856}
1784 1857
1785static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1858static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1786{ 1859{
1860 struct vmcb *nested_vmcb;
1861
1787 if (nested_svm_check_permissions(svm)) 1862 if (nested_svm_check_permissions(svm))
1788 return 1; 1863 return 1;
1789 1864
1790 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 1865 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1791 skip_emulated_instruction(&svm->vcpu); 1866 skip_emulated_instruction(&svm->vcpu);
1792 1867
1793 nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave); 1868 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
1869 if (!nested_vmcb)
1870 return 1;
1871
1872 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
1873 nested_svm_unmap(nested_vmcb, KM_USER0);
1794 1874
1795 return 1; 1875 return 1;
1796} 1876}
@@ -1798,19 +1878,29 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1798static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1878static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1799{ 1879{
1800 nsvm_printk("VMrun\n"); 1880 nsvm_printk("VMrun\n");
1881
1801 if (nested_svm_check_permissions(svm)) 1882 if (nested_svm_check_permissions(svm))
1802 return 1; 1883 return 1;
1803 1884
1804 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 1885 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1805 skip_emulated_instruction(&svm->vcpu); 1886 skip_emulated_instruction(&svm->vcpu);
1806 1887
1807 if (nested_svm_do(svm, svm->vmcb->save.rax, 0, 1888 if (!nested_svm_vmrun(svm))
1808 NULL, nested_svm_vmrun))
1809 return 1; 1889 return 1;
1810 1890
1811 if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0, 1891 if (!nested_svm_vmrun_msrpm(svm))
1812 NULL, nested_svm_vmrun_msrpm)) 1892 goto failed;
1813 return 1; 1893
1894 return 1;
1895
1896failed:
1897
1898 svm->vmcb->control.exit_code = SVM_EXIT_ERR;
1899 svm->vmcb->control.exit_code_hi = 0;
1900 svm->vmcb->control.exit_info_1 = 0;
1901 svm->vmcb->control.exit_info_2 = 0;
1902
1903 nested_svm_vmexit(svm);
1814 1904
1815 return 1; 1905 return 1;
1816} 1906}
@@ -1823,7 +1913,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1823 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 1913 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1824 skip_emulated_instruction(&svm->vcpu); 1914 skip_emulated_instruction(&svm->vcpu);
1825 1915
1826 svm->vcpu.arch.hflags |= HF_GIF_MASK; 1916 enable_gif(svm);
1827 1917
1828 return 1; 1918 return 1;
1829} 1919}
@@ -1836,7 +1926,7 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1836 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 1926 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1837 skip_emulated_instruction(&svm->vcpu); 1927 skip_emulated_instruction(&svm->vcpu);
1838 1928
1839 svm->vcpu.arch.hflags &= ~HF_GIF_MASK; 1929 disable_gif(svm);
1840 1930
1841 /* After a CLGI no interrupts should come */ 1931 /* After a CLGI no interrupts should come */
1842 svm_clear_vintr(svm); 1932 svm_clear_vintr(svm);
@@ -1845,6 +1935,19 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1845 return 1; 1935 return 1;
1846} 1936}
1847 1937
1938static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1939{
1940 struct kvm_vcpu *vcpu = &svm->vcpu;
1941 nsvm_printk("INVLPGA\n");
1942
1943 /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
1944 kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
1945
1946 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1947 skip_emulated_instruction(&svm->vcpu);
1948 return 1;
1949}
1950
1848static int invalid_op_interception(struct vcpu_svm *svm, 1951static int invalid_op_interception(struct vcpu_svm *svm,
1849 struct kvm_run *kvm_run) 1952 struct kvm_run *kvm_run)
1850{ 1953{
@@ -1953,7 +2056,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1953 struct vcpu_svm *svm = to_svm(vcpu); 2056 struct vcpu_svm *svm = to_svm(vcpu);
1954 2057
1955 switch (ecx) { 2058 switch (ecx) {
1956 case MSR_IA32_TIME_STAMP_COUNTER: { 2059 case MSR_IA32_TSC: {
1957 u64 tsc; 2060 u64 tsc;
1958 2061
1959 rdtscll(tsc); 2062 rdtscll(tsc);
@@ -1981,10 +2084,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1981 *data = svm->vmcb->save.sysenter_cs; 2084 *data = svm->vmcb->save.sysenter_cs;
1982 break; 2085 break;
1983 case MSR_IA32_SYSENTER_EIP: 2086 case MSR_IA32_SYSENTER_EIP:
1984 *data = svm->vmcb->save.sysenter_eip; 2087 *data = svm->sysenter_eip;
1985 break; 2088 break;
1986 case MSR_IA32_SYSENTER_ESP: 2089 case MSR_IA32_SYSENTER_ESP:
1987 *data = svm->vmcb->save.sysenter_esp; 2090 *data = svm->sysenter_esp;
1988 break; 2091 break;
1989 /* Nobody will change the following 5 values in the VMCB so 2092 /* Nobody will change the following 5 values in the VMCB so
1990 we can safely return them on rdmsr. They will always be 0 2093 we can safely return them on rdmsr. They will always be 0
@@ -2005,7 +2108,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2005 *data = svm->vmcb->save.last_excp_to; 2108 *data = svm->vmcb->save.last_excp_to;
2006 break; 2109 break;
2007 case MSR_VM_HSAVE_PA: 2110 case MSR_VM_HSAVE_PA:
2008 *data = svm->hsave_msr; 2111 *data = svm->nested.hsave_msr;
2009 break; 2112 break;
2010 case MSR_VM_CR: 2113 case MSR_VM_CR:
2011 *data = 0; 2114 *data = 0;
@@ -2027,8 +2130,7 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
2027 if (svm_get_msr(&svm->vcpu, ecx, &data)) 2130 if (svm_get_msr(&svm->vcpu, ecx, &data))
2028 kvm_inject_gp(&svm->vcpu, 0); 2131 kvm_inject_gp(&svm->vcpu, 0);
2029 else { 2132 else {
2030 KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data, 2133 trace_kvm_msr_read(ecx, data);
2031 (u32)(data >> 32), handler);
2032 2134
2033 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; 2135 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
2034 svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; 2136 svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
@@ -2043,7 +2145,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2043 struct vcpu_svm *svm = to_svm(vcpu); 2145 struct vcpu_svm *svm = to_svm(vcpu);
2044 2146
2045 switch (ecx) { 2147 switch (ecx) {
2046 case MSR_IA32_TIME_STAMP_COUNTER: { 2148 case MSR_IA32_TSC: {
2047 u64 tsc; 2149 u64 tsc;
2048 2150
2049 rdtscll(tsc); 2151 rdtscll(tsc);
@@ -2071,9 +2173,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2071 svm->vmcb->save.sysenter_cs = data; 2173 svm->vmcb->save.sysenter_cs = data;
2072 break; 2174 break;
2073 case MSR_IA32_SYSENTER_EIP: 2175 case MSR_IA32_SYSENTER_EIP:
2176 svm->sysenter_eip = data;
2074 svm->vmcb->save.sysenter_eip = data; 2177 svm->vmcb->save.sysenter_eip = data;
2075 break; 2178 break;
2076 case MSR_IA32_SYSENTER_ESP: 2179 case MSR_IA32_SYSENTER_ESP:
2180 svm->sysenter_esp = data;
2077 svm->vmcb->save.sysenter_esp = data; 2181 svm->vmcb->save.sysenter_esp = data;
2078 break; 2182 break;
2079 case MSR_IA32_DEBUGCTLMSR: 2183 case MSR_IA32_DEBUGCTLMSR:
@@ -2091,24 +2195,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2091 else 2195 else
2092 svm_disable_lbrv(svm); 2196 svm_disable_lbrv(svm);
2093 break; 2197 break;
2094 case MSR_K7_EVNTSEL0:
2095 case MSR_K7_EVNTSEL1:
2096 case MSR_K7_EVNTSEL2:
2097 case MSR_K7_EVNTSEL3:
2098 case MSR_K7_PERFCTR0:
2099 case MSR_K7_PERFCTR1:
2100 case MSR_K7_PERFCTR2:
2101 case MSR_K7_PERFCTR3:
2102 /*
2103 * Just discard all writes to the performance counters; this
2104 * should keep both older linux and windows 64-bit guests
2105 * happy
2106 */
2107 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
2108
2109 break;
2110 case MSR_VM_HSAVE_PA: 2198 case MSR_VM_HSAVE_PA:
2111 svm->hsave_msr = data; 2199 svm->nested.hsave_msr = data;
2200 break;
2201 case MSR_VM_CR:
2202 case MSR_VM_IGNNE:
2203 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
2112 break; 2204 break;
2113 default: 2205 default:
2114 return kvm_set_msr_common(vcpu, ecx, data); 2206 return kvm_set_msr_common(vcpu, ecx, data);
@@ -2122,8 +2214,7 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
2122 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) 2214 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
2123 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); 2215 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2124 2216
2125 KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32), 2217 trace_kvm_msr_write(ecx, data);
2126 handler);
2127 2218
2128 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 2219 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2129 if (svm_set_msr(&svm->vcpu, ecx, data)) 2220 if (svm_set_msr(&svm->vcpu, ecx, data))
@@ -2144,8 +2235,6 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
2144static int interrupt_window_interception(struct vcpu_svm *svm, 2235static int interrupt_window_interception(struct vcpu_svm *svm,
2145 struct kvm_run *kvm_run) 2236 struct kvm_run *kvm_run)
2146{ 2237{
2147 KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
2148
2149 svm_clear_vintr(svm); 2238 svm_clear_vintr(svm);
2150 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 2239 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2151 /* 2240 /*
@@ -2201,7 +2290,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
2201 [SVM_EXIT_INVD] = emulate_on_interception, 2290 [SVM_EXIT_INVD] = emulate_on_interception,
2202 [SVM_EXIT_HLT] = halt_interception, 2291 [SVM_EXIT_HLT] = halt_interception,
2203 [SVM_EXIT_INVLPG] = invlpg_interception, 2292 [SVM_EXIT_INVLPG] = invlpg_interception,
2204 [SVM_EXIT_INVLPGA] = invalid_op_interception, 2293 [SVM_EXIT_INVLPGA] = invlpga_interception,
2205 [SVM_EXIT_IOIO] = io_interception, 2294 [SVM_EXIT_IOIO] = io_interception,
2206 [SVM_EXIT_MSR] = msr_interception, 2295 [SVM_EXIT_MSR] = msr_interception,
2207 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 2296 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
@@ -2224,20 +2313,26 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2224 struct vcpu_svm *svm = to_svm(vcpu); 2313 struct vcpu_svm *svm = to_svm(vcpu);
2225 u32 exit_code = svm->vmcb->control.exit_code; 2314 u32 exit_code = svm->vmcb->control.exit_code;
2226 2315
2227 KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, 2316 trace_kvm_exit(exit_code, svm->vmcb->save.rip);
2228 (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
2229 2317
2230 if (is_nested(svm)) { 2318 if (is_nested(svm)) {
2319 int vmexit;
2320
2231 nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", 2321 nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
2232 exit_code, svm->vmcb->control.exit_info_1, 2322 exit_code, svm->vmcb->control.exit_info_1,
2233 svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); 2323 svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
2234 if (nested_svm_exit_handled(svm, true)) { 2324
2235 nested_svm_vmexit(svm); 2325 vmexit = nested_svm_exit_special(svm);
2236 nsvm_printk("-> #VMEXIT\n"); 2326
2327 if (vmexit == NESTED_EXIT_CONTINUE)
2328 vmexit = nested_svm_exit_handled(svm);
2329
2330 if (vmexit == NESTED_EXIT_DONE)
2237 return 1; 2331 return 1;
2238 }
2239 } 2332 }
2240 2333
2334 svm_complete_interrupts(svm);
2335
2241 if (npt_enabled) { 2336 if (npt_enabled) {
2242 int mmu_reload = 0; 2337 int mmu_reload = 0;
2243 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { 2338 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -2246,12 +2341,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2246 } 2341 }
2247 vcpu->arch.cr0 = svm->vmcb->save.cr0; 2342 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2248 vcpu->arch.cr3 = svm->vmcb->save.cr3; 2343 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2249 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
2250 if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
2251 kvm_inject_gp(vcpu, 0);
2252 return 1;
2253 }
2254 }
2255 if (mmu_reload) { 2344 if (mmu_reload) {
2256 kvm_mmu_reset_context(vcpu); 2345 kvm_mmu_reset_context(vcpu);
2257 kvm_mmu_load(vcpu); 2346 kvm_mmu_load(vcpu);
@@ -2319,7 +2408,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
2319{ 2408{
2320 struct vmcb_control_area *control; 2409 struct vmcb_control_area *control;
2321 2410
2322 KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler); 2411 trace_kvm_inj_virq(irq);
2323 2412
2324 ++svm->vcpu.stat.irq_injections; 2413 ++svm->vcpu.stat.irq_injections;
2325 control = &svm->vmcb->control; 2414 control = &svm->vmcb->control;
@@ -2329,21 +2418,14 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
2329 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); 2418 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
2330} 2419}
2331 2420
2332static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
2333{
2334 struct vcpu_svm *svm = to_svm(vcpu);
2335
2336 svm->vmcb->control.event_inj = nr |
2337 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
2338}
2339
2340static void svm_set_irq(struct kvm_vcpu *vcpu) 2421static void svm_set_irq(struct kvm_vcpu *vcpu)
2341{ 2422{
2342 struct vcpu_svm *svm = to_svm(vcpu); 2423 struct vcpu_svm *svm = to_svm(vcpu);
2343 2424
2344 nested_svm_intr(svm); 2425 BUG_ON(!(gif_set(svm)));
2345 2426
2346 svm_queue_irq(vcpu, vcpu->arch.interrupt.nr); 2427 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
2428 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
2347} 2429}
2348 2430
2349static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 2431static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -2371,13 +2453,25 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
2371 struct vmcb *vmcb = svm->vmcb; 2453 struct vmcb *vmcb = svm->vmcb;
2372 return (vmcb->save.rflags & X86_EFLAGS_IF) && 2454 return (vmcb->save.rflags & X86_EFLAGS_IF) &&
2373 !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && 2455 !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2374 (svm->vcpu.arch.hflags & HF_GIF_MASK); 2456 gif_set(svm) &&
2457 !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
2375} 2458}
2376 2459
2377static void enable_irq_window(struct kvm_vcpu *vcpu) 2460static void enable_irq_window(struct kvm_vcpu *vcpu)
2378{ 2461{
2379 svm_set_vintr(to_svm(vcpu)); 2462 struct vcpu_svm *svm = to_svm(vcpu);
2380 svm_inject_irq(to_svm(vcpu), 0x0); 2463 nsvm_printk("Trying to open IRQ window\n");
2464
2465 nested_svm_intr(svm);
2466
2467 /* In case GIF=0 we can't rely on the CPU to tell us when
2468 * GIF becomes 1, because that's a separate STGI/VMRUN intercept.
2469 * The next time we get that intercept, this function will be
2470 * called again though and we'll get the vintr intercept. */
2471 if (gif_set(svm)) {
2472 svm_set_vintr(svm);
2473 svm_inject_irq(svm, 0x0);
2474 }
2381} 2475}
2382 2476
2383static void enable_nmi_window(struct kvm_vcpu *vcpu) 2477static void enable_nmi_window(struct kvm_vcpu *vcpu)
@@ -2456,6 +2550,8 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
2456 case SVM_EXITINTINFO_TYPE_EXEPT: 2550 case SVM_EXITINTINFO_TYPE_EXEPT:
2457 /* In case of software exception do not reinject an exception 2551 /* In case of software exception do not reinject an exception
2458 vector, but re-execute and instruction instead */ 2552 vector, but re-execute and instruction instead */
2553 if (is_nested(svm))
2554 break;
2459 if (kvm_exception_is_soft(vector)) 2555 if (kvm_exception_is_soft(vector))
2460 break; 2556 break;
2461 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { 2557 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
@@ -2498,9 +2594,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2498 fs_selector = kvm_read_fs(); 2594 fs_selector = kvm_read_fs();
2499 gs_selector = kvm_read_gs(); 2595 gs_selector = kvm_read_gs();
2500 ldt_selector = kvm_read_ldt(); 2596 ldt_selector = kvm_read_ldt();
2501 svm->host_cr2 = kvm_read_cr2(); 2597 svm->vmcb->save.cr2 = vcpu->arch.cr2;
2502 if (!is_nested(svm))
2503 svm->vmcb->save.cr2 = vcpu->arch.cr2;
2504 /* required for live migration with NPT */ 2598 /* required for live migration with NPT */
2505 if (npt_enabled) 2599 if (npt_enabled)
2506 svm->vmcb->save.cr3 = vcpu->arch.cr3; 2600 svm->vmcb->save.cr3 = vcpu->arch.cr3;
@@ -2585,8 +2679,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2585 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; 2679 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
2586 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 2680 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
2587 2681
2588 kvm_write_cr2(svm->host_cr2);
2589
2590 kvm_load_fs(fs_selector); 2682 kvm_load_fs(fs_selector);
2591 kvm_load_gs(gs_selector); 2683 kvm_load_gs(gs_selector);
2592 kvm_load_ldt(ldt_selector); 2684 kvm_load_ldt(ldt_selector);
@@ -2602,7 +2694,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2602 2694
2603 svm->next_rip = 0; 2695 svm->next_rip = 0;
2604 2696
2605 svm_complete_interrupts(svm); 2697 if (npt_enabled) {
2698 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
2699 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
2700 }
2606} 2701}
2607 2702
2608#undef R 2703#undef R
@@ -2673,6 +2768,64 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
2673 return 0; 2768 return 0;
2674} 2769}
2675 2770
2771static const struct trace_print_flags svm_exit_reasons_str[] = {
2772 { SVM_EXIT_READ_CR0, "read_cr0" },
2773 { SVM_EXIT_READ_CR3, "read_cr3" },
2774 { SVM_EXIT_READ_CR4, "read_cr4" },
2775 { SVM_EXIT_READ_CR8, "read_cr8" },
2776 { SVM_EXIT_WRITE_CR0, "write_cr0" },
2777 { SVM_EXIT_WRITE_CR3, "write_cr3" },
2778 { SVM_EXIT_WRITE_CR4, "write_cr4" },
2779 { SVM_EXIT_WRITE_CR8, "write_cr8" },
2780 { SVM_EXIT_READ_DR0, "read_dr0" },
2781 { SVM_EXIT_READ_DR1, "read_dr1" },
2782 { SVM_EXIT_READ_DR2, "read_dr2" },
2783 { SVM_EXIT_READ_DR3, "read_dr3" },
2784 { SVM_EXIT_WRITE_DR0, "write_dr0" },
2785 { SVM_EXIT_WRITE_DR1, "write_dr1" },
2786 { SVM_EXIT_WRITE_DR2, "write_dr2" },
2787 { SVM_EXIT_WRITE_DR3, "write_dr3" },
2788 { SVM_EXIT_WRITE_DR5, "write_dr5" },
2789 { SVM_EXIT_WRITE_DR7, "write_dr7" },
2790 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" },
2791 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" },
2792 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" },
2793 { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" },
2794 { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" },
2795 { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" },
2796 { SVM_EXIT_INTR, "interrupt" },
2797 { SVM_EXIT_NMI, "nmi" },
2798 { SVM_EXIT_SMI, "smi" },
2799 { SVM_EXIT_INIT, "init" },
2800 { SVM_EXIT_VINTR, "vintr" },
2801 { SVM_EXIT_CPUID, "cpuid" },
2802 { SVM_EXIT_INVD, "invd" },
2803 { SVM_EXIT_HLT, "hlt" },
2804 { SVM_EXIT_INVLPG, "invlpg" },
2805 { SVM_EXIT_INVLPGA, "invlpga" },
2806 { SVM_EXIT_IOIO, "io" },
2807 { SVM_EXIT_MSR, "msr" },
2808 { SVM_EXIT_TASK_SWITCH, "task_switch" },
2809 { SVM_EXIT_SHUTDOWN, "shutdown" },
2810 { SVM_EXIT_VMRUN, "vmrun" },
2811 { SVM_EXIT_VMMCALL, "hypercall" },
2812 { SVM_EXIT_VMLOAD, "vmload" },
2813 { SVM_EXIT_VMSAVE, "vmsave" },
2814 { SVM_EXIT_STGI, "stgi" },
2815 { SVM_EXIT_CLGI, "clgi" },
2816 { SVM_EXIT_SKINIT, "skinit" },
2817 { SVM_EXIT_WBINVD, "wbinvd" },
2818 { SVM_EXIT_MONITOR, "monitor" },
2819 { SVM_EXIT_MWAIT, "mwait" },
2820 { SVM_EXIT_NPF, "npf" },
2821 { -1, NULL }
2822};
2823
2824static bool svm_gb_page_enable(void)
2825{
2826 return true;
2827}
2828
2676static struct kvm_x86_ops svm_x86_ops = { 2829static struct kvm_x86_ops svm_x86_ops = {
2677 .cpu_has_kvm_support = has_svm, 2830 .cpu_has_kvm_support = has_svm,
2678 .disabled_by_bios = is_disabled, 2831 .disabled_by_bios = is_disabled,
@@ -2710,6 +2863,7 @@ static struct kvm_x86_ops svm_x86_ops = {
2710 .set_gdt = svm_set_gdt, 2863 .set_gdt = svm_set_gdt,
2711 .get_dr = svm_get_dr, 2864 .get_dr = svm_get_dr,
2712 .set_dr = svm_set_dr, 2865 .set_dr = svm_set_dr,
2866 .cache_reg = svm_cache_reg,
2713 .get_rflags = svm_get_rflags, 2867 .get_rflags = svm_get_rflags,
2714 .set_rflags = svm_set_rflags, 2868 .set_rflags = svm_set_rflags,
2715 2869
@@ -2733,6 +2887,9 @@ static struct kvm_x86_ops svm_x86_ops = {
2733 .set_tss_addr = svm_set_tss_addr, 2887 .set_tss_addr = svm_set_tss_addr,
2734 .get_tdp_level = get_npt_level, 2888 .get_tdp_level = get_npt_level,
2735 .get_mt_mask = svm_get_mt_mask, 2889 .get_mt_mask = svm_get_mt_mask,
2890
2891 .exit_reasons_str = svm_exit_reasons_str,
2892 .gb_page_enable = svm_gb_page_enable,
2736}; 2893};
2737 2894
2738static int __init svm_init(void) 2895static int __init svm_init(void)