commit     6de410c2b0cc055ae9ee640c84331f6a70878d9b
tree       49dfc7df2f1977c2d665c99266ded92afc98734b  /drivers/kvm/vmx.c
parent     c6799ade4ae04b53a5f677e5289116155ff01574
parent     2ff81f70b56dc1cdd3bf2f08414608069db6ef1a
author     Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-05-06 16:21:18 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-05-06 16:21:18 -0400
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (66 commits)
KVM: Remove unused 'instruction_length'
KVM: Don't require explicit indication of completion of mmio or pio
KVM: Remove extraneous guest entry on mmio read
KVM: SVM: Only save/restore MSRs when needed
KVM: fix an if() condition
KVM: VMX: Add lazy FPU support for VT
KVM: VMX: Properly shadow the CR0 register in the vcpu struct
KVM: Don't complain about cpu erratum AA15
KVM: Lazy FPU support for SVM
KVM: Allow passing 64-bit values to the emulated read/write API
KVM: Per-vcpu statistics
KVM: VMX: Avoid unnecessary vcpu_load()/vcpu_put() cycles
KVM: MMU: Avoid heavy ASSERT at non debug mode.
KVM: VMX: Only save/restore MSR_K6_STAR if necessary
KVM: Fold drivers/kvm/kvm_vmx.h into drivers/kvm/vmx.c
KVM: VMX: Don't switch 64-bit msrs for 32-bit guests
KVM: VMX: Reduce unnecessary saving of host msrs
KVM: Handle guest page faults when emulating mmio
KVM: SVM: Report hardware exit reason to userspace instead of dmesg
KVM: Retry sleeping allocation if atomic allocation fails
...
Diffstat (limited to 'drivers/kvm/vmx.c')
 -rw-r--r--  drivers/kvm/vmx.c | 273
 1 file changed, 187 insertions(+), 86 deletions(-)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index fbbf9d6b299f..724db0027f00 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -17,7 +17,6 @@
 
 #include "kvm.h"
 #include "vmx.h"
-#include "kvm_vmx.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -70,6 +69,10 @@ static struct kvm_vmx_segment_field {
 	VMX_SEGMENT_FIELD(LDTR),
 };
 
+/*
+ * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
+ * away by decrementing the array size.
+ */
 static const u32 vmx_msr_index[] = {
 #ifdef CONFIG_X86_64
 	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
@@ -78,6 +81,19 @@ static const u32 vmx_msr_index[] = {
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
+#ifdef CONFIG_X86_64
+static unsigned msr_offset_kernel_gs_base;
+#define NR_64BIT_MSRS 4
+/*
+ * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt
+ * mechanism (cpu bug AA24)
+ */
+#define NR_BAD_MSRS 2
+#else
+#define NR_64BIT_MSRS 0
+#define NR_BAD_MSRS 0
+#endif
+
 static inline int is_page_fault(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -85,6 +101,13 @@ static inline int is_page_fault(u32 intr_info)
 		(INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
 }
 
+static inline int is_no_device(u32 intr_info)
+{
+	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
+			     INTR_INFO_VALID_MASK)) ==
+		(INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+}
+
 static inline int is_external_interrupt(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -200,6 +223,16 @@ static void vmcs_write64(unsigned long field, u64 value)
 #endif
 }
 
+static void vmcs_clear_bits(unsigned long field, u32 mask)
+{
+	vmcs_writel(field, vmcs_readl(field) & ~mask);
+}
+
+static void vmcs_set_bits(unsigned long field, u32 mask)
+{
+	vmcs_writel(field, vmcs_readl(field) | mask);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
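A note on the two helpers added above: they are plain read-modify-write wrappers around vmcs_readl()/vmcs_writel(), and the rest of this merge uses them to flip single control bits such as the #NM entry in the exception bitmap. A minimal user-space model of the same pattern (fake_vmcs and the EXCEPTION_BITMAP index are illustrative stand-ins, not real VMCS field encodings):

#include <assert.h>

/* Toy stand-in for the VMCS; real fields are hardware-encoded, not an array. */
static unsigned long fake_vmcs[16];

static unsigned long vmcs_readl(unsigned long field)
{
	return fake_vmcs[field];
}

static void vmcs_writel(unsigned long field, unsigned long value)
{
	fake_vmcs[field] = value;
}

/* Same read-modify-write bodies as the helpers in the patch. */
static void vmcs_clear_bits(unsigned long field, unsigned mask)
{
	vmcs_writel(field, vmcs_readl(field) & ~mask);
}

static void vmcs_set_bits(unsigned long field, unsigned mask)
{
	vmcs_writel(field, vmcs_readl(field) | mask);
}

int main(void)
{
	enum { EXCEPTION_BITMAP = 3 };	/* made-up index for this model */
	enum { NM_VECTOR = 7 };		/* #NM really is exception vector 7 */

	vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);	/* trap #NM */
	assert(vmcs_readl(EXCEPTION_BITMAP) & (1 << NM_VECTOR));

	vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);	/* stop trapping */
	assert(!(vmcs_readl(EXCEPTION_BITMAP) & (1 << NM_VECTOR)));
	return 0;
}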
@@ -297,6 +330,44 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
 }
 
 /*
+ * Set up the vmcs to automatically save and restore system
+ * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
+ * mode, as fiddling with msrs is very expensive.
+ */
+static void setup_msrs(struct kvm_vcpu *vcpu)
+{
+	int nr_skip, nr_good_msrs;
+
+	if (is_long_mode(vcpu))
+		nr_skip = NR_BAD_MSRS;
+	else
+		nr_skip = NR_64BIT_MSRS;
+	nr_good_msrs = vcpu->nmsrs - nr_skip;
+
+	/*
+	 * MSR_K6_STAR is only needed on long mode guests, and only
+	 * if efer.sce is enabled.
+	 */
+	if (find_msr_entry(vcpu, MSR_K6_STAR)) {
+		--nr_good_msrs;
+#ifdef CONFIG_X86_64
+		if (is_long_mode(vcpu) && (vcpu->shadow_efer & EFER_SCE))
+			++nr_good_msrs;
+#endif
+	}
+
+	vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
+		    virt_to_phys(vcpu->guest_msrs + nr_skip));
+	vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
+		    virt_to_phys(vcpu->guest_msrs + nr_skip));
+	vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
+		    virt_to_phys(vcpu->host_msrs + nr_skip));
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
+}
+
+/*
  * reads and returns guest's timestamp counter "register"
  * guest_tsc = host_tsc + tsc_offset -- 21.3
  */
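The counting in setup_msrs() is the heart of the "Only save/restore MSRs when needed" commits: legacy-mode guests skip all four 64-bit syscall MSRs, long-mode guests skip only the two switched by hand (erratum AA24), and MSR_K6_STAR is dropped from the tail of the array when the guest cannot execute SYSCALL. A runnable user-space sketch of just that arithmetic, condensing the decrement/re-increment in the patch into a single test; long_mode/has_star/sce stand in for the vcpu state the kernel code reads:

#include <stdio.h>

#define NR_64BIT_MSRS 4	/* SYSCALL_MASK, LSTAR, CSTAR, KERNEL_GS_BASE */
#define NR_BAD_MSRS   2	/* SYSCALL_MASK and LSTAR, switched by hand (AA24) */

static int good_msrs(int nmsrs, int long_mode, int has_star, int sce)
{
	/* Legacy guests skip every 64-bit MSR; long mode only the bad ones. */
	int nr_skip = long_mode ? NR_BAD_MSRS : NR_64BIT_MSRS;
	int nr_good = nmsrs - nr_skip;

	/*
	 * MSR_K6_STAR sits last in vmx_msr_index[] precisely so it can be
	 * dropped by shrinking the count when the guest cannot use SYSCALL.
	 */
	if (has_star && !(long_mode && sce))
		--nr_good;
	return nr_good;
}

int main(void)
{
	printf("long mode + SCE   -> %d MSRs\n", good_msrs(5, 1, 1, 1)); /* 3 */
	printf("long mode, no SCE -> %d MSRs\n", good_msrs(5, 1, 1, 0)); /* 2 */
	printf("legacy mode       -> %d MSRs\n", good_msrs(5, 0, 1, 0)); /* 0 */
	return 0;
}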
@@ -712,6 +783,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 	vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
 	vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+	if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
+		vmcs_writel(GUEST_CS_BASE, 0xf0000);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
 
 	fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
@@ -754,11 +827,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
-static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
+static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
-	vcpu->cr0 &= KVM_GUEST_CR0_MASK;
-	vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK;
-
 	vcpu->cr4 &= KVM_GUEST_CR4_MASK;
 	vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
 }
@@ -780,22 +850,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	}
 #endif
 
-	vmcs_writel(CR0_READ_SHADOW, cr0);
-	vmcs_writel(GUEST_CR0,
-		    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
-	vcpu->cr0 = cr0;
-}
-
-/*
- * Used when restoring the VM to avoid corrupting segment registers
- */
-static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
-{
-	if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
-		enter_rmode(vcpu);
+	if (!(cr0 & CR0_TS_MASK)) {
+		vcpu->fpu_active = 1;
+		vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK);
+	}
 
-	vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0);
-	update_exception_bitmap(vcpu);
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0,
 		    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
@@ -805,6 +864,12 @@ static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	vmcs_writel(GUEST_CR3, cr3);
+
+	if (!(vcpu->cr0 & CR0_TS_MASK)) {
+		vcpu->fpu_active = 0;
+		vmcs_set_bits(GUEST_CR0, CR0_TS_MASK);
+		vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+	}
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
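Taken together, vmx_set_cr0(), vmx_set_cr3() and the new #NM handling implement the lazy-FPU scheme from the "Lazy FPU support for VT" commit: a CR3 load (a cheap proxy for a guest context switch) gives the FPU up by setting CR0.TS and arming the #NM trap, and the guest's first FPU instruction afterwards reclaims it. A user-space model of that state machine; the struct and helpers are illustrative, not kernel API:

#include <stdbool.h>
#include <stdio.h>

#define CR0_TS (1ul << 3)

struct vcpu_model {
	bool fpu_active;	/* is guest FPU state currently loaded? */
	bool nm_trapped;	/* is #NM intercepted by the hypervisor? */
	unsigned long guest_cr0;
};

/* Guest reloads CR3: lazily drop the FPU (mirrors vmx_set_cr3 above). */
static void on_cr3_load(struct vcpu_model *v)
{
	if (!(v->guest_cr0 & CR0_TS)) {
		v->fpu_active = false;
		v->guest_cr0 |= CR0_TS;		/* next FPU insn faults */
		v->nm_trapped = true;
	}
}

/* First FPU use after the drop (mirrors the is_no_device() exit path). */
static void on_nm_fault(struct vcpu_model *v)
{
	v->fpu_active = true;
	v->nm_trapped = false;
	v->guest_cr0 &= ~CR0_TS;		/* guest never asked for TS */
}

int main(void)
{
	struct vcpu_model v = { .fpu_active = true, .guest_cr0 = 0 };

	on_cr3_load(&v);
	printf("after cr3 load: fpu=%d #NM trap=%d\n", v.fpu_active, v.nm_trapped);
	on_nm_fault(&v);
	printf("after #NM     : fpu=%d #NM trap=%d\n", v.fpu_active, v.nm_trapped);
	return 0;
}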
@@ -835,6 +900,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
 		msr->data = efer & ~EFER_LME;
 	}
+	setup_msrs(vcpu);
 }
 
 #endif
@@ -878,7 +944,14 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
-	if (var->unusable)
+	if (vcpu->rmode.active && var->s) {
+		/*
+		 * Hack real-mode segments into vm86 compatibility.
+		 */
+		if (var->base == 0xffff0000 && var->selector == 0xf000)
+			vmcs_writel(sf->base, 0xf0000);
+		ar = 0xf3;
+	} else if (var->unusable)
 		ar = 1 << 16;
 	else {
 		ar = var->type & 15;
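The constant 0xf3 written for vm86-compatible segments packs the access-rights fields for "present, DPL 3, read/write data segment, accessed", which is exactly what vm86 mode requires. A stand-alone decoder, for illustration only, using the standard AR-byte field layout:

#include <stdio.h>

int main(void)
{
	unsigned ar = 0xf3;	/* the value vmx_set_segment() uses above */

	printf("type = %u\n", ar & 15);		/* 3: read/write data, accessed */
	printf("S    = %u\n", (ar >> 4) & 1);	/* 1: code/data, not system */
	printf("DPL  = %u\n", (ar >> 5) & 3);	/* 3: ring 3, as vm86 requires */
	printf("P    = %u\n", (ar >> 7) & 1);	/* 1: present */
	return 0;
}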
@@ -933,9 +1006,9 @@ static int init_rmode_tss(struct kvm* kvm)
 	gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
 	char *page;
 
-	p1 = _gfn_to_page(kvm, fn++);
-	p2 = _gfn_to_page(kvm, fn++);
-	p3 = _gfn_to_page(kvm, fn);
+	p1 = gfn_to_page(kvm, fn++);
+	p2 = gfn_to_page(kvm, fn++);
+	p3 = gfn_to_page(kvm, fn);
 
 	if (!p1 || !p2 || !p3) {
 		kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__);
@@ -991,7 +1064,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 	struct descriptor_table dt;
 	int i;
 	int ret = 0;
-	int nr_good_msrs;
 	extern asmlinkage void kvm_vmx_return(void);
 
 	if (!init_rmode_tss(vcpu->kvm)) {
@@ -1136,23 +1208,17 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->host_msrs[j].reserved = 0;
 		vcpu->host_msrs[j].data = data;
 		vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+#ifdef CONFIG_X86_64
+		if (index == MSR_KERNEL_GS_BASE)
+			msr_offset_kernel_gs_base = j;
+#endif
 		++vcpu->nmsrs;
 	}
-	printk(KERN_DEBUG "kvm: msrs: %d\n", vcpu->nmsrs);
 
-	nr_good_msrs = vcpu->nmsrs - NR_BAD_MSRS;
-	vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
-	vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
-	vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->host_msrs + NR_BAD_MSRS));
+	setup_msrs(vcpu);
+
 	vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS,
 			       (HOST_IS_64 << 9));  /* 22.2,1, 20.7.1 */
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
-
 
 	/* 22.2.1, 20.8.1 */
 	vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS,
@@ -1164,7 +1230,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 	vmcs_writel(TPR_THRESHOLD, 0);
 #endif
 
-	vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK);
+	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
 	vcpu->cr0 = 0x60000010;
@@ -1190,7 +1256,7 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
 	u16 sp = vmcs_readl(GUEST_RSP);
 	u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT);
 
-	if (sp > ss_limit || sp - 6 > sp) {
+	if (sp > ss_limit || sp < 6 ) {
 		vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
 			    __FUNCTION__,
 			    vmcs_readl(GUEST_RSP),
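This hunk is the "KVM: fix an if() condition" commit from the shortlog: with a u16 stack pointer, C's integer promotions evaluate sp - 6 as a signed int, so the old wrap-around test could never be true and the stack-room check silently passed. A small demonstration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t sp = 2;	/* too little room for the 6-byte IRQ frame */

	/* old test: 2 - 6 is promoted to int -4, which is never > 2 */
	printf("old test: %s\n", (sp - 6 > sp) ? "caught" : "missed");

	/* new test from this merge: catches the underflow directly */
	printf("new test: %s\n", (sp < 6) ? "caught" : "missed");
	return 0;
}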
@@ -1330,6 +1396,15 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		asm ("int $2");
 		return 1;
 	}
+
+	if (is_no_device(intr_info)) {
+		vcpu->fpu_active = 1;
+		vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+		if (!(vcpu->cr0 & CR0_TS_MASK))
+			vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+		return 1;
+	}
+
 	error_code = 0;
 	rip = vmcs_readl(GUEST_RIP);
 	if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
@@ -1355,7 +1430,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	case EMULATE_DONE:
 		return 1;
 	case EMULATE_DO_MMIO:
-		++kvm_stat.mmio_exits;
+		++vcpu->stat.mmio_exits;
 		kvm_run->exit_reason = KVM_EXIT_MMIO;
 		return 0;
 	case EMULATE_FAIL:
@@ -1384,7 +1459,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int handle_external_interrupt(struct kvm_vcpu *vcpu,
 				     struct kvm_run *kvm_run)
 {
-	++kvm_stat.irq_exits;
+	++vcpu->stat.irq_exits;
 	return 1;
 }
 
@@ -1394,7 +1469,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 0;
 }
 
-static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
+static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count)
 {
 	u64 inst;
 	gva_t rip;
@@ -1439,33 +1514,35 @@ static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
 done:
 	countr_size *= 8;
 	*count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size));
+	//printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]);
 	return 1;
 }
 
 static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	u64 exit_qualification;
+	int size, down, in, string, rep;
+	unsigned port;
+	unsigned long count;
+	gva_t address;
 
-	++kvm_stat.io_exits;
+	++vcpu->stat.io_exits;
 	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
-	kvm_run->exit_reason = KVM_EXIT_IO;
-	if (exit_qualification & 8)
-		kvm_run->io.direction = KVM_EXIT_IO_IN;
-	else
-		kvm_run->io.direction = KVM_EXIT_IO_OUT;
-	kvm_run->io.size = (exit_qualification & 7) + 1;
-	kvm_run->io.string = (exit_qualification & 16) != 0;
-	kvm_run->io.string_down
-		= (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
-	kvm_run->io.rep = (exit_qualification & 32) != 0;
-	kvm_run->io.port = exit_qualification >> 16;
-	if (kvm_run->io.string) {
-		if (!get_io_count(vcpu, &kvm_run->io.count))
+	in = (exit_qualification & 8) != 0;
+	size = (exit_qualification & 7) + 1;
+	string = (exit_qualification & 16) != 0;
+	down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
+	count = 1;
+	rep = (exit_qualification & 32) != 0;
+	port = exit_qualification >> 16;
+	address = 0;
+	if (string) {
+		if (rep && !get_io_count(vcpu, &count))
 			return 1;
-		kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS);
-	} else
-		kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */
-	return 0;
+		address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+	}
+	return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down,
+			     address, rep, port);
 }
 
 static void
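The reworked handle_io() no longer fills a kvm_run I/O block for userspace; it decodes the VM-exit qualification and hands everything to kvm_setup_pio(), part of the "Don't require explicit indication of completion of mmio or pio" change. The bitfield decode itself is easy to lift out and test in isolation; a sketch using the same bit positions as the code above (the direction flag from RFLAGS.DF is omitted since it does not come from the qualification word):

#include <stdint.h>
#include <stdio.h>

struct io_exit {
	int size;	/* access width in bytes: 1, 2 or 4 */
	int in;		/* 1 = IN, 0 = OUT */
	int string;	/* INS/OUTS */
	int rep;	/* REP prefix */
	unsigned port;	/* I/O port number */
};

static struct io_exit decode_io(uint64_t q)
{
	struct io_exit e = {
		.size	= (q & 7) + 1,
		.in	= (q & 8) != 0,
		.string	= (q & 16) != 0,
		.rep	= (q & 32) != 0,
		.port	= q >> 16,
	};
	return e;
}

int main(void)
{
	/* e.g. a "rep outsb" to port 0x3f8: size 1, OUT, string, rep */
	struct io_exit e = decode_io((0x3f8ull << 16) | 32 | 16);

	printf("port %#x size %d in=%d string=%d rep=%d\n",
	       e.port, e.size, e.in, e.string, e.rep);
	return 0;
}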
@@ -1514,6 +1591,15 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			return 1;
 		};
 		break;
+	case 2: /* clts */
+		vcpu_load_rsp_rip(vcpu);
+		vcpu->fpu_active = 1;
+		vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+		vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+		vcpu->cr0 &= ~CR0_TS_MASK;
+		vmcs_writel(CR0_READ_SHADOW, vcpu->cr0);
+		skip_emulated_instruction(vcpu);
+		return 1;
 	case 1: /*mov from cr*/
 		switch (cr) {
 		case 3:
@@ -1523,8 +1609,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8:
-			printk(KERN_DEBUG "handle_cr: read CR8 "
-			       "cpu erratum AA15\n");
 			vcpu_load_rsp_rip(vcpu);
 			vcpu->regs[reg] = vcpu->cr8;
 			vcpu_put_rsp_rip(vcpu);
@@ -1583,8 +1667,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	kvm_run->exit_reason = KVM_EXIT_CPUID;
-	return 0;
+	kvm_emulate_cpuid(vcpu);
+	return 1;
 }
 
 static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -1639,7 +1723,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	if (kvm_run->request_interrupt_window &&
 	    !vcpu->irq_summary) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		++kvm_stat.irq_window_exits;
+		++vcpu->stat.irq_window_exits;
 		return 0;
 	}
 	return 1;
@@ -1652,13 +1736,13 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
-	++kvm_stat.halt_exits;
+	++vcpu->stat.halt_exits;
 	return 0;
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3);
+	skip_emulated_instruction(vcpu);
 	return kvm_hypercall(vcpu, kvm_run);
 }
 
@@ -1699,7 +1783,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	    exit_reason != EXIT_REASON_EXCEPTION_NMI )
 		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
 		       "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
-	kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -1763,11 +1846,21 @@ again:
 	if (vcpu->guest_debug.enabled)
 		kvm_guest_debug_pre(vcpu);
 
-	fx_save(vcpu->host_fx_image);
-	fx_restore(vcpu->guest_fx_image);
+	if (vcpu->fpu_active) {
+		fx_save(vcpu->host_fx_image);
+		fx_restore(vcpu->guest_fx_image);
+	}
+	/*
+	 * Loading guest fpu may have cleared host cr0.ts
+	 */
+	vmcs_writel(HOST_CR0, read_cr0());
 
-	save_msrs(vcpu->host_msrs, vcpu->nmsrs);
-	load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+#ifdef CONFIG_X86_64
+	if (is_long_mode(vcpu)) {
+		save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1);
+		load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+	}
+#endif
 
 	asm (
 		/* Store host registers */
@@ -1909,21 +2002,28 @@ again:
 
 		reload_tss();
 	}
-	++kvm_stat.exits;
+	++vcpu->stat.exits;
 
-	save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
-	load_msrs(vcpu->host_msrs, NR_BAD_MSRS);
+#ifdef CONFIG_X86_64
+	if (is_long_mode(vcpu)) {
+		save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+		load_msrs(vcpu->host_msrs, NR_BAD_MSRS);
+	}
+#endif
+
+	if (vcpu->fpu_active) {
+		fx_save(vcpu->guest_fx_image);
+		fx_restore(vcpu->host_fx_image);
+	}
 
-	fx_save(vcpu->guest_fx_image);
-	fx_restore(vcpu->host_fx_image);
 	vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 
-	kvm_run->exit_type = 0;
 	if (fail) {
-		kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
-		kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR);
+		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		kvm_run->fail_entry.hardware_entry_failure_reason
+			= vmcs_read32(VM_INSTRUCTION_ERROR);
 		r = 0;
 	} else {
 		/*
@@ -1933,19 +2033,20 @@ again:
 			profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
 
 		vcpu->launched = 1;
-		kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT;
 		r = kvm_handle_exit(kvm_run, vcpu);
 		if (r > 0) {
 			/* Give scheduler a change to reschedule. */
 			if (signal_pending(current)) {
-				++kvm_stat.signal_exits;
+				++vcpu->stat.signal_exits;
 				post_kvm_run_save(vcpu, kvm_run);
+				kvm_run->exit_reason = KVM_EXIT_INTR;
 				return -EINTR;
 			}
 
 			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-				++kvm_stat.request_irq_exits;
+				++vcpu->stat.request_irq_exits;
 				post_kvm_run_save(vcpu, kvm_run);
+				kvm_run->exit_reason = KVM_EXIT_INTR;
 				return -EINTR;
 			}
 
@@ -1969,7 +2070,7 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
 {
 	u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-	++kvm_stat.pf_guest;
+	++vcpu->stat.pf_guest;
 
 	if (is_page_fault(vect_info)) {
 		printk(KERN_DEBUG "inject_page_fault: "
@@ -2026,6 +2127,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 	vmcs_clear(vmcs);
 	vcpu->vmcs = vmcs;
 	vcpu->launched = 0;
+	vcpu->fpu_active = 1;
 
 	return 0;
 
@@ -2062,9 +2164,8 @@ static struct kvm_arch_ops vmx_arch_ops = {
 	.get_segment = vmx_get_segment,
 	.set_segment = vmx_set_segment,
 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
-	.decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits,
+	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
 	.set_cr0 = vmx_set_cr0,
-	.set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch,
 	.set_cr3 = vmx_set_cr3,
 	.set_cr4 = vmx_set_cr4,
 #ifdef CONFIG_X86_64