path: root/drivers/kvm/vmx.c
author     Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-05-06 16:21:18 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-05-06 16:21:18 -0400
commit     6de410c2b0cc055ae9ee640c84331f6a70878d9b (patch)
tree       49dfc7df2f1977c2d665c99266ded92afc98734b /drivers/kvm/vmx.c
parent     c6799ade4ae04b53a5f677e5289116155ff01574 (diff)
parent     2ff81f70b56dc1cdd3bf2f08414608069db6ef1a (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (66 commits)
  KVM: Remove unused 'instruction_length'
  KVM: Don't require explicit indication of completion of mmio or pio
  KVM: Remove extraneous guest entry on mmio read
  KVM: SVM: Only save/restore MSRs when needed
  KVM: fix an if() condition
  KVM: VMX: Add lazy FPU support for VT
  KVM: VMX: Properly shadow the CR0 register in the vcpu struct
  KVM: Don't complain about cpu erratum AA15
  KVM: Lazy FPU support for SVM
  KVM: Allow passing 64-bit values to the emulated read/write API
  KVM: Per-vcpu statistics
  KVM: VMX: Avoid unnecessary vcpu_load()/vcpu_put() cycles
  KVM: MMU: Avoid heavy ASSERT at non debug mode.
  KVM: VMX: Only save/restore MSR_K6_STAR if necessary
  KVM: Fold drivers/kvm/kvm_vmx.h into drivers/kvm/vmx.c
  KVM: VMX: Don't switch 64-bit msrs for 32-bit guests
  KVM: VMX: Reduce unnecessary saving of host msrs
  KVM: Handle guest page faults when emulating mmio
  KVM: SVM: Report hardware exit reason to userspace instead of dmesg
  KVM: Retry sleeping allocation if atomic allocation fails
  ...
Diffstat (limited to 'drivers/kvm/vmx.c')
-rw-r--r--  drivers/kvm/vmx.c  273
1 file changed, 187 insertions(+), 86 deletions(-)
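The recurring pattern in the hunks below is lazy FPU switching: KVM leaves the guest FPU image unloaded, keeps CR0.TS set in the hardware guest CR0, and intercepts the #NM (device-not-available) exception; only when the guest actually executes an FPU instruction does the handler load the guest FPU state, clear TS, and drop the intercept (see the new vcpu->fpu_active flag, is_no_device(), and the CLTS/CR0/CR3 handling in the diff). The user-space C sketch below models just that state machine; it is an illustration with invented names (struct lazy_fpu, defer_fpu, first_fpu_use), not code from this commit.

    #include <stdio.h>
    #include <stdbool.h>

    /* Toy model only: stands in for the fpu_active / CR0.TS / #NM-intercept
     * state that the real code keeps in struct kvm_vcpu and in the VMCS. */
    struct lazy_fpu {
            bool fpu_active;   /* guest FPU image currently loaded            */
            bool cr0_ts;       /* hardware CR0.TS while the guest runs        */
            bool intercept_nm; /* #NM (device-not-available) trapped by KVM   */
    };

    /* Defer the FPU: nothing is loaded until the guest touches it. */
    static void defer_fpu(struct lazy_fpu *f)
    {
            f->fpu_active = false;
            f->cr0_ts = true;
            f->intercept_nm = true;
    }

    /* First FPU instruction raises #NM; load state on demand, stop trapping. */
    static void first_fpu_use(struct lazy_fpu *f)
    {
            f->fpu_active = true;    /* the real code would fx_restore() here */
            f->intercept_nm = false;
            f->cr0_ts = false;       /* let the retried instruction complete  */
    }

    int main(void)
    {
            struct lazy_fpu f;

            defer_fpu(&f);
            printf("idle  : active=%d ts=%d trap_nm=%d\n",
                   f.fpu_active, f.cr0_ts, f.intercept_nm);

            first_fpu_use(&f);       /* guest executed its first FPU insn */
            printf("in use: active=%d ts=%d trap_nm=%d\n",
                   f.fpu_active, f.cr0_ts, f.intercept_nm);
            return 0;
    }

In the patch itself, vmx_set_cr3() re-arms the lazy path on each address-space switch, while the #NM handler, the CLTS emulation in handle_cr(), and vmx_set_cr0() disable it as soon as the guest starts using the FPU.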
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index fbbf9d6b299f..724db0027f00 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -17,7 +17,6 @@
 
 #include "kvm.h"
 #include "vmx.h"
-#include "kvm_vmx.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -70,6 +69,10 @@ static struct kvm_vmx_segment_field {
         VMX_SEGMENT_FIELD(LDTR),
 };
 
+/*
+ * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
+ * away by decrementing the array size.
+ */
 static const u32 vmx_msr_index[] = {
 #ifdef CONFIG_X86_64
         MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
@@ -78,6 +81,19 @@ static const u32 vmx_msr_index[] = {
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
+#ifdef CONFIG_X86_64
+static unsigned msr_offset_kernel_gs_base;
+#define NR_64BIT_MSRS 4
+/*
+ * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt
+ * mechanism (cpu bug AA24)
+ */
+#define NR_BAD_MSRS 2
+#else
+#define NR_64BIT_MSRS 0
+#define NR_BAD_MSRS 0
+#endif
+
 static inline int is_page_fault(u32 intr_info)
 {
         return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -85,6 +101,13 @@ static inline int is_page_fault(u32 intr_info)
                 (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
 }
 
+static inline int is_no_device(u32 intr_info)
+{
+        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
+                             INTR_INFO_VALID_MASK)) ==
+                (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+}
+
 static inline int is_external_interrupt(u32 intr_info)
 {
         return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -200,6 +223,16 @@ static void vmcs_write64(unsigned long field, u64 value)
 #endif
 }
 
+static void vmcs_clear_bits(unsigned long field, u32 mask)
+{
+        vmcs_writel(field, vmcs_readl(field) & ~mask);
+}
+
+static void vmcs_set_bits(unsigned long field, u32 mask)
+{
+        vmcs_writel(field, vmcs_readl(field) | mask);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -297,6 +330,44 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
 }
 
 /*
+ * Set up the vmcs to automatically save and restore system
+ * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
+ * mode, as fiddling with msrs is very expensive.
+ */
+static void setup_msrs(struct kvm_vcpu *vcpu)
+{
+        int nr_skip, nr_good_msrs;
+
+        if (is_long_mode(vcpu))
+                nr_skip = NR_BAD_MSRS;
+        else
+                nr_skip = NR_64BIT_MSRS;
+        nr_good_msrs = vcpu->nmsrs - nr_skip;
+
+        /*
+         * MSR_K6_STAR is only needed on long mode guests, and only
+         * if efer.sce is enabled.
+         */
+        if (find_msr_entry(vcpu, MSR_K6_STAR)) {
+                --nr_good_msrs;
+#ifdef CONFIG_X86_64
+                if (is_long_mode(vcpu) && (vcpu->shadow_efer & EFER_SCE))
+                        ++nr_good_msrs;
+#endif
+        }
+
+        vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
+                    virt_to_phys(vcpu->guest_msrs + nr_skip));
+        vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
+                    virt_to_phys(vcpu->guest_msrs + nr_skip));
+        vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
+                    virt_to_phys(vcpu->host_msrs + nr_skip));
+        vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
+        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs);  /* 22.2.2 */
+        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
+}
+
+/*
  * reads and returns guest's timestamp counter "register"
  * guest_tsc = host_tsc + tsc_offset    -- 21.3
  */
@@ -712,6 +783,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
         vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
         vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+        if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
+                vmcs_writel(GUEST_CS_BASE, 0xf0000);
         vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
 
         fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
@@ -754,11 +827,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
-static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
+static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
-        vcpu->cr0 &= KVM_GUEST_CR0_MASK;
-        vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK;
-
         vcpu->cr4 &= KVM_GUEST_CR4_MASK;
         vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
 }
@@ -780,22 +850,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
         }
 #endif
 
-        vmcs_writel(CR0_READ_SHADOW, cr0);
-        vmcs_writel(GUEST_CR0,
-                    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
-        vcpu->cr0 = cr0;
-}
-
-/*
- * Used when restoring the VM to avoid corrupting segment registers
- */
-static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
-{
-        if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
-                enter_rmode(vcpu);
+        if (!(cr0 & CR0_TS_MASK)) {
+                vcpu->fpu_active = 1;
+                vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK);
+        }
 
-        vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0);
-        update_exception_bitmap(vcpu);
         vmcs_writel(CR0_READ_SHADOW, cr0);
         vmcs_writel(GUEST_CR0,
                     (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
@@ -805,6 +864,12 @@ static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
         vmcs_writel(GUEST_CR3, cr3);
+
+        if (!(vcpu->cr0 & CR0_TS_MASK)) {
+                vcpu->fpu_active = 0;
+                vmcs_set_bits(GUEST_CR0, CR0_TS_MASK);
+                vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+        }
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -835,6 +900,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
                 msr->data = efer & ~EFER_LME;
         }
+        setup_msrs(vcpu);
 }
 
 #endif
@@ -878,7 +944,14 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
         vmcs_writel(sf->base, var->base);
         vmcs_write32(sf->limit, var->limit);
         vmcs_write16(sf->selector, var->selector);
-        if (var->unusable)
+        if (vcpu->rmode.active && var->s) {
+                /*
+                 * Hack real-mode segments into vm86 compatibility.
+                 */
+                if (var->base == 0xffff0000 && var->selector == 0xf000)
+                        vmcs_writel(sf->base, 0xf0000);
+                ar = 0xf3;
+        } else if (var->unusable)
                 ar = 1 << 16;
         else {
                 ar = var->type & 15;
@@ -933,9 +1006,9 @@ static int init_rmode_tss(struct kvm* kvm)
         gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
         char *page;
 
-        p1 = _gfn_to_page(kvm, fn++);
-        p2 = _gfn_to_page(kvm, fn++);
-        p3 = _gfn_to_page(kvm, fn);
+        p1 = gfn_to_page(kvm, fn++);
+        p2 = gfn_to_page(kvm, fn++);
+        p3 = gfn_to_page(kvm, fn);
 
         if (!p1 || !p2 || !p3) {
                 kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__);
@@ -991,7 +1064,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
         struct descriptor_table dt;
         int i;
         int ret = 0;
-        int nr_good_msrs;
         extern asmlinkage void kvm_vmx_return(void);
 
         if (!init_rmode_tss(vcpu->kvm)) {
@@ -1136,23 +1208,17 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
                 vcpu->host_msrs[j].reserved = 0;
                 vcpu->host_msrs[j].data = data;
                 vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+#ifdef CONFIG_X86_64
+                if (index == MSR_KERNEL_GS_BASE)
+                        msr_offset_kernel_gs_base = j;
+#endif
                 ++vcpu->nmsrs;
         }
-        printk(KERN_DEBUG "kvm: msrs: %d\n", vcpu->nmsrs);
 
-        nr_good_msrs = vcpu->nmsrs - NR_BAD_MSRS;
-        vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
-                    virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
-        vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
-                    virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
-        vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
-                    virt_to_phys(vcpu->host_msrs + NR_BAD_MSRS));
+        setup_msrs(vcpu);
+
         vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS,
                                (HOST_IS_64 << 9));  /* 22.2,1, 20.7.1 */
-        vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
-        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
-        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
-
 
         /* 22.2.1, 20.8.1 */
         vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS,
@@ -1164,7 +1230,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
         vmcs_writel(TPR_THRESHOLD, 0);
 #endif
 
-        vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK);
+        vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
         vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
         vcpu->cr0 = 0x60000010;
@@ -1190,7 +1256,7 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
         u16 sp = vmcs_readl(GUEST_RSP);
         u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT);
 
-        if (sp > ss_limit || sp - 6 > sp) {
+        if (sp > ss_limit || sp < 6 ) {
                 vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
                             __FUNCTION__,
                             vmcs_readl(GUEST_RSP),
@@ -1330,6 +1396,15 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                 asm ("int $2");
                 return 1;
         }
+
+        if (is_no_device(intr_info)) {
+                vcpu->fpu_active = 1;
+                vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+                if (!(vcpu->cr0 & CR0_TS_MASK))
+                        vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+                return 1;
+        }
+
         error_code = 0;
         rip = vmcs_readl(GUEST_RIP);
         if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
@@ -1355,7 +1430,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         case EMULATE_DONE:
                 return 1;
         case EMULATE_DO_MMIO:
-                ++kvm_stat.mmio_exits;
+                ++vcpu->stat.mmio_exits;
                 kvm_run->exit_reason = KVM_EXIT_MMIO;
                 return 0;
         case EMULATE_FAIL:
@@ -1384,7 +1459,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int handle_external_interrupt(struct kvm_vcpu *vcpu,
                                      struct kvm_run *kvm_run)
 {
-        ++kvm_stat.irq_exits;
+        ++vcpu->stat.irq_exits;
         return 1;
 }
 
@@ -1394,7 +1469,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         return 0;
 }
 
-static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
+static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count)
 {
         u64 inst;
         gva_t rip;
@@ -1439,33 +1514,35 @@ static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
 done:
         countr_size *= 8;
         *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size));
+        //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]);
         return 1;
 }
 
 static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
         u64 exit_qualification;
+        int size, down, in, string, rep;
+        unsigned port;
+        unsigned long count;
+        gva_t address;
 
-        ++kvm_stat.io_exits;
+        ++vcpu->stat.io_exits;
         exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
-        kvm_run->exit_reason = KVM_EXIT_IO;
-        if (exit_qualification & 8)
-                kvm_run->io.direction = KVM_EXIT_IO_IN;
-        else
-                kvm_run->io.direction = KVM_EXIT_IO_OUT;
-        kvm_run->io.size = (exit_qualification & 7) + 1;
-        kvm_run->io.string = (exit_qualification & 16) != 0;
-        kvm_run->io.string_down
-                = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
-        kvm_run->io.rep = (exit_qualification & 32) != 0;
-        kvm_run->io.port = exit_qualification >> 16;
-        if (kvm_run->io.string) {
-                if (!get_io_count(vcpu, &kvm_run->io.count))
+        in = (exit_qualification & 8) != 0;
+        size = (exit_qualification & 7) + 1;
+        string = (exit_qualification & 16) != 0;
+        down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
+        count = 1;
+        rep = (exit_qualification & 32) != 0;
+        port = exit_qualification >> 16;
+        address = 0;
+        if (string) {
+                if (rep && !get_io_count(vcpu, &count))
                         return 1;
-                kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS);
-        } else
-                kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */
-        return 0;
+                address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+        }
+        return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down,
+                             address, rep, port);
 }
 
 static void
@@ -1514,6 +1591,15 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                         return 1;
                 };
                 break;
+        case 2: /* clts */
+                vcpu_load_rsp_rip(vcpu);
+                vcpu->fpu_active = 1;
+                vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+                vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+                vcpu->cr0 &= ~CR0_TS_MASK;
+                vmcs_writel(CR0_READ_SHADOW, vcpu->cr0);
+                skip_emulated_instruction(vcpu);
+                return 1;
         case 1: /*mov from cr*/
                 switch (cr) {
                 case 3:
@@ -1523,8 +1609,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                         skip_emulated_instruction(vcpu);
                         return 1;
                 case 8:
-                        printk(KERN_DEBUG "handle_cr: read CR8 "
-                               "cpu erratum AA15\n");
                         vcpu_load_rsp_rip(vcpu);
                         vcpu->regs[reg] = vcpu->cr8;
                         vcpu_put_rsp_rip(vcpu);
@@ -1583,8 +1667,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-        kvm_run->exit_reason = KVM_EXIT_CPUID;
-        return 0;
+        kvm_emulate_cpuid(vcpu);
+        return 1;
 }
 
 static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -1639,7 +1723,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
         if (kvm_run->request_interrupt_window &&
             !vcpu->irq_summary) {
                 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-                ++kvm_stat.irq_window_exits;
+                ++vcpu->stat.irq_window_exits;
                 return 0;
         }
         return 1;
@@ -1652,13 +1736,13 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                 return 1;
 
         kvm_run->exit_reason = KVM_EXIT_HLT;
-        ++kvm_stat.halt_exits;
+        ++vcpu->stat.halt_exits;
         return 0;
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-        vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3);
+        skip_emulated_instruction(vcpu);
         return kvm_hypercall(vcpu, kvm_run);
 }
 
@@ -1699,7 +1783,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                  exit_reason != EXIT_REASON_EXCEPTION_NMI )
                 printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
                        "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
-        kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
         if (exit_reason < kvm_vmx_max_exit_handlers
             && kvm_vmx_exit_handlers[exit_reason])
                 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -1763,11 +1846,21 @@ again:
         if (vcpu->guest_debug.enabled)
                 kvm_guest_debug_pre(vcpu);
 
-        fx_save(vcpu->host_fx_image);
-        fx_restore(vcpu->guest_fx_image);
+        if (vcpu->fpu_active) {
+                fx_save(vcpu->host_fx_image);
+                fx_restore(vcpu->guest_fx_image);
+        }
+        /*
+         * Loading guest fpu may have cleared host cr0.ts
+         */
+        vmcs_writel(HOST_CR0, read_cr0());
 
-        save_msrs(vcpu->host_msrs, vcpu->nmsrs);
-        load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+#ifdef CONFIG_X86_64
+        if (is_long_mode(vcpu)) {
+                save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1);
+                load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+        }
+#endif
 
         asm (
                 /* Store host registers */
@@ -1909,21 +2002,28 @@ again:
 
                 reload_tss();
         }
-        ++kvm_stat.exits;
+        ++vcpu->stat.exits;
 
-        save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
-        load_msrs(vcpu->host_msrs, NR_BAD_MSRS);
+#ifdef CONFIG_X86_64
+        if (is_long_mode(vcpu)) {
+                save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+                load_msrs(vcpu->host_msrs, NR_BAD_MSRS);
+        }
+#endif
+
+        if (vcpu->fpu_active) {
+                fx_save(vcpu->guest_fx_image);
+                fx_restore(vcpu->host_fx_image);
+        }
 
-        fx_save(vcpu->guest_fx_image);
-        fx_restore(vcpu->host_fx_image);
         vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
         asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 
-        kvm_run->exit_type = 0;
         if (fail) {
-                kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
-                kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR);
+                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+                kvm_run->fail_entry.hardware_entry_failure_reason
+                        = vmcs_read32(VM_INSTRUCTION_ERROR);
                 r = 0;
         } else {
                 /*
@@ -1933,19 +2033,20 @@ again:
                         profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
 
                 vcpu->launched = 1;
-                kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT;
                 r = kvm_handle_exit(kvm_run, vcpu);
                 if (r > 0) {
                         /* Give scheduler a change to reschedule. */
                         if (signal_pending(current)) {
-                                ++kvm_stat.signal_exits;
+                                ++vcpu->stat.signal_exits;
                                 post_kvm_run_save(vcpu, kvm_run);
+                                kvm_run->exit_reason = KVM_EXIT_INTR;
                                 return -EINTR;
                         }
 
                         if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-                                ++kvm_stat.request_irq_exits;
+                                ++vcpu->stat.request_irq_exits;
                                 post_kvm_run_save(vcpu, kvm_run);
+                                kvm_run->exit_reason = KVM_EXIT_INTR;
                                 return -EINTR;
                         }
 
@@ -1969,7 +2070,7 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
 {
         u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-        ++kvm_stat.pf_guest;
+        ++vcpu->stat.pf_guest;
 
         if (is_page_fault(vect_info)) {
                 printk(KERN_DEBUG "inject_page_fault: "
@@ -2026,6 +2127,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
         vmcs_clear(vmcs);
         vcpu->vmcs = vmcs;
         vcpu->launched = 0;
+        vcpu->fpu_active = 1;
 
         return 0;
 
@@ -2062,9 +2164,8 @@ static struct kvm_arch_ops vmx_arch_ops = {
         .get_segment = vmx_get_segment,
         .set_segment = vmx_set_segment,
         .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
-        .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits,
+        .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
         .set_cr0 = vmx_set_cr0,
-        .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch,
         .set_cr3 = vmx_set_cr3,
         .set_cr4 = vmx_set_cr4,
 #ifdef CONFIG_X86_64