aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-21 09:06:09 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-21 09:06:09 -0400
commite66d90fb4abd0a27ee96f57a32fb561221c4d6ae (patch)
tree3337cba94c7444b06fdb0e8b487287d07b71f4a0 /arch/x86/kvm/vmx.c
parent55ca089e2579de90f048aca2a3030b8b2f864813 (diff)
parent14b395e35d1afdd8019d11b92e28041fad591b71 (diff)
Merge branch 'linus' into xen-64bit
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--arch/x86/kvm/vmx.c230
1 files changed, 173 insertions, 57 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 10ce6ee4c491..0cac63701719 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -30,6 +30,8 @@
30#include <asm/io.h> 30#include <asm/io.h>
31#include <asm/desc.h> 31#include <asm/desc.h>
32 32
33#define __ex(x) __kvm_handle_fault_on_reboot(x)
34
33MODULE_AUTHOR("Qumranet"); 35MODULE_AUTHOR("Qumranet");
34MODULE_LICENSE("GPL"); 36MODULE_LICENSE("GPL");
35 37
@@ -53,6 +55,7 @@ struct vmcs {
53 55
54struct vcpu_vmx { 56struct vcpu_vmx {
55 struct kvm_vcpu vcpu; 57 struct kvm_vcpu vcpu;
58 struct list_head local_vcpus_link;
56 int launched; 59 int launched;
57 u8 fail; 60 u8 fail;
58 u32 idt_vectoring_info; 61 u32 idt_vectoring_info;
@@ -88,9 +91,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
88} 91}
89 92
90static int init_rmode(struct kvm *kvm); 93static int init_rmode(struct kvm *kvm);
94static u64 construct_eptp(unsigned long root_hpa);
91 95
92static DEFINE_PER_CPU(struct vmcs *, vmxarea); 96static DEFINE_PER_CPU(struct vmcs *, vmxarea);
93static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 97static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
98static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
94 99
95static struct page *vmx_io_bitmap_a; 100static struct page *vmx_io_bitmap_a;
96static struct page *vmx_io_bitmap_b; 101static struct page *vmx_io_bitmap_b;
@@ -260,6 +265,11 @@ static inline int cpu_has_vmx_vpid(void)
260 SECONDARY_EXEC_ENABLE_VPID); 265 SECONDARY_EXEC_ENABLE_VPID);
261} 266}
262 267
268static inline int cpu_has_virtual_nmis(void)
269{
270 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
271}
272
263static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) 273static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
264{ 274{
265 int i; 275 int i;
@@ -278,7 +288,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
278 u64 gva; 288 u64 gva;
279 } operand = { vpid, 0, gva }; 289 } operand = { vpid, 0, gva };
280 290
281 asm volatile (ASM_VMX_INVVPID 291 asm volatile (__ex(ASM_VMX_INVVPID)
282 /* CF==1 or ZF==1 --> rc = -1 */ 292 /* CF==1 or ZF==1 --> rc = -1 */
283 "; ja 1f ; ud2 ; 1:" 293 "; ja 1f ; ud2 ; 1:"
284 : : "a"(&operand), "c"(ext) : "cc", "memory"); 294 : : "a"(&operand), "c"(ext) : "cc", "memory");
@@ -290,7 +300,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
290 u64 eptp, gpa; 300 u64 eptp, gpa;
291 } operand = {eptp, gpa}; 301 } operand = {eptp, gpa};
292 302
293 asm volatile (ASM_VMX_INVEPT 303 asm volatile (__ex(ASM_VMX_INVEPT)
294 /* CF==1 or ZF==1 --> rc = -1 */ 304 /* CF==1 or ZF==1 --> rc = -1 */
295 "; ja 1f ; ud2 ; 1:\n" 305 "; ja 1f ; ud2 ; 1:\n"
296 : : "a" (&operand), "c" (ext) : "cc", "memory"); 306 : : "a" (&operand), "c" (ext) : "cc", "memory");
@@ -311,7 +321,7 @@ static void vmcs_clear(struct vmcs *vmcs)
311 u64 phys_addr = __pa(vmcs); 321 u64 phys_addr = __pa(vmcs);
312 u8 error; 322 u8 error;
313 323
314 asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0" 324 asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
315 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) 325 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
316 : "cc", "memory"); 326 : "cc", "memory");
317 if (error) 327 if (error)
@@ -329,6 +339,9 @@ static void __vcpu_clear(void *arg)
329 if (per_cpu(current_vmcs, cpu) == vmx->vmcs) 339 if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
330 per_cpu(current_vmcs, cpu) = NULL; 340 per_cpu(current_vmcs, cpu) = NULL;
331 rdtscll(vmx->vcpu.arch.host_tsc); 341 rdtscll(vmx->vcpu.arch.host_tsc);
342 list_del(&vmx->local_vcpus_link);
343 vmx->vcpu.cpu = -1;
344 vmx->launched = 0;
332} 345}
333 346
334static void vcpu_clear(struct vcpu_vmx *vmx) 347static void vcpu_clear(struct vcpu_vmx *vmx)
@@ -336,7 +349,6 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
336 if (vmx->vcpu.cpu == -1) 349 if (vmx->vcpu.cpu == -1)
337 return; 350 return;
338 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); 351 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
339 vmx->launched = 0;
340} 352}
341 353
342static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) 354static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
@@ -378,7 +390,7 @@ static unsigned long vmcs_readl(unsigned long field)
378{ 390{
379 unsigned long value; 391 unsigned long value;
380 392
381 asm volatile (ASM_VMX_VMREAD_RDX_RAX 393 asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
382 : "=a"(value) : "d"(field) : "cc"); 394 : "=a"(value) : "d"(field) : "cc");
383 return value; 395 return value;
384} 396}
@@ -413,7 +425,7 @@ static void vmcs_writel(unsigned long field, unsigned long value)
413{ 425{
414 u8 error; 426 u8 error;
415 427
416 asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" 428 asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
417 : "=q"(error) : "a"(value), "d"(field) : "cc"); 429 : "=q"(error) : "a"(value), "d"(field) : "cc");
418 if (unlikely(error)) 430 if (unlikely(error))
419 vmwrite_error(field, value); 431 vmwrite_error(field, value);
@@ -431,10 +443,8 @@ static void vmcs_write32(unsigned long field, u32 value)
431 443
432static void vmcs_write64(unsigned long field, u64 value) 444static void vmcs_write64(unsigned long field, u64 value)
433{ 445{
434#ifdef CONFIG_X86_64
435 vmcs_writel(field, value);
436#else
437 vmcs_writel(field, value); 446 vmcs_writel(field, value);
447#ifndef CONFIG_X86_64
438 asm volatile (""); 448 asm volatile ("");
439 vmcs_writel(field+1, value >> 32); 449 vmcs_writel(field+1, value >> 32);
440#endif 450#endif
@@ -474,7 +484,7 @@ static void reload_tss(void)
474 struct descriptor_table gdt; 484 struct descriptor_table gdt;
475 struct desc_struct *descs; 485 struct desc_struct *descs;
476 486
477 get_gdt(&gdt); 487 kvm_get_gdt(&gdt);
478 descs = (void *)gdt.base; 488 descs = (void *)gdt.base;
479 descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ 489 descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
480 load_TR_desc(); 490 load_TR_desc();
@@ -530,9 +540,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
530 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not 540 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
531 * allow segment selectors with cpl > 0 or ti == 1. 541 * allow segment selectors with cpl > 0 or ti == 1.
532 */ 542 */
533 vmx->host_state.ldt_sel = read_ldt(); 543 vmx->host_state.ldt_sel = kvm_read_ldt();
534 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; 544 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
535 vmx->host_state.fs_sel = read_fs(); 545 vmx->host_state.fs_sel = kvm_read_fs();
536 if (!(vmx->host_state.fs_sel & 7)) { 546 if (!(vmx->host_state.fs_sel & 7)) {
537 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); 547 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
538 vmx->host_state.fs_reload_needed = 0; 548 vmx->host_state.fs_reload_needed = 0;
@@ -540,7 +550,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
540 vmcs_write16(HOST_FS_SELECTOR, 0); 550 vmcs_write16(HOST_FS_SELECTOR, 0);
541 vmx->host_state.fs_reload_needed = 1; 551 vmx->host_state.fs_reload_needed = 1;
542 } 552 }
543 vmx->host_state.gs_sel = read_gs(); 553 vmx->host_state.gs_sel = kvm_read_gs();
544 if (!(vmx->host_state.gs_sel & 7)) 554 if (!(vmx->host_state.gs_sel & 7))
545 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); 555 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
546 else { 556 else {
@@ -576,15 +586,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
576 ++vmx->vcpu.stat.host_state_reload; 586 ++vmx->vcpu.stat.host_state_reload;
577 vmx->host_state.loaded = 0; 587 vmx->host_state.loaded = 0;
578 if (vmx->host_state.fs_reload_needed) 588 if (vmx->host_state.fs_reload_needed)
579 load_fs(vmx->host_state.fs_sel); 589 kvm_load_fs(vmx->host_state.fs_sel);
580 if (vmx->host_state.gs_ldt_reload_needed) { 590 if (vmx->host_state.gs_ldt_reload_needed) {
581 load_ldt(vmx->host_state.ldt_sel); 591 kvm_load_ldt(vmx->host_state.ldt_sel);
582 /* 592 /*
583 * If we have to reload gs, we must take care to 593 * If we have to reload gs, we must take care to
584 * preserve our gs base. 594 * preserve our gs base.
585 */ 595 */
586 local_irq_save(flags); 596 local_irq_save(flags);
587 load_gs(vmx->host_state.gs_sel); 597 kvm_load_gs(vmx->host_state.gs_sel);
588#ifdef CONFIG_X86_64 598#ifdef CONFIG_X86_64
589 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); 599 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
590#endif 600#endif
@@ -617,13 +627,17 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
617 vcpu_clear(vmx); 627 vcpu_clear(vmx);
618 kvm_migrate_timers(vcpu); 628 kvm_migrate_timers(vcpu);
619 vpid_sync_vcpu_all(vmx); 629 vpid_sync_vcpu_all(vmx);
630 local_irq_disable();
631 list_add(&vmx->local_vcpus_link,
632 &per_cpu(vcpus_on_cpu, cpu));
633 local_irq_enable();
620 } 634 }
621 635
622 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { 636 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
623 u8 error; 637 u8 error;
624 638
625 per_cpu(current_vmcs, cpu) = vmx->vmcs; 639 per_cpu(current_vmcs, cpu) = vmx->vmcs;
626 asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" 640 asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
627 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) 641 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
628 : "cc"); 642 : "cc");
629 if (error) 643 if (error)
@@ -640,8 +654,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
640 * Linux uses per-cpu TSS and GDT, so set these when switching 654 * Linux uses per-cpu TSS and GDT, so set these when switching
641 * processors. 655 * processors.
642 */ 656 */
643 vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */ 657 vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
644 get_gdt(&dt); 658 kvm_get_gdt(&dt);
645 vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ 659 vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */
646 660
647 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); 661 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
@@ -684,11 +698,6 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
684 update_exception_bitmap(vcpu); 698 update_exception_bitmap(vcpu);
685} 699}
686 700
687static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)
688{
689 vcpu_clear(to_vmx(vcpu));
690}
691
692static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 701static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
693{ 702{
694 return vmcs_readl(GUEST_RFLAGS); 703 return vmcs_readl(GUEST_RFLAGS);
@@ -913,6 +922,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
913 case MSR_IA32_TIME_STAMP_COUNTER: 922 case MSR_IA32_TIME_STAMP_COUNTER:
914 guest_write_tsc(data); 923 guest_write_tsc(data);
915 break; 924 break;
925 case MSR_P6_PERFCTR0:
926 case MSR_P6_PERFCTR1:
927 case MSR_P6_EVNTSEL0:
928 case MSR_P6_EVNTSEL1:
929 /*
930 * Just discard all writes to the performance counters; this
931 * should keep both older linux and windows 64-bit guests
932 * happy
933 */
934 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
935
936 break;
916 default: 937 default:
917 vmx_load_host_state(vmx); 938 vmx_load_host_state(vmx);
918 msr = find_msr_entry(vmx, msr_index); 939 msr = find_msr_entry(vmx, msr_index);
@@ -1022,6 +1043,7 @@ static void hardware_enable(void *garbage)
1022 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 1043 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
1023 u64 old; 1044 u64 old;
1024 1045
1046 INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
1025 rdmsrl(MSR_IA32_FEATURE_CONTROL, old); 1047 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
1026 if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | 1048 if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED |
1027 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) 1049 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED))
@@ -1032,13 +1054,25 @@ static void hardware_enable(void *garbage)
1032 MSR_IA32_FEATURE_CONTROL_LOCKED | 1054 MSR_IA32_FEATURE_CONTROL_LOCKED |
1033 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); 1055 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED);
1034 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ 1056 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
1035 asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) 1057 asm volatile (ASM_VMX_VMXON_RAX
1058 : : "a"(&phys_addr), "m"(phys_addr)
1036 : "memory", "cc"); 1059 : "memory", "cc");
1037} 1060}
1038 1061
1062static void vmclear_local_vcpus(void)
1063{
1064 int cpu = raw_smp_processor_id();
1065 struct vcpu_vmx *vmx, *n;
1066
1067 list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu),
1068 local_vcpus_link)
1069 __vcpu_clear(vmx);
1070}
1071
1039static void hardware_disable(void *garbage) 1072static void hardware_disable(void *garbage)
1040{ 1073{
1041 asm volatile (ASM_VMX_VMXOFF : : : "cc"); 1074 vmclear_local_vcpus();
1075 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
1042 write_cr4(read_cr4() & ~X86_CR4_VMXE); 1076 write_cr4(read_cr4() & ~X86_CR4_VMXE);
1043} 1077}
1044 1078
@@ -1072,7 +1106,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1072 u32 _vmentry_control = 0; 1106 u32 _vmentry_control = 0;
1073 1107
1074 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; 1108 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
1075 opt = 0; 1109 opt = PIN_BASED_VIRTUAL_NMIS;
1076 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, 1110 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
1077 &_pin_based_exec_control) < 0) 1111 &_pin_based_exec_control) < 0)
1078 return -EIO; 1112 return -EIO;
@@ -1389,6 +1423,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
1389static void vmx_flush_tlb(struct kvm_vcpu *vcpu) 1423static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1390{ 1424{
1391 vpid_sync_vcpu_all(to_vmx(vcpu)); 1425 vpid_sync_vcpu_all(to_vmx(vcpu));
1426 if (vm_need_ept())
1427 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
1392} 1428}
1393 1429
1394static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1430static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
@@ -1420,7 +1456,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1420 if (!(cr0 & X86_CR0_PG)) { 1456 if (!(cr0 & X86_CR0_PG)) {
1421 /* From paging/starting to nonpaging */ 1457 /* From paging/starting to nonpaging */
1422 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1458 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1423 vmcs_config.cpu_based_exec_ctrl | 1459 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
1424 (CPU_BASED_CR3_LOAD_EXITING | 1460 (CPU_BASED_CR3_LOAD_EXITING |
1425 CPU_BASED_CR3_STORE_EXITING)); 1461 CPU_BASED_CR3_STORE_EXITING));
1426 vcpu->arch.cr0 = cr0; 1462 vcpu->arch.cr0 = cr0;
@@ -1430,7 +1466,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1430 } else if (!is_paging(vcpu)) { 1466 } else if (!is_paging(vcpu)) {
1431 /* From nonpaging to paging */ 1467 /* From nonpaging to paging */
1432 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1468 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1433 vmcs_config.cpu_based_exec_ctrl & 1469 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
1434 ~(CPU_BASED_CR3_LOAD_EXITING | 1470 ~(CPU_BASED_CR3_LOAD_EXITING |
1435 CPU_BASED_CR3_STORE_EXITING)); 1471 CPU_BASED_CR3_STORE_EXITING));
1436 vcpu->arch.cr0 = cr0; 1472 vcpu->arch.cr0 = cr0;
@@ -1821,7 +1857,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
1821 spin_unlock(&vmx_vpid_lock); 1857 spin_unlock(&vmx_vpid_lock);
1822} 1858}
1823 1859
1824void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) 1860static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
1825{ 1861{
1826 void *va; 1862 void *va;
1827 1863
@@ -1907,8 +1943,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1907 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 1943 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
1908 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 1944 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
1909 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 1945 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
1910 vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */ 1946 vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */
1911 vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */ 1947 vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */
1912 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 1948 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
1913#ifdef CONFIG_X86_64 1949#ifdef CONFIG_X86_64
1914 rdmsrl(MSR_FS_BASE, a); 1950 rdmsrl(MSR_FS_BASE, a);
@@ -1922,7 +1958,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1922 1958
1923 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ 1959 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
1924 1960
1925 get_idt(&dt); 1961 kvm_get_idt(&dt);
1926 vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ 1962 vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */
1927 1963
1928 asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); 1964 asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
@@ -2114,6 +2150,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2114 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 2150 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
2115} 2151}
2116 2152
2153static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2154{
2155 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2156 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
2157 vcpu->arch.nmi_pending = 0;
2158}
2159
2117static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) 2160static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2118{ 2161{
2119 int word_index = __ffs(vcpu->arch.irq_summary); 2162 int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2554,8 +2597,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2554 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 2597 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2555 offset = exit_qualification & 0xffful; 2598 offset = exit_qualification & 0xffful;
2556 2599
2557 KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler);
2558
2559 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 2600 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2560 2601
2561 if (er != EMULATE_DONE) { 2602 if (er != EMULATE_DONE) {
@@ -2639,6 +2680,19 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2639 return 1; 2680 return 1;
2640} 2681}
2641 2682
2683static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2684{
2685 u32 cpu_based_vm_exec_control;
2686
2687 /* clear pending NMI */
2688 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2689 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
2690 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2691 ++vcpu->stat.nmi_window_exits;
2692
2693 return 1;
2694}
2695
2642/* 2696/*
2643 * The exit handlers return 1 if the exit was handled fully and guest execution 2697 * The exit handlers return 1 if the exit was handled fully and guest execution
2644 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 2698 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2649,6 +2703,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
2649 [EXIT_REASON_EXCEPTION_NMI] = handle_exception, 2703 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
2650 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 2704 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
2651 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, 2705 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
2706 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
2652 [EXIT_REASON_IO_INSTRUCTION] = handle_io, 2707 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
2653 [EXIT_REASON_CR_ACCESS] = handle_cr, 2708 [EXIT_REASON_CR_ACCESS] = handle_cr,
2654 [EXIT_REASON_DR_ACCESS] = handle_dr, 2709 [EXIT_REASON_DR_ACCESS] = handle_dr,
@@ -2736,17 +2791,52 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
2736 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2791 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2737} 2792}
2738 2793
2794static void enable_nmi_window(struct kvm_vcpu *vcpu)
2795{
2796 u32 cpu_based_vm_exec_control;
2797
2798 if (!cpu_has_virtual_nmis())
2799 return;
2800
2801 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2802 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
2803 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2804}
2805
2806static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
2807{
2808 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2809 return !(guest_intr & (GUEST_INTR_STATE_NMI |
2810 GUEST_INTR_STATE_MOV_SS |
2811 GUEST_INTR_STATE_STI));
2812}
2813
2814static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
2815{
2816 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2817 return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
2818 GUEST_INTR_STATE_STI)) &&
2819 (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
2820}
2821
2822static void enable_intr_window(struct kvm_vcpu *vcpu)
2823{
2824 if (vcpu->arch.nmi_pending)
2825 enable_nmi_window(vcpu);
2826 else if (kvm_cpu_has_interrupt(vcpu))
2827 enable_irq_window(vcpu);
2828}
2829
2739static void vmx_intr_assist(struct kvm_vcpu *vcpu) 2830static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2740{ 2831{
2741 struct vcpu_vmx *vmx = to_vmx(vcpu); 2832 struct vcpu_vmx *vmx = to_vmx(vcpu);
2742 u32 idtv_info_field, intr_info_field; 2833 u32 idtv_info_field, intr_info_field, exit_intr_info_field;
2743 int has_ext_irq, interrupt_window_open;
2744 int vector; 2834 int vector;
2745 2835
2746 update_tpr_threshold(vcpu); 2836 update_tpr_threshold(vcpu);
2747 2837
2748 has_ext_irq = kvm_cpu_has_interrupt(vcpu);
2749 intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); 2838 intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
2839 exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO);
2750 idtv_info_field = vmx->idt_vectoring_info; 2840 idtv_info_field = vmx->idt_vectoring_info;
2751 if (intr_info_field & INTR_INFO_VALID_MASK) { 2841 if (intr_info_field & INTR_INFO_VALID_MASK) {
2752 if (idtv_info_field & INTR_INFO_VALID_MASK) { 2842 if (idtv_info_field & INTR_INFO_VALID_MASK) {
@@ -2754,8 +2844,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2754 if (printk_ratelimit()) 2844 if (printk_ratelimit())
2755 printk(KERN_ERR "Fault when IDT_Vectoring\n"); 2845 printk(KERN_ERR "Fault when IDT_Vectoring\n");
2756 } 2846 }
2757 if (has_ext_irq) 2847 enable_intr_window(vcpu);
2758 enable_irq_window(vcpu);
2759 return; 2848 return;
2760 } 2849 }
2761 if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { 2850 if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
@@ -2765,30 +2854,56 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2765 u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK; 2854 u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
2766 2855
2767 vmx_inject_irq(vcpu, vect); 2856 vmx_inject_irq(vcpu, vect);
2768 if (unlikely(has_ext_irq)) 2857 enable_intr_window(vcpu);
2769 enable_irq_window(vcpu);
2770 return; 2858 return;
2771 } 2859 }
2772 2860
2773 KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); 2861 KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
2774 2862
2775 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); 2863 /*
2864 * SDM 3: 25.7.1.2
2865 * Clear bit "block by NMI" before VM entry if a NMI delivery
2866 * faulted.
2867 */
2868 if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
2869 == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis())
2870 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
2871 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
2872 ~GUEST_INTR_STATE_NMI);
2873
2874 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field
2875 & ~INTR_INFO_RESVD_BITS_MASK);
2776 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 2876 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2777 vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); 2877 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
2778 2878
2779 if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) 2879 if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
2780 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 2880 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2781 vmcs_read32(IDT_VECTORING_ERROR_CODE)); 2881 vmcs_read32(IDT_VECTORING_ERROR_CODE));
2782 if (unlikely(has_ext_irq)) 2882 enable_intr_window(vcpu);
2783 enable_irq_window(vcpu);
2784 return; 2883 return;
2785 } 2884 }
2786 if (!has_ext_irq) 2885 if (cpu_has_virtual_nmis()) {
2886 /*
2887 * SDM 3: 25.7.1.2
2888 * Re-set bit "block by NMI" before VM entry if vmexit caused by
2889 * a guest IRET fault.
2890 */
2891 if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) &&
2892 (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8)
2893 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
2894 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) |
2895 GUEST_INTR_STATE_NMI);
2896 else if (vcpu->arch.nmi_pending) {
2897 if (vmx_nmi_enabled(vcpu))
2898 vmx_inject_nmi(vcpu);
2899 enable_intr_window(vcpu);
2900 return;
2901 }
2902
2903 }
2904 if (!kvm_cpu_has_interrupt(vcpu))
2787 return; 2905 return;
2788 interrupt_window_open = 2906 if (vmx_irq_enabled(vcpu)) {
2789 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2790 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
2791 if (interrupt_window_open) {
2792 vector = kvm_cpu_get_interrupt(vcpu); 2907 vector = kvm_cpu_get_interrupt(vcpu);
2793 vmx_inject_irq(vcpu, vector); 2908 vmx_inject_irq(vcpu, vector);
2794 kvm_timer_intr_post(vcpu, vector); 2909 kvm_timer_intr_post(vcpu, vector);
@@ -2838,7 +2953,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2838 "push %%edx; push %%ebp;" 2953 "push %%edx; push %%ebp;"
2839 "push %%ecx \n\t" 2954 "push %%ecx \n\t"
2840#endif 2955#endif
2841 ASM_VMX_VMWRITE_RSP_RDX "\n\t" 2956 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
2842 /* Check if vmlaunch of vmresume is needed */ 2957 /* Check if vmlaunch of vmresume is needed */
2843 "cmpl $0, %c[launched](%0) \n\t" 2958 "cmpl $0, %c[launched](%0) \n\t"
2844 /* Load guest registers. Don't clobber flags. */ 2959 /* Load guest registers. Don't clobber flags. */
@@ -2873,9 +2988,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2873#endif 2988#endif
2874 /* Enter guest mode */ 2989 /* Enter guest mode */
2875 "jne .Llaunched \n\t" 2990 "jne .Llaunched \n\t"
2876 ASM_VMX_VMLAUNCH "\n\t" 2991 __ex(ASM_VMX_VMLAUNCH) "\n\t"
2877 "jmp .Lkvm_vmx_return \n\t" 2992 "jmp .Lkvm_vmx_return \n\t"
2878 ".Llaunched: " ASM_VMX_VMRESUME "\n\t" 2993 ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
2879 ".Lkvm_vmx_return: " 2994 ".Lkvm_vmx_return: "
2880 /* Save guest registers, load host registers, keep flags */ 2995 /* Save guest registers, load host registers, keep flags */
2881#ifdef CONFIG_X86_64 2996#ifdef CONFIG_X86_64
@@ -2949,7 +3064,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2949 fixup_rmode_irq(vmx); 3064 fixup_rmode_irq(vmx);
2950 3065
2951 vcpu->arch.interrupt_window_open = 3066 vcpu->arch.interrupt_window_open =
2952 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; 3067 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
3068 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
2953 3069
2954 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3070 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
2955 vmx->launched = 1; 3071 vmx->launched = 1;
@@ -2957,7 +3073,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2957 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3073 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
2958 3074
2959 /* We need to handle NMIs before interrupts are enabled */ 3075 /* We need to handle NMIs before interrupts are enabled */
2960 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ 3076 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
3077 (intr_info & INTR_INFO_VALID_MASK)) {
2961 KVMTRACE_0D(NMI, vcpu, handler); 3078 KVMTRACE_0D(NMI, vcpu, handler);
2962 asm("int $2"); 3079 asm("int $2");
2963 } 3080 }
@@ -2968,7 +3085,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
2968 struct vcpu_vmx *vmx = to_vmx(vcpu); 3085 struct vcpu_vmx *vmx = to_vmx(vcpu);
2969 3086
2970 if (vmx->vmcs) { 3087 if (vmx->vmcs) {
2971 on_each_cpu(__vcpu_clear, vmx, 1); 3088 vcpu_clear(vmx);
2972 free_vmcs(vmx->vmcs); 3089 free_vmcs(vmx->vmcs);
2973 vmx->vmcs = NULL; 3090 vmx->vmcs = NULL;
2974 } 3091 }
@@ -3095,7 +3212,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
3095 .prepare_guest_switch = vmx_save_host_state, 3212 .prepare_guest_switch = vmx_save_host_state,
3096 .vcpu_load = vmx_vcpu_load, 3213 .vcpu_load = vmx_vcpu_load,
3097 .vcpu_put = vmx_vcpu_put, 3214 .vcpu_put = vmx_vcpu_put,
3098 .vcpu_decache = vmx_vcpu_decache,
3099 3215
3100 .set_guest_debug = set_guest_debug, 3216 .set_guest_debug = set_guest_debug,
3101 .guest_debug_pre = kvm_guest_debug_pre, 3217 .guest_debug_pre = kvm_guest_debug_pre,