Diffstat (limited to 'arch/x86/kvm/vmx.c')
 arch/x86/kvm/vmx.c | 350 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 236 insertions(+), 114 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4018b01e1f9..6259d7467648 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,7 +16,6 @@
  */
 
 #include "irq.h"
-#include "vmx.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -31,6 +30,8 @@
 
 #include <asm/io.h>
 #include <asm/desc.h>
+#include <asm/vmx.h>
+#include <asm/virtext.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
@@ -90,6 +91,11 @@ struct vcpu_vmx {
 	} rmode;
 	int vpid;
 	bool emulation_required;
+
+	/* Support for vnmi-less CPUs */
+	int soft_vnmi_blocked;
+	ktime_t entry_time;
+	s64 vnmi_blocked_time;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -122,7 +128,7 @@ static struct vmcs_config {
 	u32 vmentry_ctrl;
 } vmcs_config;
 
-struct vmx_capability {
+static struct vmx_capability {
 	u32 ept;
 	u32 vpid;
 } vmx_capability;
@@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
 
 		break;
+	case MSR_IA32_CR_PAT:
+		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+			vmcs_write64(GUEST_IA32_PAT, data);
+			vcpu->arch.pat = data;
+			break;
+		}
+		/* Otherwise falls through to kvm_set_msr_common */
 	default:
 		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
@@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
 
 static __init int cpu_has_kvm_support(void)
 {
-	unsigned long ecx = cpuid_ecx(1);
-	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+	return cpu_has_vmx();
 }
 
 static __init int vmx_disabled_by_bios(void)
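Note: cpu_has_vmx() comes from the newly included <asm/virtext.h>. For reference, a sketch of that helper as it should look in this kernel generation (verify against your tree); it performs exactly the CPUID probe the deleted lines open-coded:

static inline int cpu_has_vmx(void)
{
	unsigned long ecx = cpuid_ecx(1);
	/* CPUID.1:ECX.VMX[bit 5] -> VT */
	return test_bit(5, &ecx);
}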
@@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void)
 		__vcpu_clear(vmx);
 }
 
-static void hardware_disable(void *garbage)
+
+/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
+ * tricks.
+ */
+static void kvm_cpu_vmxoff(void)
 {
-	vmclear_local_vcpus();
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
+static void hardware_disable(void *garbage)
+{
+	vmclear_local_vcpus();
+	kvm_cpu_vmxoff();
+}
+
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
 				      u32 msr, u32 *result)
 {
@@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-	opt = 0;
+	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
 
-	min = opt = 0;
+	min = 0;
+	opt = VM_ENTRY_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
 				&_vmentry_control) < 0)
 		return -EIO;
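Note: the min/opt split is what makes the new PAT controls safe on older CPUs. adjust_vmx_controls() fails the setup only when a required (min) bit is unsupported, while unsupported optional (opt) bits, such as the PAT load/save controls requested above, are silently dropped. A sketch of that helper's logic, assuming it is unchanged elsewhere in this file:

static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
				      u32 msr, u32 *result)
{
	u32 vmx_msr_low, vmx_msr_high;
	u32 ctl = ctl_min | ctl_opt;

	/* VMX capability MSRs report allowed-1 settings in the high
	 * dword and allowed-0 settings in the low dword. */
	rdmsr(msr, vmx_msr_low, vmx_msr_high);

	ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
	ctl |= vmx_msr_low;  /* bit == 1 in low word  ==> must be one  */

	/* Ensure the minimum (required) set of control bits is supported. */
	if (ctl_min & ~ctl)
		return -EIO;

	*result = ctl;
	return 0;
}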
@@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
  */
 static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
-	u32 host_sysenter_cs;
+	u32 host_sysenter_cs, msr_low, msr_high;
 	u32 junk;
+	u64 host_pat;
 	unsigned long a;
 	struct descriptor_table dt;
 	int i;
@@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	rdmsrl(MSR_IA32_SYSENTER_EIP, a);
 	vmcs_writel(HOST_IA32_SYSENTER_EIP, a);   /* 22.2.3 */
 
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
+		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+		host_pat = msr_low | ((u64) msr_high << 32);
+		vmcs_write64(HOST_IA32_PAT, host_pat);
+	}
+	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+		host_pat = msr_low | ((u64) msr_high << 32);
+		/* Write the default value following the host PAT */
+		vmcs_write64(GUEST_IA32_PAT, host_pat);
+		/* Keep arch.pat in sync with GUEST_IA32_PAT */
+		vmx->vcpu.arch.pat = host_pat;
+	}
+
 	for (i = 0; i < NR_VMX_MSR; ++i) {
 		u32 index = vmx_msr_index[i];
 		u32 data_low, data_high;
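Note: IA32_PAT packs eight page-attribute entries (PA0..PA7), one per byte, with the memory type in the low 3 bits of each byte; the rdmsr()/shift sequence above merely reassembles the two 32-bit MSR halves into that 64-bit layout. A hypothetical decoding helper, for illustration only (not part of this patch):

/* Hypothetical: extract PAT entry i (0..7). The low 3 bits of each byte
 * encode the type: 0 = UC, 1 = WC, 4 = WT, 5 = WP, 6 = WB, 7 = UC-. */
static inline u8 pat_entry(u64 pat, unsigned int i)
{
	return (pat >> (i * 8)) & 0x7;
}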
@@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	vmx->vcpu.arch.rmode.active = 0;
 
+	vmx->soft_vnmi_blocked = 0;
+
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
 	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2374,29 @@ out:
 	return ret;
 }
 
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	if (!cpu_has_virtual_nmis()) {
+		enable_irq_window(vcpu);
+		return;
+	}
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!cpu_has_virtual_nmis()) {
+		/*
+		 * Tracking the NMI-blocked state in software is built upon
+		 * finding the next open IRQ window. This, in turn, depends on
+		 * well-behaving guests: They have to keep IRQs disabled at
+		 * least as long as the NMI handler runs. Otherwise we may
+		 * cause NMI nesting, maybe breaking the guest. But as this is
+		 * highly unlikely, we can live with the residual risk.
+		 */
+		vmx->soft_vnmi_blocked = 1;
+		vmx->vnmi_blocked_time = 0;
+	}
+
+	++vcpu->stat.nmi_injections;
+	if (vcpu->arch.rmode.active) {
+		vmx->rmode.irq.pending = true;
+		vmx->rmode.irq.vector = NMI_VECTOR;
+		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     NMI_VECTOR | INTR_TYPE_SOFT_INTR |
+			     INTR_INFO_VALID_MASK);
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+		return;
+	}
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
+static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+
+	vcpu->arch.nmi_window_open =
+		!(guest_intr & (GUEST_INTR_STATE_STI |
+				GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_NMI));
+	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
+		vcpu->arch.nmi_window_open = 0;
+
+	vcpu->arch.interrupt_window_open =
+		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		 !(guest_intr & (GUEST_INTR_STATE_STI |
+				 GUEST_INTR_STATE_MOV_SS)));
+}
+
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	int word_index = __ffs(vcpu->arch.irq_summary);
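Note: vmx_update_window_states() derives both window flags from the guest interruptibility-state VMCS field. For reference, the bits it tests are defined in <asm/vmx.h> with values fixed by the Intel SDM (shown here as a sketch; verify against the tree):

#define GUEST_INTR_STATE_STI		0x00000001	/* blocking by STI */
#define GUEST_INTR_STATE_MOV_SS		0x00000002	/* blocking by MOV SS */
#define GUEST_INTR_STATE_SMI		0x00000004	/* blocking by SMI */
#define GUEST_INTR_STATE_NMI		0x00000008	/* blocking by NMI */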
@@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
-	u32 cpu_based_vm_exec_control;
-
-	vcpu->arch.interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+	vmx_update_window_states(vcpu);
 
-	if (vcpu->arch.interrupt_window_open &&
-	    vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-		kvm_do_inject_irq(vcpu);
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.interrupt.pending) {
+			enable_nmi_window(vcpu);
+		} else if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
+			return;
+		}
+	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending)
+			enable_nmi_window(vcpu);
+		else if (vcpu->arch.irq_summary
+			 || kvm_run->request_interrupt_window)
+			enable_irq_window(vcpu);
+		return;
+	}
 
-	if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	if (vcpu->arch.interrupt_window_open) {
+		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+			kvm_do_inject_irq(vcpu);
 
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+		if (vcpu->arch.interrupt.pending)
+			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	}
 	if (!vcpu->arch.interrupt_window_open &&
 	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
-		/*
-		 * Interrupts blocked. Wait for unblock.
-		 */
-		cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	else
-		cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+		enable_irq_window(vcpu);
 }
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
 	struct kvm_userspace_memory_region tss_mem = {
-		.slot = 8,
+		.slot = TSS_PRIVATE_MEMSLOT,
 		.guest_phys_addr = addr,
 		.memory_size = PAGE_SIZE * 3,
 		.flags = 0,
@@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
 	}
 
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_no_device(intr_info)) {
@@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	rep = (exit_qualification & 32) != 0;
 	port = exit_qualification >> 16;
 
+	skip_emulated_instruction(vcpu);
 	return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
 }
 
@@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
 	KVMTRACE_0D(PEND_INTR, vcpu, handler);
+	++vcpu->stat.irq_window_exits;
 
 	/*
 	 * If the user space waits to inject interrupts, exit as soon as
@@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	if (kvm_run->request_interrupt_window &&
 	    !vcpu->arch.irq_summary) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		++vcpu->stat.irq_window_exits;
 		return 0;
 	}
 	return 1;
@@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qualification;
 	u16 tss_selector;
 	int reason;
@@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	reason = (u32)exit_qualification >> 30;
+	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
+	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
+	    == INTR_TYPE_NMI_INTR) {
+		vcpu->arch.nmi_injected = false;
+		if (cpu_has_virtual_nmis())
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+	}
 	tss_selector = exit_qualification;
 
 	return kvm_task_switch(vcpu, tss_selector, reason);
@@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	while (!guest_state_valid(vcpu)) {
 		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
 
-		switch (err) {
-		case EMULATE_DONE:
-			break;
-		case EMULATE_DO_MMIO:
-			kvm_report_emulation_failure(vcpu, "mmio");
-			/* TODO: Handle MMIO */
-			return;
-		default:
-			kvm_report_emulation_failure(vcpu, "emulation failure");
-			return;
+		if (err == EMULATE_DO_MMIO)
+			break;
+
+		if (err != EMULATE_DONE) {
+			kvm_report_emulation_failure(vcpu, "emulation failure");
+			return;
 		}
 
 		if (signal_pending(current))
@@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	local_irq_disable();
 	preempt_disable();
 
-	/* Guest state should be valid now, no more emulation should be needed */
-	vmx->emulation_required = 0;
+	/* Guest state should be valid now except if we need to
+	 * emulate an MMIO */
+	if (guest_state_valid(vcpu))
+		vmx->emulation_required = 0;
 }
 
 /*
@@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
 		    (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
 
+	/* If we need to emulate an MMIO from handle_invalid_guest_state
+	 * we just return 0 */
+	if (vmx->emulation_required && emulate_invalid_guest_state)
+		return 0;
+
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
 	if (vm_need_ept() && is_paging(vcpu)) {
@@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
-			exit_reason != EXIT_REASON_EPT_VIOLATION))
-		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
-		       "exit reason is 0x%x\n", __func__, exit_reason);
+			exit_reason != EXIT_REASON_EPT_VIOLATION &&
+			exit_reason != EXIT_REASON_TASK_SWITCH))
+		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
+		       "(0x%x) and exit reason is 0x%x\n",
+		       __func__, vectoring_info, exit_reason);
+
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
+		if (vcpu->arch.interrupt_window_open) {
+			vmx->soft_vnmi_blocked = 0;
+			vcpu->arch.nmi_window_open = 1;
+		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
+			   vcpu->arch.nmi_pending) {
+			/*
+			 * This CPU doesn't support us in finding the end of an
+			 * NMI-blocked window if the guest runs with IRQs
+			 * disabled. So we pull the trigger after 1 s of
+			 * futile waiting, but inform the user about this.
+			 */
+			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+			       "state on VCPU %d after 1 s timeout\n",
+			       __func__, vcpu->vcpu_id);
+			vmx->soft_vnmi_blocked = 0;
+			vmx->vcpu.arch.nmi_window_open = 1;
+		}
+	}
+
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
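Note: the 1 s breakout above is the last of three cooperating pieces in this patch: vmx_vcpu_run() stamps vmx->entry_time before entering the guest while soft-blocked, vmx_complete_interrupts() accumulates the elapsed guest time into vmx->vnmi_blocked_time after each exit, and the check here compares that total against the timeout. A condensed sketch of the predicate (the constant name is hypothetical; the patch uses the literal 1000000000LL):

#define SOFT_VNMI_TIMEOUT_NS	1000000000LL	/* hypothetical name, 1 s */

static inline bool soft_vnmi_timed_out(struct vcpu_vmx *vmx)
{
	/* vnmi_blocked_time is built up in vmx_complete_interrupts() from
	 * ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)) deltas. */
	return vmx->vnmi_blocked_time > SOFT_VNMI_TIMEOUT_NS;
}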
@@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
 	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	if (!cpu_has_virtual_nmis())
-		return;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	return !(guest_intr & (GUEST_INTR_STATE_NMI |
-			       GUEST_INTR_STATE_MOV_SS |
-			       GUEST_INTR_STATE_STI));
-}
-
-static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
-				GUEST_INTR_STATE_STI)) &&
-		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
-}
-
-static void enable_intr_window(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu))
-		enable_irq_window(vcpu);
-}
-
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		if (unblock_nmi && vector != DF_VECTOR)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
-	}
+	} else if (unlikely(vmx->soft_vnmi_blocked))
+		vmx->vnmi_blocked_time +=
+			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 
 	idt_vectoring_info = vmx->idt_vectoring_info;
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
 	update_tpr_threshold(vcpu);
 
-	if (cpu_has_virtual_nmis()) {
-		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vcpu->arch.interrupt.pending) {
-				enable_nmi_window(vcpu);
-			} else if (vmx_nmi_enabled(vcpu)) {
-				vcpu->arch.nmi_pending = false;
-				vcpu->arch.nmi_injected = true;
-			} else {
-				enable_intr_window(vcpu);
-				return;
-			}
-		}
-		if (vcpu->arch.nmi_injected) {
-			vmx_inject_nmi(vcpu);
-			enable_intr_window(vcpu);
-			return;
-		}
-	}
+	vmx_update_window_states(vcpu);
+
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.interrupt.pending) {
+			enable_nmi_window(vcpu);
+		} else if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
+			return;
+		}
+	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending)
+			enable_nmi_window(vcpu);
+		else if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
+		return;
+	}
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vmx_irq_enabled(vcpu))
+		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 		else
 			enable_irq_window(vcpu);
@@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.interrupt.pending) {
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
 		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+		if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
 	}
 }
 
@@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info;
 
+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+		vmx->entry_time = ktime_get();
+
 	/* Handle invalid guest state instead of entering VMX */
 	if (vmx->emulation_required && emulate_invalid_guest_state) {
 		handle_invalid_guest_state(vcpu, kvm_run);
@@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
 
-	vcpu->arch.interrupt_window_open =
-		(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-		 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
+	vmx_update_window_states(vcpu);
 
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
@@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
 	    (intr_info & INTR_INFO_VALID_MASK)) {
 		KVMTRACE_0D(NMI, vcpu, handler);
 		asm("int $2");
@@ -3455,6 +3571,11 @@ static int get_ept_level(void)
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
+static int vmx_get_mt_mask_shift(void)
+{
+	return VMX_EPT_MT_EPTE_SHIFT;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
+	.get_mt_mask_shift = vmx_get_mt_mask_shift,
 };
 
 static int __init vmx_init(void)
@@ -3566,10 +3688,10 @@ static int __init vmx_init(void)
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
 			VMX_EPT_IGMT_BIT);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
-				VMX_EPT_EXECUTABLE_MASK);
+				VMX_EPT_EXECUTABLE_MASK,
+				VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
 		kvm_enable_tdp();
 	} else
 		kvm_disable_tdp();
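Note: in a leaf EPT PTE the memory type occupies bits 5:3, hence VMX_EPT_MT_EPTE_SHIFT. Assuming the usual constant values (VMX_EPT_DEFAULT_MT == 6, i.e. write-back), the mask handed to kvm_mmu_set_mask_ptes() works out to 6 << 3 == 0x30; the MMU can now vary the memory type per mapping via get_mt_mask_shift instead of having write-back baked into every base PTE. A hypothetical illustration of how such a PTE is composed:

/* Hypothetical (not part of the patch): composing a leaf EPT PTE with the
 * default memory type in bits 5:3 and IGMT (ignore guest PAT) set. */
static inline u64 example_ept_pte(u64 pfn)
{
	return (pfn << PAGE_SHIFT)
		| VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK
		| VMX_EPT_EXECUTABLE_MASK
		| (VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT)
		| VMX_EPT_IGMT_BIT;
}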