aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorJan Kiszka <jan.kiszka@siemens.com>2008-09-26 03:30:57 -0400
committerAvi Kivity <avi@redhat.com>2008-12-31 09:51:43 -0500
commit3b86cd9967242f3f3d775ee015fb814a349ed5e6 (patch)
tree4ff775f594bb3e7c3933fac18b0d3ba6e5b8012f /arch
parent487b391d6ea9b1d0e2e0440466fb3130e78c98d9 (diff)
KVM: VMX: work around lacking VNMI support
Older VMX-supporting CPUs do not provide the "Virtual NMI" feature for tracking the NMI-blocked state after injecting such events. For now KVM is unable to inject NMIs on those CPUs. Derived from Sheng Yang's suggestion to use the IRQ window notification for detecting the end of NMI handlers, this patch implements virtual NMI support without impact on the host's ability to receive real NMIs. The downside is that the given approach requires some heuristics that can cause NMI nesting in very rare corner cases. The approach works as follows: - inject NMI and set a software-based NMI-blocked flag - arm the IRQ window start notification whenever an NMI window is requested - if the guest exits due to an opening IRQ window, clear the emulated NMI-blocked flag - if the guest's net execution time with NMI-blocked but without an IRQ window exceeds 1 second, force NMI-blocked reset and inject anyway. This approach covers most practical scenarios: - succeeding NMIs are separated by at least one open IRQ window - the guest may spin with IRQs disabled (e.g. due to a bug), but leaving the NMI handler takes much less time than one second - the guest does not rely on strict ordering or timing of NMIs (would be problematic in virtualized environments anyway) Successfully tested with the 'nmi n' monitor command, the kgdbts testsuite on smp guests (additional patches required to add debug register support to kvm) + the kernel's nmi_watchdog=1, and a Siemens-specific board emulation (+ guest) that comes with its own NMI watchdog mechanism. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/kvm/vmx.c174
1 files changed, 115 insertions, 59 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f16a62c7926..2180109d794 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -90,6 +90,11 @@ struct vcpu_vmx {
90 } rmode; 90 } rmode;
91 int vpid; 91 int vpid;
92 bool emulation_required; 92 bool emulation_required;
93
94 /* Support for vnmi-less CPUs */
95 int soft_vnmi_blocked;
96 ktime_t entry_time;
97 s64 vnmi_blocked_time;
93}; 98};
94 99
95static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 100static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -2230,6 +2235,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2230 2235
2231 vmx->vcpu.arch.rmode.active = 0; 2236 vmx->vcpu.arch.rmode.active = 0;
2232 2237
2238 vmx->soft_vnmi_blocked = 0;
2239
2233 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 2240 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
2234 kvm_set_cr8(&vmx->vcpu, 0); 2241 kvm_set_cr8(&vmx->vcpu, 0);
2235 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 2242 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2342,29 @@ out:
2335 return ret; 2342 return ret;
2336} 2343}
2337 2344
2345static void enable_irq_window(struct kvm_vcpu *vcpu)
2346{
2347 u32 cpu_based_vm_exec_control;
2348
2349 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2350 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2351 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2352}
2353
2354static void enable_nmi_window(struct kvm_vcpu *vcpu)
2355{
2356 u32 cpu_based_vm_exec_control;
2357
2358 if (!cpu_has_virtual_nmis()) {
2359 enable_irq_window(vcpu);
2360 return;
2361 }
2362
2363 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2364 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
2365 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2366}
2367
2338static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) 2368static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2339{ 2369{
2340 struct vcpu_vmx *vmx = to_vmx(vcpu); 2370 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2360,6 +2390,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2360{ 2390{
2361 struct vcpu_vmx *vmx = to_vmx(vcpu); 2391 struct vcpu_vmx *vmx = to_vmx(vcpu);
2362 2392
2393 if (!cpu_has_virtual_nmis()) {
2394 /*
2395 * Tracking the NMI-blocked state in software is built upon
2396 * finding the next open IRQ window. This, in turn, depends on
2397 * well-behaving guests: They have to keep IRQs disabled at
2398 * least as long as the NMI handler runs. Otherwise we may
2399 * cause NMI nesting, maybe breaking the guest. But as this is
2400 * highly unlikely, we can live with the residual risk.
2401 */
2402 vmx->soft_vnmi_blocked = 1;
2403 vmx->vnmi_blocked_time = 0;
2404 }
2405
2363 ++vcpu->stat.nmi_injections; 2406 ++vcpu->stat.nmi_injections;
2364 if (vcpu->arch.rmode.active) { 2407 if (vcpu->arch.rmode.active) {
2365 vmx->rmode.irq.pending = true; 2408 vmx->rmode.irq.pending = true;
@@ -2384,6 +2427,8 @@ static void vmx_update_window_states(struct kvm_vcpu *vcpu)
2384 !(guest_intr & (GUEST_INTR_STATE_STI | 2427 !(guest_intr & (GUEST_INTR_STATE_STI |
2385 GUEST_INTR_STATE_MOV_SS | 2428 GUEST_INTR_STATE_MOV_SS |
2386 GUEST_INTR_STATE_NMI)); 2429 GUEST_INTR_STATE_NMI));
2430 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
2431 vcpu->arch.nmi_window_open = 0;
2387 2432
2388 vcpu->arch.interrupt_window_open = 2433 vcpu->arch.interrupt_window_open =
2389 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && 2434 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -2403,55 +2448,31 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2403 kvm_queue_interrupt(vcpu, irq); 2448 kvm_queue_interrupt(vcpu, irq);
2404} 2449}
2405 2450
2406static void enable_irq_window(struct kvm_vcpu *vcpu)
2407{
2408 u32 cpu_based_vm_exec_control;
2409
2410 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2411 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2412 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2413}
2414
2415static void enable_nmi_window(struct kvm_vcpu *vcpu)
2416{
2417 u32 cpu_based_vm_exec_control;
2418
2419 if (!cpu_has_virtual_nmis())
2420 return;
2421
2422 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2423 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
2424 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2425}
2426
2427static void do_interrupt_requests(struct kvm_vcpu *vcpu, 2451static void do_interrupt_requests(struct kvm_vcpu *vcpu,
2428 struct kvm_run *kvm_run) 2452 struct kvm_run *kvm_run)
2429{ 2453{
2430 vmx_update_window_states(vcpu); 2454 vmx_update_window_states(vcpu);
2431 2455
2432 if (cpu_has_virtual_nmis()) { 2456 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
2433 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 2457 if (vcpu->arch.nmi_window_open) {
2434 if (vcpu->arch.nmi_window_open) { 2458 vcpu->arch.nmi_pending = false;
2435 vcpu->arch.nmi_pending = false; 2459 vcpu->arch.nmi_injected = true;
2436 vcpu->arch.nmi_injected = true; 2460 } else {
2437 } else { 2461 enable_nmi_window(vcpu);
2438 enable_nmi_window(vcpu);
2439 return;
2440 }
2441 }
2442 if (vcpu->arch.nmi_injected) {
2443 vmx_inject_nmi(vcpu);
2444 if (vcpu->arch.nmi_pending
2445 || kvm_run->request_nmi_window)
2446 enable_nmi_window(vcpu);
2447 else if (vcpu->arch.irq_summary
2448 || kvm_run->request_interrupt_window)
2449 enable_irq_window(vcpu);
2450 return; 2462 return;
2451 } 2463 }
2452 if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window) 2464 }
2465 if (vcpu->arch.nmi_injected) {
2466 vmx_inject_nmi(vcpu);
2467 if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
2453 enable_nmi_window(vcpu); 2468 enable_nmi_window(vcpu);
2469 else if (vcpu->arch.irq_summary
2470 || kvm_run->request_interrupt_window)
2471 enable_irq_window(vcpu);
2472 return;
2454 } 2473 }
2474 if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
2475 enable_nmi_window(vcpu);
2455 2476
2456 if (vcpu->arch.interrupt_window_open) { 2477 if (vcpu->arch.interrupt_window_open) {
2457 if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) 2478 if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3097,6 +3118,37 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3097 printk(KERN_WARNING "%s: unexpected, valid vectoring info " 3118 printk(KERN_WARNING "%s: unexpected, valid vectoring info "
3098 "(0x%x) and exit reason is 0x%x\n", 3119 "(0x%x) and exit reason is 0x%x\n",
3099 __func__, vectoring_info, exit_reason); 3120 __func__, vectoring_info, exit_reason);
3121
3122 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
3123 if (vcpu->arch.interrupt_window_open) {
3124 vmx->soft_vnmi_blocked = 0;
3125 vcpu->arch.nmi_window_open = 1;
3126 } else if (vmx->vnmi_blocked_time > 1000000000LL &&
3127 (kvm_run->request_nmi_window || vcpu->arch.nmi_pending)) {
3128 /*
3129 * This CPU doesn't support us in finding the end of an
3130 * NMI-blocked window if the guest runs with IRQs
3131 * disabled. So we pull the trigger after 1 s of
3132 * futile waiting, but inform the user about this.
3133 */
3134 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
3135 "state on VCPU %d after 1 s timeout\n",
3136 __func__, vcpu->vcpu_id);
3137 vmx->soft_vnmi_blocked = 0;
3138 vmx->vcpu.arch.nmi_window_open = 1;
3139 }
3140
3141 /*
3142 * If the user space waits to inject an NMI, exit ASAP
3143 */
3144 if (vcpu->arch.nmi_window_open && kvm_run->request_nmi_window
3145 && !vcpu->arch.nmi_pending) {
3146 kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
3147 ++vcpu->stat.nmi_window_exits;
3148 return 0;
3149 }
3150 }
3151
3100 if (exit_reason < kvm_vmx_max_exit_handlers 3152 if (exit_reason < kvm_vmx_max_exit_handlers
3101 && kvm_vmx_exit_handlers[exit_reason]) 3153 && kvm_vmx_exit_handlers[exit_reason])
3102 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); 3154 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3146,7 +3198,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3146 if (unblock_nmi && vector != DF_VECTOR) 3198 if (unblock_nmi && vector != DF_VECTOR)
3147 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3199 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3148 GUEST_INTR_STATE_NMI); 3200 GUEST_INTR_STATE_NMI);
3149 } 3201 } else if (unlikely(vmx->soft_vnmi_blocked))
3202 vmx->vnmi_blocked_time +=
3203 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
3150 3204
3151 idt_vectoring_info = vmx->idt_vectoring_info; 3205 idt_vectoring_info = vmx->idt_vectoring_info;
3152 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 3206 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3186,27 +3240,25 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3186 3240
3187 vmx_update_window_states(vcpu); 3241 vmx_update_window_states(vcpu);
3188 3242
3189 if (cpu_has_virtual_nmis()) { 3243 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3190 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3244 if (vcpu->arch.interrupt.pending) {
3191 if (vcpu->arch.interrupt.pending) { 3245 enable_nmi_window(vcpu);
3192 enable_nmi_window(vcpu); 3246 } else if (vcpu->arch.nmi_window_open) {
3193 } else if (vcpu->arch.nmi_window_open) { 3247 vcpu->arch.nmi_pending = false;
3194 vcpu->arch.nmi_pending = false; 3248 vcpu->arch.nmi_injected = true;
3195 vcpu->arch.nmi_injected = true; 3249 } else {
3196 } else { 3250 enable_nmi_window(vcpu);
3197 enable_nmi_window(vcpu);
3198 return;
3199 }
3200 }
3201 if (vcpu->arch.nmi_injected) {
3202 vmx_inject_nmi(vcpu);
3203 if (vcpu->arch.nmi_pending)
3204 enable_nmi_window(vcpu);
3205 else if (kvm_cpu_has_interrupt(vcpu))
3206 enable_irq_window(vcpu);
3207 return; 3251 return;
3208 } 3252 }
3209 } 3253 }
3254 if (vcpu->arch.nmi_injected) {
3255 vmx_inject_nmi(vcpu);
3256 if (vcpu->arch.nmi_pending)
3257 enable_nmi_window(vcpu);
3258 else if (kvm_cpu_has_interrupt(vcpu))
3259 enable_irq_window(vcpu);
3260 return;
3261 }
3210 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { 3262 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
3211 if (vcpu->arch.interrupt_window_open) 3263 if (vcpu->arch.interrupt_window_open)
3212 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); 3264 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
@@ -3255,6 +3307,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3255 struct vcpu_vmx *vmx = to_vmx(vcpu); 3307 struct vcpu_vmx *vmx = to_vmx(vcpu);
3256 u32 intr_info; 3308 u32 intr_info;
3257 3309
3310 /* Record the guest's net vcpu time for enforced NMI injections. */
3311 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
3312 vmx->entry_time = ktime_get();
3313
3258 /* Handle invalid guest state instead of entering VMX */ 3314 /* Handle invalid guest state instead of entering VMX */
3259 if (vmx->emulation_required && emulate_invalid_guest_state) { 3315 if (vmx->emulation_required && emulate_invalid_guest_state) {
3260 handle_invalid_guest_state(vcpu, kvm_run); 3316 handle_invalid_guest_state(vcpu, kvm_run);