aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Luiz Capitulino <lcapitulino@redhat.com> 2016-04-04 16:46:07 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2016-04-05 08:19:08 -0400
commit: 61abdbe0bcc2b32745ab4479cc550f4c1f518ee2 (patch)
tree: 090453c4f201a43a08e0f3d9b8524e3c91a639f2
parent: 9c650d09a9c3029cc90cae5d2cd7ab131bdb86c2 (diff)
kvm: x86: make lapic hrtimer pinned
When a vCPU runs on a nohz_full core, the hrtimer used by the lapic emulation code can be migrated to another core. When this happens, it's possible to observe millisecond latency when delivering timer IRQs to KVM guests.

The huge latency is mainly due to the fact that apic_timer_fn() expects to run during a kvm exit. It sets KVM_REQ_PENDING_TIMER and lets it be handled on kvm entry. However, if the timer fires on a different core, we have to wait until the next kvm exit for the guest to see KVM_REQ_PENDING_TIMER set.

This problem became visible after commit 9642d18ee. That commit changed the timer migration code to always attempt to migrate timers away from nohz_full cores. While it's debatable whether this is correct/desirable (I don't think it is), it's clear that the lapic emulation code has a requirement on firing the hrtimer on the same core where it was started. This is achieved by making the hrtimer pinned.

Lastly, note that KVM has code to migrate timers when a vCPU is scheduled to run on a different core. However, this forced migration may fail. When this happens, we can have the same problem. If we want 100% correctness, we'll have to modify apic_timer_fn() to cause a kvm exit when it runs on a different core than the vCPU. Not sure if this is possible.

Here's a reproducer for the issue being fixed:

1. Set all cores but core0 to be nohz_full cores
2. Start a guest with a single vCPU
3. Trace apic_timer_fn() and kvm_inject_apic_timer_irqs()

You'll see that apic_timer_fn() will run on core0 while kvm_inject_apic_timer_irqs() runs on a different core. If you get both on core0, try running a program that takes 100% of the CPU and pin it to core0 to force the vCPU out.

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- arch/x86/kvm/lapic.c 8
1 files changed, 4 insertions, 4 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 443d2a57ad3d..1a2da0e5a373 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1369,7 +1369,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
1369 1369
1370 hrtimer_start(&apic->lapic_timer.timer, 1370 hrtimer_start(&apic->lapic_timer.timer,
1371 ktime_add_ns(now, apic->lapic_timer.period), 1371 ktime_add_ns(now, apic->lapic_timer.period),
1372 HRTIMER_MODE_ABS); 1372 HRTIMER_MODE_ABS_PINNED);
1373 1373
1374 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" 1374 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
1375 PRIx64 ", " 1375 PRIx64 ", "
@@ -1402,7 +1402,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
1402 expire = ktime_add_ns(now, ns); 1402 expire = ktime_add_ns(now, ns);
1403 expire = ktime_sub_ns(expire, lapic_timer_advance_ns); 1403 expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
1404 hrtimer_start(&apic->lapic_timer.timer, 1404 hrtimer_start(&apic->lapic_timer.timer,
1405 expire, HRTIMER_MODE_ABS); 1405 expire, HRTIMER_MODE_ABS_PINNED);
1406 } else 1406 } else
1407 apic_timer_expired(apic); 1407 apic_timer_expired(apic);
1408 1408
@@ -1868,7 +1868,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
1868 apic->vcpu = vcpu; 1868 apic->vcpu = vcpu;
1869 1869
1870 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 1870 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
1871 HRTIMER_MODE_ABS); 1871 HRTIMER_MODE_ABS_PINNED);
1872 apic->lapic_timer.timer.function = apic_timer_fn; 1872 apic->lapic_timer.timer.function = apic_timer_fn;
1873 1873
1874 /* 1874 /*
@@ -2003,7 +2003,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2003 2003
2004 timer = &vcpu->arch.apic->lapic_timer.timer; 2004 timer = &vcpu->arch.apic->lapic_timer.timer;
2005 if (hrtimer_cancel(timer)) 2005 if (hrtimer_cancel(timer))
2006 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 2006 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
2007} 2007}
2008 2008
2009/* 2009/*