author    Michael S. Tsirkin <mst@redhat.com>  2012-06-24 12:24:34 -0400
committer Avi Kivity <avi@redhat.com>          2012-06-25 05:38:06 -0400
commit    ab9cf4996bb989983e73da894b8dd0239aa2c3c2 (patch)
tree      45d97f795e03f8c18f6dc110887f34391cb7a8e5 /arch/x86
parent    8680b94b0e6046af2644c17313287ec0cb5843dc (diff)
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory, that tells
the guest that it does not need EOI.  The guest tests it using a single
test-and-clear operation - this is necessary so that the host can detect
interrupt nesting - and if set, it can skip the EOI MSR.

I ran a simple microbenchmark to show the exit reduction (note: for
testing, you need to apply the follow-up patch 'kvm: host side for eoi
optimization' + a qemu patch I posted separately, on the host):

Before:

 Performance counter stats for 'sleep 1s':

            47,357 kvm:kvm_entry                  [99.98%]
                 0 kvm:kvm_hypercall              [99.98%]
                 0 kvm:kvm_hv_hypercall           [99.98%]
             5,001 kvm:kvm_pio                    [99.98%]
                 0 kvm:kvm_cpuid                  [99.98%]
            22,124 kvm:kvm_apic                   [99.98%]
            49,849 kvm:kvm_exit                   [99.98%]
            21,115 kvm:kvm_inj_virq               [99.98%]
                 0 kvm:kvm_inj_exception          [99.98%]
                 0 kvm:kvm_page_fault             [99.98%]
            22,937 kvm:kvm_msr                    [99.98%]
                 0 kvm:kvm_cr                     [99.98%]
                 0 kvm:kvm_pic_set_irq            [99.98%]
                 0 kvm:kvm_apic_ipi               [99.98%]
            22,207 kvm:kvm_apic_accept_irq        [99.98%]
            22,421 kvm:kvm_eoi                    [99.98%]
                 0 kvm:kvm_pv_eoi                 [99.99%]
                 0 kvm:kvm_nested_vmrun           [99.99%]
                 0 kvm:kvm_nested_intercepts      [99.99%]
                 0 kvm:kvm_nested_vmexit          [99.99%]
                 0 kvm:kvm_nested_vmexit_inject   [99.99%]
                 0 kvm:kvm_nested_intr_vmexit     [99.99%]
                 0 kvm:kvm_invlpga                [99.99%]
                 0 kvm:kvm_skinit                 [99.99%]
                57 kvm:kvm_emulate_insn           [99.99%]
                 0 kvm:vcpu_match_mmio            [99.99%]
                 0 kvm:kvm_userspace_exit         [99.99%]
                 2 kvm:kvm_set_irq                [99.99%]
                 2 kvm:kvm_ioapic_set_irq         [99.99%]
            23,609 kvm:kvm_msi_set_irq            [99.99%]
                 1 kvm:kvm_ack_irq                [99.99%]
               131 kvm:kvm_mmio                   [99.99%]
               226 kvm:kvm_fpu                    [100.00%]
                 0 kvm:kvm_age_page               [100.00%]
                 0 kvm:kvm_try_async_get_page     [100.00%]
                 0 kvm:kvm_async_pf_doublefault   [100.00%]
                 0 kvm:kvm_async_pf_not_present   [100.00%]
                 0 kvm:kvm_async_pf_ready         [100.00%]
                 0 kvm:kvm_async_pf_completed

       1.002100578 seconds time elapsed

After:

 Performance counter stats for 'sleep 1s':

            28,354 kvm:kvm_entry                  [99.98%]
                 0 kvm:kvm_hypercall              [99.98%]
                 0 kvm:kvm_hv_hypercall           [99.98%]
             1,347 kvm:kvm_pio                    [99.98%]
                 0 kvm:kvm_cpuid                  [99.98%]
             1,931 kvm:kvm_apic                   [99.98%]
            29,595 kvm:kvm_exit                   [99.98%]
            24,884 kvm:kvm_inj_virq               [99.98%]
                 0 kvm:kvm_inj_exception          [99.98%]
                 0 kvm:kvm_page_fault             [99.98%]
             1,986 kvm:kvm_msr                    [99.98%]
                 0 kvm:kvm_cr                     [99.98%]
                 0 kvm:kvm_pic_set_irq            [99.98%]
                 0 kvm:kvm_apic_ipi               [99.99%]
            25,953 kvm:kvm_apic_accept_irq        [99.99%]
            26,132 kvm:kvm_eoi                    [99.99%]
            26,593 kvm:kvm_pv_eoi                 [99.99%]
                 0 kvm:kvm_nested_vmrun           [99.99%]
                 0 kvm:kvm_nested_intercepts      [99.99%]
                 0 kvm:kvm_nested_vmexit          [99.99%]
                 0 kvm:kvm_nested_vmexit_inject   [99.99%]
                 0 kvm:kvm_nested_intr_vmexit     [99.99%]
                 0 kvm:kvm_invlpga                [99.99%]
                 0 kvm:kvm_skinit                 [99.99%]
               284 kvm:kvm_emulate_insn           [99.99%]
                68 kvm:vcpu_match_mmio            [99.99%]
                68 kvm:kvm_userspace_exit         [99.99%]
                 2 kvm:kvm_set_irq                [99.99%]
                 2 kvm:kvm_ioapic_set_irq         [99.99%]
            28,288 kvm:kvm_msi_set_irq            [99.99%]
                 1 kvm:kvm_ack_irq                [99.99%]
               131 kvm:kvm_mmio                   [100.00%]
               588 kvm:kvm_fpu                    [100.00%]
                 0 kvm:kvm_age_page               [100.00%]
                 0 kvm:kvm_try_async_get_page     [100.00%]
                 0 kvm:kvm_async_pf_doublefault   [100.00%]
                 0 kvm:kvm_async_pf_not_present   [100.00%]
                 0 kvm:kvm_async_pf_ready         [100.00%]
                 0 kvm:kvm_async_pf_completed

       1.002039622 seconds time elapsed

We see that the # of exits is almost halved.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
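For illustration only, a minimal userspace C sketch of the protocol this patch
implements on the guest side. The names pv_eoi_flag, host_inject_irq,
test_and_clear_pv_eoi and guest_eoi are hypothetical stand-ins: the real guest
side uses the per-CPU kvm_apic_eoi word registered via MSR_KVM_PV_EOI_EN and
__test_and_clear_bit (see kvm_guest_apic_eoi_write below), and the host side
lives in the follow-up patch 'kvm: host side for eoi optimization'.

  #include <stdbool.h>
  #include <stdio.h>

  /* Stand-in for the per-CPU word the guest shares with the host. */
  static unsigned long pv_eoi_flag;

  /* Host side (modelled): when it injects an interrupt and no EOI exit is
   * needed, it sets bit 0 (KVM_PV_EOI_BIT in the patch). */
  static void host_inject_irq(bool eoi_can_be_skipped)
  {
          if (eoi_can_be_skipped)
                  pv_eoi_flag |= 1UL;
  }

  /* A single test-and-clear operation - necessary, per the commit message,
   * so that the host can detect interrupt nesting. */
  static bool test_and_clear_pv_eoi(void)
  {
          bool was_set = pv_eoi_flag & 1UL;

          pv_eoi_flag &= ~1UL;
          return was_set;
  }

  /* Guest side (modelled on kvm_guest_apic_eoi_write in the diff below). */
  static void guest_eoi(void)
  {
          if (test_and_clear_pv_eoi())
                  return;                          /* EOI MSR/exit avoided */
          printf("APIC_EOI write -> exit to host\n");
  }

  int main(void)
  {
          host_inject_irq(true);   /* host: EOI exit not needed */
          guest_eoi();             /* skipped, no exit */

          host_inject_irq(false);  /* host: real EOI required */
          guest_eoi();             /* falls back to the APIC EOI write */
          return 0;
  }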
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/kvm_para.h |  7
-rw-r--r--  arch/x86/kernel/kvm.c           | 57
2 files changed, 61 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 63ab1661d00e..2f7712e08b1e 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -22,6 +22,7 @@
 #define KVM_FEATURE_CLOCKSOURCE2	3
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
+#define KVM_FEATURE_PV_EOI		6

 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -37,6 +38,7 @@
 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
 #define MSR_KVM_ASYNC_PF_EN	 0x4b564d02
 #define MSR_KVM_STEAL_TIME	 0x4b564d03
+#define MSR_KVM_PV_EOI_EN	 0x4b564d04

 struct kvm_steal_time {
 	__u64 steal;
@@ -89,6 +91,11 @@ struct kvm_vcpu_pv_apf_data {
 	__u32 enabled;
 };

+#define KVM_PV_EOI_BIT 0
+#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
+#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
+#define KVM_PV_EOI_DISABLED 0x0
+
 #ifdef __KERNEL__
 #include <asm/processor.h>

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e554e5ad2fe8..75ab94c75c7a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,6 +39,8 @@
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
 #include <asm/idle.h>
+#include <asm/apic.h>
+#include <asm/apicdef.h>

 static int kvmapf = 1;

@@ -283,6 +285,22 @@ static void kvm_register_steal_time(void)
 		cpu, __pa(st));
 }

+static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
+
+static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
+{
+	/**
+	 * This relies on __test_and_clear_bit to modify the memory
+	 * in a way that is atomic with respect to the local CPU.
+	 * The hypervisor only accesses this memory from the local CPU so
+	 * there's no need for lock or memory barriers.
+	 * An optimization barrier is implied in apic write.
+	 */
+	if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi)))
+		return;
+	apic->write(APIC_EOI, APIC_EOI_ACK);
+}
+
 void __cpuinit kvm_guest_cpu_init(void)
 {
 	if (!kvm_para_available())
@@ -300,11 +318,20 @@ void __cpuinit kvm_guest_cpu_init(void)
 		       smp_processor_id());
 	}

+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
+		unsigned long pa;
+		/* Size alignment is implied but just to make it explicit. */
+		BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
+		__get_cpu_var(kvm_apic_eoi) = 0;
+		pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
+		wrmsrl(MSR_KVM_PV_EOI_EN, pa);
+	}
+
 	if (has_steal_clock)
 		kvm_register_steal_time();
 }

-static void kvm_pv_disable_apf(void *unused)
+static void kvm_pv_disable_apf(void)
 {
 	if (!__get_cpu_var(apf_reason).enabled)
 		return;
@@ -316,11 +343,23 @@ static void kvm_pv_disable_apf(void *unused)
 	       smp_processor_id());
 }

+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+	/*
+	 * We disable PV EOI before we load a new kernel by kexec,
+	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
+	 * New kernel can re-enable when it boots.
+	 */
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
+}
+
 static int kvm_pv_reboot_notify(struct notifier_block *nb,
 				unsigned long code, void *unused)
 {
 	if (code == SYS_RESTART)
-		on_each_cpu(kvm_pv_disable_apf, NULL, 1);
+		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
 	return NOTIFY_DONE;
 }

@@ -371,7 +410,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
 static void kvm_guest_cpu_offline(void *dummy)
 {
 	kvm_disable_steal_time();
-	kvm_pv_disable_apf(NULL);
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
 	apf_task_wake_all();
 }

@@ -424,6 +465,16 @@ void __init kvm_guest_init(void)
 		pv_time_ops.steal_clock = kvm_steal_clock;
 	}

+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
+		struct apic **drv;
+
+		for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+			/* Should happen once for each apic */
+			WARN_ON((*drv)->eoi_write == kvm_guest_apic_eoi_write);
+			(*drv)->eoi_write = kvm_guest_apic_eoi_write;
+		}
+	}
+
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	register_cpu_notifier(&kvm_cpu_notifier);