-rw-r--r--  arch/x86/include/asm/kvm_para.h |  7
-rw-r--r--  arch/x86/kernel/kvm.c           | 57
2 files changed, 61 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 63ab1661d00e..2f7712e08b1e 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -22,6 +22,7 @@
 #define KVM_FEATURE_CLOCKSOURCE2	3
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
+#define KVM_FEATURE_PV_EOI		6
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -37,6 +38,7 @@
 #define MSR_KVM_SYSTEM_TIME_NEW	0x4b564d01
 #define MSR_KVM_ASYNC_PF_EN	0x4b564d02
 #define MSR_KVM_STEAL_TIME	0x4b564d03
+#define MSR_KVM_PV_EOI_EN	0x4b564d04
 
 struct kvm_steal_time {
 	__u64 steal;
@@ -89,6 +91,11 @@ struct kvm_vcpu_pv_apf_data {
 	__u32 enabled;
 };
 
+#define KVM_PV_EOI_BIT 0
+#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
+#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
+#define KVM_PV_EOI_DISABLED 0x0
+
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
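
The new ABI is small: one CPUID feature bit, one MSR, and a one-bit flag word shared between guest and host. Below is a minimal user-space sketch (not part of the patch) of how the MSR_KVM_PV_EOI_EN value is put together: the guest-physical address of the per-CPU flag word, with bit 0 reused as an enable flag. MSR_ENABLE_BIT stands in for the kernel's KVM_MSR_ENABLED and the example address is made up.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the defines added above. */
#define KVM_PV_EOI_BIT		0
#define KVM_PV_EOI_MASK		(0x1 << KVM_PV_EOI_BIT)
#define KVM_PV_EOI_ENABLED	KVM_PV_EOI_MASK
/* Assumed stand-in for the kernel's KVM_MSR_ENABLED (bit 0 of the MSR value). */
#define MSR_ENABLE_BIT		0x1ULL

int main(void)
{
	/* Pretend this is the guest-physical address of the per-CPU flag word. */
	uint64_t pa = 0x123456780ULL;
	uint64_t flag_word = 0;

	/* The word is at least 4-byte aligned, so bit 0 of pa is free. */
	assert((pa & 0x3) == 0);

	/* The guest enables the feature by writing address | enable bit. */
	uint64_t msr_val = pa | MSR_ENABLE_BIT;
	printf("MSR_KVM_PV_EOI_EN <- 0x%llx\n", (unsigned long long)msr_val);

	/* The host recovers the address, and when an injected interrupt needs
	 * no EOI-induced exit it sets KVM_PV_EOI_ENABLED in that word. */
	printf("registered address: 0x%llx\n",
	       (unsigned long long)(msr_val & ~MSR_ENABLE_BIT));
	flag_word |= KVM_PV_EOI_ENABLED;
	printf("flag word after injection: 0x%llx\n",
	       (unsigned long long)flag_word);
	return 0;
}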
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e554e5ad2fe8..75ab94c75c7a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,6 +39,8 @@
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
 #include <asm/idle.h>
+#include <asm/apic.h>
+#include <asm/apicdef.h>
 
 static int kvmapf = 1;
 
@@ -283,6 +285,22 @@ static void kvm_register_steal_time(void)
 		cpu, __pa(st));
 }
 
+static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
+
+static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
+{
+	/**
+	 * This relies on __test_and_clear_bit to modify the memory
+	 * in a way that is atomic with respect to the local CPU.
+	 * The hypervisor only accesses this memory from the local CPU so
+	 * there's no need for lock or memory barriers.
+	 * An optimization barrier is implied in apic write.
+	 */
+	if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi)))
+		return;
+	apic->write(APIC_EOI, APIC_EOI_ACK);
+}
+
 void __cpuinit kvm_guest_cpu_init(void)
 {
 	if (!kvm_para_available())
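
kvm_guest_apic_eoi_write() is the heart of the patch: if the host set the per-CPU bit when it injected the interrupt, a plain local-CPU-atomic test-and-clear is all the EOI needs, and the APIC register write (which would trap to the host) is skipped. The following self-contained sketch models that decision in user space; test_and_clear_bit_local() and the write counter are illustrative stand-ins, not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

#define KVM_PV_EOI_BIT	0

/* Stand-in for the per-CPU kvm_apic_eoi word shared with the hypervisor. */
static unsigned long pv_eoi_word;
/* Counts the fallbacks to a real APIC_EOI register write. */
static unsigned int apic_eoi_writes;

/* Plain test-and-clear, like __test_and_clear_bit: atomic only with respect
 * to the local CPU, which is all this word ever needs. */
static bool test_and_clear_bit_local(int bit, unsigned long *word)
{
	unsigned long mask = 1UL << bit;
	bool was_set = (*word & mask) != 0;

	*word &= ~mask;
	return was_set;
}

/* Model of kvm_guest_apic_eoi_write(): skip the register write (and the
 * resulting exit) when the host already marked the EOI as handled. */
static void guest_eoi(void)
{
	if (test_and_clear_bit_local(KVM_PV_EOI_BIT, &pv_eoi_word))
		return;			/* paravirt fast path */
	apic_eoi_writes++;		/* ordinary APIC_EOI write */
}

int main(void)
{
	pv_eoi_word = 1UL << KVM_PV_EOI_BIT;	/* host set the bit on injection */
	guest_eoi();				/* fast path, no register write */
	guest_eoi();				/* bit clear: real write */
	printf("APIC_EOI register writes: %u\n", apic_eoi_writes);	/* prints 1 */
	return 0;
}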
@@ -300,11 +318,20 @@ void __cpuinit kvm_guest_cpu_init(void)
 		       smp_processor_id());
 	}
 
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
+		unsigned long pa;
+		/* Size alignment is implied but just to make it explicit. */
+		BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
+		__get_cpu_var(kvm_apic_eoi) = 0;
+		pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
+		wrmsrl(MSR_KVM_PV_EOI_EN, pa);
+	}
+
 	if (has_steal_clock)
 		kvm_register_steal_time();
 }
 
-static void kvm_pv_disable_apf(void *unused)
+static void kvm_pv_disable_apf(void)
 {
 	if (!__get_cpu_var(apf_reason).enabled)
 		return;
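
The BUILD_BUG_ON() above is what makes the address-plus-flag encoding safe: the per-CPU word is at least 4-byte aligned, so bits 0-1 of its physical address are known to be zero and bit 0 can carry the enable flag. A user-space analogue of that compile-time check, using C11 _Static_assert in place of BUILD_BUG_ON (an assumption of this sketch, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the per-CPU flag word registered with MSR_KVM_PV_EOI_EN. */
static unsigned long fake_apic_eoi;

/* Compile-time check, analogous to BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4):
 * the build fails if the enable bit could ever overlap the address bits. */
_Static_assert(_Alignof(unsigned long) >= 4,
	       "PV EOI flag word must be at least 4-byte aligned");

int main(void)
{
	/* In the kernel this would be a physical address; the alignment
	 * guarantee is the same either way. */
	uintptr_t addr = (uintptr_t)&fake_apic_eoi;

	printf("low two address bits: %lu\n", (unsigned long)(addr & 0x3));
	return 0;
}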
@@ -316,11 +343,23 @@ static void kvm_pv_disable_apf(void *unused)
 	       smp_processor_id());
 }
 
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+	/*
+	 * We disable PV EOI before we load a new kernel by kexec,
+	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
+	 * New kernel can re-enable when it boots.
+	 */
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
+}
+
 static int kvm_pv_reboot_notify(struct notifier_block *nb,
 				unsigned long code, void *unused)
 {
 	if (code == SYS_RESTART)
-		on_each_cpu(kvm_pv_disable_apf, NULL, 1);
+		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
 	return NOTIFY_DONE;
 }
 
@@ -371,7 +410,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
 static void kvm_guest_cpu_offline(void *dummy)
 {
 	kvm_disable_steal_time();
-	kvm_pv_disable_apf(NULL);
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
 	apf_task_wake_all();
 }
 
@@ -424,6 +465,16 @@ void __init kvm_guest_init(void)
 		pv_time_ops.steal_clock = kvm_steal_clock;
 	}
 
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
+		struct apic **drv;
+
+		for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+			/* Should happen once for each apic */
+			WARN_ON((*drv)->eoi_write == kvm_guest_apic_eoi_write);
+			(*drv)->eoi_write = kvm_guest_apic_eoi_write;
+		}
+	}
+
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	register_cpu_notifier(&kvm_cpu_notifier);
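
The last hunk wires the new path in by patching the eoi_write callback of every registered APIC driver, so only EOI writes take the paravirt route while all other APIC accesses stay as they were. A toy model of that function-pointer override (the struct and driver names here are invented for illustration; only the pattern matches the loop above):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Invented miniature of struct apic: just a name and the EOI callback. */
struct toy_apic {
	const char *name;
	void (*eoi_write)(uint32_t reg, uint32_t val);
};

static void native_eoi_write(uint32_t reg, uint32_t val)
{
	(void)val;
	printf("native write to APIC reg 0x%x\n", reg);
}

static void pv_eoi_write(uint32_t reg, uint32_t val)
{
	(void)reg;
	(void)val;
	printf("paravirt EOI (register write may be skipped)\n");
}

/* Stand-in for the __apicdrivers[] table walked in kvm_guest_init(). */
static struct toy_apic drivers[] = {
	{ "toy_apic_flat",     native_eoi_write },
	{ "toy_apic_physflat", native_eoi_write },
};

int main(void)
{
	/* Same pattern as the hunk above: patch each driver exactly once. */
	for (size_t i = 0; i < sizeof(drivers) / sizeof(drivers[0]); i++) {
		if (drivers[i].eoi_write == pv_eoi_write)
			fprintf(stderr, "%s already patched\n", drivers[i].name);
		drivers[i].eoi_write = pv_eoi_write;
	}

	drivers[0].eoi_write(0xB0 /* APIC_EOI register offset */, 0);
	return 0;
}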