diff options
author | Glauber de Oliveira Costa <gcosta@redhat.com> | 2008-02-15 14:52:47 -0500 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-04-27 04:53:22 -0400 |
commit | 18068523d3a0b41fcee5b53cdb437a0ab4d65e4b (patch) | |
tree | cbedbd4371cb57c34728137988a7433ae736d79a /arch/x86/kvm/x86.c | |
parent | 24e09cbf480a72f9c952af4ca77b159503dca44b (diff) |
KVM: paravirtualized clocksource: host part
This is the host part of kvm clocksource implementation. As it does
not include clockevents, it is a fairly simple implementation. We
only have to register a per-vcpu area, and start writing to it periodically.
The area is binary compatible with xen, as we use the same shadow_info
structure.
[marcelo: fix bad_page on MSR_KVM_SYSTEM_TIME]
[avi: save full value of the msr, even if enable bit is clear]
[avi: clear previous value of time_page]
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 113 |
1 files changed, 112 insertions, 1 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c910c774a9b..256c0fc92b67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include "irq.h" | 19 | #include "irq.h" |
20 | #include "mmu.h" | 20 | #include "mmu.h" |
21 | 21 | ||
22 | #include <linux/clocksource.h> | ||
22 | #include <linux/kvm.h> | 23 | #include <linux/kvm.h> |
23 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
24 | #include <linux/vmalloc.h> | 25 | #include <linux/vmalloc.h> |
@@ -424,7 +425,7 @@ static u32 msrs_to_save[] = { | |||
424 | #ifdef CONFIG_X86_64 | 425 | #ifdef CONFIG_X86_64 |
425 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 426 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
426 | #endif | 427 | #endif |
427 | MSR_IA32_TIME_STAMP_COUNTER, | 428 | MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
428 | }; | 429 | }; |
429 | 430 | ||
430 | static unsigned num_msrs_to_save; | 431 | static unsigned num_msrs_to_save; |
@@ -482,6 +483,70 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
482 | return kvm_set_msr(vcpu, index, *data); | 483 | return kvm_set_msr(vcpu, index, *data); |
483 | } | 484 | } |
484 | 485 | ||
486 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | ||
487 | { | ||
488 | static int version; | ||
489 | struct kvm_wall_clock wc; | ||
490 | struct timespec wc_ts; | ||
491 | |||
492 | if (!wall_clock) | ||
493 | return; | ||
494 | |||
495 | version++; | ||
496 | |||
497 | down_read(&kvm->slots_lock); | ||
498 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | ||
499 | |||
500 | wc_ts = current_kernel_time(); | ||
501 | wc.wc_sec = wc_ts.tv_sec; | ||
502 | wc.wc_nsec = wc_ts.tv_nsec; | ||
503 | wc.wc_version = version; | ||
504 | |||
505 | kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); | ||
506 | |||
507 | version++; | ||
508 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | ||
509 | up_read(&kvm->slots_lock); | ||
510 | } | ||
511 | |||
512 | static void kvm_write_guest_time(struct kvm_vcpu *v) | ||
513 | { | ||
514 | struct timespec ts; | ||
515 | unsigned long flags; | ||
516 | struct kvm_vcpu_arch *vcpu = &v->arch; | ||
517 | void *shared_kaddr; | ||
518 | |||
519 | if ((!vcpu->time_page)) | ||
520 | return; | ||
521 | |||
522 | /* Keep irq disabled to prevent changes to the clock */ | ||
523 | local_irq_save(flags); | ||
524 | kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, | ||
525 | &vcpu->hv_clock.tsc_timestamp); | ||
526 | ktime_get_ts(&ts); | ||
527 | local_irq_restore(flags); | ||
528 | |||
529 | /* With all the info we got, fill in the values */ | ||
530 | |||
531 | vcpu->hv_clock.system_time = ts.tv_nsec + | ||
532 | (NSEC_PER_SEC * (u64)ts.tv_sec); | ||
533 | /* | ||
534 | * The interface expects us to write an even number signaling that the | ||
535 | * update is finished. Since the guest won't see the intermediate | ||
536 | * state, we just write "2" at the end | ||
537 | */ | ||
538 | vcpu->hv_clock.version = 2; | ||
539 | |||
540 | shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); | ||
541 | |||
542 | memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, | ||
543 | sizeof(vcpu->hv_clock)); | ||
544 | |||
545 | kunmap_atomic(shared_kaddr, KM_USER0); | ||
546 | |||
547 | mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); | ||
548 | } | ||
549 | |||
485 | 550 | ||
486 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 551 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
487 | { | 552 | { |
@@ -511,6 +576,44 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
511 | case MSR_IA32_MISC_ENABLE: | 576 | case MSR_IA32_MISC_ENABLE: |
512 | vcpu->arch.ia32_misc_enable_msr = data; | 577 | vcpu->arch.ia32_misc_enable_msr = data; |
513 | break; | 578 | break; |
579 | case MSR_KVM_WALL_CLOCK: | ||
580 | vcpu->kvm->arch.wall_clock = data; | ||
581 | kvm_write_wall_clock(vcpu->kvm, data); | ||
582 | break; | ||
583 | case MSR_KVM_SYSTEM_TIME: { | ||
584 | if (vcpu->arch.time_page) { | ||
585 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
586 | vcpu->arch.time_page = NULL; | ||
587 | } | ||
588 | |||
589 | vcpu->arch.time = data; | ||
590 | |||
591 | /* we verify if the enable bit is set... */ | ||
592 | if (!(data & 1)) | ||
593 | break; | ||
594 | |||
595 | /* ...but clean it before doing the actual write */ | ||
596 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); | ||
597 | |||
598 | vcpu->arch.hv_clock.tsc_to_system_mul = | ||
599 | clocksource_khz2mult(tsc_khz, 22); | ||
600 | vcpu->arch.hv_clock.tsc_shift = 22; | ||
601 | |||
602 | down_read(&current->mm->mmap_sem); | |
603 | down_read(&vcpu->kvm->slots_lock); | ||
604 | vcpu->arch.time_page = | ||
605 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); | ||
606 | up_read(&vcpu->kvm->slots_lock); | ||
607 | up_read(&current->mm->mmap_sem); | |
608 | |||
609 | if (is_error_page(vcpu->arch.time_page)) { | ||
610 | kvm_release_page_clean(vcpu->arch.time_page); | ||
611 | vcpu->arch.time_page = NULL; | ||
612 | } | ||
613 | |||
614 | kvm_write_guest_time(vcpu); | ||
615 | break; | ||
616 | } | ||
514 | default: | 617 | default: |
515 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); | 618 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); |
516 | return 1; | 619 | return 1; |
@@ -569,6 +672,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
569 | case MSR_EFER: | 672 | case MSR_EFER: |
570 | data = vcpu->arch.shadow_efer; | 673 | data = vcpu->arch.shadow_efer; |
571 | break; | 674 | break; |
675 | case MSR_KVM_WALL_CLOCK: | ||
676 | data = vcpu->kvm->arch.wall_clock; | ||
677 | break; | ||
678 | case MSR_KVM_SYSTEM_TIME: | ||
679 | data = vcpu->arch.time; | ||
680 | break; | ||
572 | default: | 681 | default: |
573 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 682 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
574 | return 1; | 683 | return 1; |
@@ -696,6 +805,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
696 | case KVM_CAP_USER_MEMORY: | 805 | case KVM_CAP_USER_MEMORY: |
697 | case KVM_CAP_SET_TSS_ADDR: | 806 | case KVM_CAP_SET_TSS_ADDR: |
698 | case KVM_CAP_EXT_CPUID: | 807 | case KVM_CAP_EXT_CPUID: |
808 | case KVM_CAP_CLOCKSOURCE: | ||
699 | r = 1; | 809 | r = 1; |
700 | break; | 810 | break; |
701 | case KVM_CAP_VAPIC: | 811 | case KVM_CAP_VAPIC: |
@@ -771,6 +881,7 @@ out: | |||
771 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 881 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
772 | { | 882 | { |
773 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 883 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
884 | kvm_write_guest_time(vcpu); | ||
774 | } | 885 | } |
775 | 886 | ||
776 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 887 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |