diff options
| -rw-r--r-- | arch/x86/Kconfig | 4 | ||||
| -rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/pvclock.c | 141 | ||||
| -rw-r--r-- | include/asm-x86/pvclock-abi.h | 42 | ||||
| -rw-r--r-- | include/asm-x86/pvclock.h | 13 |
5 files changed, 201 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 52e18e6d2ba0..f94bca6ff47f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -410,6 +410,10 @@ config PARAVIRT | |||
| 410 | over full virtualization. However, when run without a hypervisor | 410 | over full virtualization. However, when run without a hypervisor |
| 411 | the kernel is theoretically slower and slightly larger. | 411 | the kernel is theoretically slower and slightly larger. |
| 412 | 412 | ||
| 413 | config PARAVIRT_CLOCK | ||
| 414 | bool | ||
| 415 | default n | ||
| 416 | |||
| 413 | endif | 417 | endif |
| 414 | 418 | ||
| 415 | config MEMTEST_BOOTPARAM | 419 | config MEMTEST_BOOTPARAM |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b4720..77807d4769c9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | |||
| 82 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 82 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
| 83 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 83 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
| 84 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 84 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o |
| 85 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | ||
| 85 | 86 | ||
| 86 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 87 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
| 87 | 88 | ||
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c new file mode 100644 index 000000000000..05fbe9a0325a --- /dev/null +++ b/arch/x86/kernel/pvclock.c | |||
| @@ -0,0 +1,141 @@ | |||
| 1 | /* paravirtual clock -- common code used by kvm/xen | ||
| 2 | |||
| 3 | This program is free software; you can redistribute it and/or modify | ||
| 4 | it under the terms of the GNU General Public License as published by | ||
| 5 | the Free Software Foundation; either version 2 of the License, or | ||
| 6 | (at your option) any later version. | ||
| 7 | |||
| 8 | This program is distributed in the hope that it will be useful, | ||
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | GNU General Public License for more details. | ||
| 12 | |||
| 13 | You should have received a copy of the GNU General Public License | ||
| 14 | along with this program; if not, write to the Free Software | ||
| 15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/kernel.h> | ||
| 19 | #include <linux/percpu.h> | ||
| 20 | #include <asm/pvclock.h> | ||
| 21 | |||
| 22 | /* | ||
| 23 | * Local snapshot of time values that are periodically updated | ||
| 24 | * xen: magic shared_info page | ||
| 25 | * kvm: gpa registered via msr | ||
| 26 | * and then copied here (see pvclock_get_time_values()). | ||
| 27 | */ | ||
| 28 | struct pvclock_shadow_time { | ||
| 29 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
| 30 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
| 31 | u32 tsc_to_nsec_mul; /* 32-bit fraction handed to scale_delta() as mul_frac */ | ||
| 32 | int tsc_shift; /* shift handed to scale_delta(); negative means shift right */ | ||
| 33 | u32 version; /* source version sampled together with this snapshot */ | ||
| 34 | }; | ||
| 35 | |||
/*
 * Scale a 64-bit delta by shifting it and then multiplying by a 32-bit
 * binary fraction, yielding a 64-bit result:
 *
 *	result = ((delta << shift) * mul_frac) >> 32	(shift >= 0)
 *	result = ((delta >> -shift) * mul_frac) >> 32	(shift <  0)
 *
 * @delta:    value to scale (e.g. a TSC delta)
 * @mul_frac: multiplier, interpreted as a fraction with 32 fractional bits
 * @shift:    pre-scaling shift; must satisfy -64 < shift < 64, since
 *            shifting a 64-bit value by 64 or more is undefined in C
 *
 * The multiplication is done on the two 32-bit halves of the shifted
 * delta, which is exactly what the former hand-written i386 assembly
 * did and gives the same value as the former x86_64 "mul ; shrd"
 * sequence.  Writing it in C avoids the inline asm (whose x86_64
 * variant declared %rdx as an input only, although "mul" overwrites it)
 * and removes the need for a per-architecture implementation.
 *
 * The sum cannot overflow: hi * mul_frac <= (2^32 - 1)^2 and the carry
 * term (lo * mul_frac) >> 32 is < 2^32, so the total stays below 2^64.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
	u32 lo, hi;

	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

	lo = (u32)delta;
	hi = (u32)(delta >> 32);

	/* (delta * mul_frac) >> 32, split so every step fits in 64 bits */
	return (((u64)lo * mul_frac) >> 32) + (u64)hi * mul_frac;
}
| 73 | |||
| 74 | static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) | ||
| 75 | { | ||
| 76 | u64 delta = native_read_tsc() - shadow->tsc_timestamp; | ||
| 77 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Reads a consistent set of time-base values from hypervisor, | ||
| 82 | * into a shadow data area. | ||
| 83 | */ | ||
| 84 | static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | ||
| 85 | struct pvclock_vcpu_time_info *src) | ||
| 86 | { | ||
| 87 | do { | ||
| 88 | dst->version = src->version; | ||
| 89 | rmb(); /* fetch version before data */ | ||
| 90 | dst->tsc_timestamp = src->tsc_timestamp; | ||
| 91 | dst->system_timestamp = src->system_time; | ||
| 92 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
| 93 | dst->tsc_shift = src->tsc_shift; | ||
| 94 | rmb(); /* test version after fetching data */ | ||
| 95 | } while ((src->version & 1) || (dst->version != src->version)); | ||
| 96 | |||
| 97 | return dst->version; | ||
| 98 | } | ||
| 99 | |||
| 100 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | ||
| 101 | { | ||
| 102 | struct pvclock_shadow_time shadow; | ||
| 103 | unsigned version; | ||
| 104 | cycle_t ret, offset; | ||
| 105 | |||
| 106 | do { | ||
| 107 | version = pvclock_get_time_values(&shadow, src); | ||
| 108 | barrier(); | ||
| 109 | offset = pvclock_get_nsec_offset(&shadow); | ||
| 110 | ret = shadow.system_timestamp + offset; | ||
| 111 | barrier(); | ||
| 112 | } while (version != src->version); | ||
| 113 | |||
| 114 | return ret; | ||
| 115 | } | ||
| 116 | |||
| 117 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, | ||
| 118 | struct pvclock_vcpu_time_info *vcpu_time, | ||
| 119 | struct timespec *ts) | ||
| 120 | { | ||
| 121 | u32 version; | ||
| 122 | u64 delta; | ||
| 123 | struct timespec now; | ||
| 124 | |||
| 125 | /* get wallclock at system boot */ | ||
| 126 | do { | ||
| 127 | version = wall_clock->version; | ||
| 128 | rmb(); /* fetch version before time */ | ||
| 129 | now.tv_sec = wall_clock->sec; | ||
| 130 | now.tv_nsec = wall_clock->nsec; | ||
| 131 | rmb(); /* fetch time before checking version */ | ||
| 132 | } while ((wall_clock->version & 1) || (version != wall_clock->version)); | ||
| 133 | |||
| 134 | delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ | ||
| 135 | delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; | ||
| 136 | |||
| 137 | now.tv_nsec = do_div(delta, NSEC_PER_SEC); | ||
| 138 | now.tv_sec = delta; | ||
| 139 | |||
| 140 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | ||
| 141 | } | ||
diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h new file mode 100644 index 000000000000..6857f840b243 --- /dev/null +++ b/include/asm-x86/pvclock-abi.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | #ifndef _ASM_X86_PVCLOCK_ABI_H_ | ||
| 2 | #define _ASM_X86_PVCLOCK_ABI_H_ | ||
| 3 | #ifndef __ASSEMBLY__ | ||
| 4 | |||
| 5 | /* | ||
| 6 | * These structs MUST NOT be changed. | ||
| 7 | * They are the ABI between hypervisor and guest OS. | ||
| 8 | * Both Xen and KVM are using this. | ||
| 9 | * | ||
| 10 | * pvclock_vcpu_time_info holds the system time and the tsc timestamp | ||
| 11 | * of the last update. So the guest can use the tsc delta to get a | ||
| 12 | * more precise system time. There is one per virtual cpu. | ||
| 13 | * | ||
| 14 | * pvclock_wall_clock references the point in time when the system | ||
| 15 | * time was zero (usually boot time), thus the guest calculates the | ||
| 16 | * current wall clock by adding the system time. | ||
| 17 | * | ||
| 18 | * Protocol for the "version" fields is: hypervisor raises it (making | ||
| 19 | * it odd) before it starts updating the fields and raises it again | ||
| 20 | * (making it even) when it is done. Thus the guest can make sure the | ||
| 21 | * time values it got are consistent by checking the version before | ||
| 22 | * and after reading them. | ||
| 23 | */ | ||
| 24 | |||
| 25 | struct pvclock_vcpu_time_info { | ||
| 26 | u32 version; /* odd while the hypervisor is updating the fields */ | ||
| 27 | u32 pad0; | ||
| 28 | u64 tsc_timestamp; /* tsc timestamp of the last update */ | ||
| 29 | u64 system_time; /* system time at the last update */ | ||
| 30 | u32 tsc_to_system_mul; /* multiplier for converting a tsc delta to system time */ | ||
| 31 | s8 tsc_shift; /* shift applied to the tsc delta before multiplying */ | ||
| 32 | u8 pad[3]; | ||
| 33 | } __attribute__((__packed__)); /* 32 bytes */ | ||
| 34 | |||
| 35 | struct pvclock_wall_clock { | ||
| 36 | u32 version; /* odd while the hypervisor is updating sec/nsec */ | ||
| 37 | u32 sec; /* seconds part of the time at which system time was zero */ | ||
| 38 | u32 nsec; /* nanoseconds part of that point in time */ | ||
| 39 | } __attribute__((__packed__)); | ||
| 40 | |||
| 41 | #endif /* __ASSEMBLY__ */ | ||
| 42 | #endif /* _ASM_X86_PVCLOCK_ABI_H_ */ | ||
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h new file mode 100644 index 000000000000..85b1bba8e0a3 --- /dev/null +++ b/include/asm-x86/pvclock.h | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | #ifndef _ASM_X86_PVCLOCK_H_ | ||
| 2 | #define _ASM_X86_PVCLOCK_H_ | ||
| 3 | |||
| 4 | #include <linux/clocksource.h> | ||
| 5 | #include <asm/pvclock-abi.h> | ||
| 6 | |||
| 7 | /* some helper functions for xen and kvm pv clock sources */ | ||
| 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); /* current system time derived from *src and the TSC */ | ||
| 9 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, | ||
| 10 | struct pvclock_vcpu_time_info *vcpu, | ||
| 11 | struct timespec *ts); /* stores wall clock (boot wallclock + system time) in *ts */ | ||
| 12 | |||
| 13 | #endif /* _ASM_X86_PVCLOCK_H_ */ | ||
