diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/kvmclock.c | 56 | ||||
-rw-r--r-- | arch/x86/kernel/pvclock.c | 37 | ||||
-rw-r--r-- | arch/x86/kernel/tboot.c | 1 |
3 files changed, 73 insertions, 21 deletions
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index feaeb0d3aa4f..eb9b76c716c2 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #define KVM_SCALE 22 | 29 | #define KVM_SCALE 22 |
30 | 30 | ||
31 | static int kvmclock = 1; | 31 | static int kvmclock = 1; |
32 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | ||
33 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | ||
32 | 34 | ||
33 | static int parse_no_kvmclock(char *arg) | 35 | static int parse_no_kvmclock(char *arg) |
34 | { | 36 | { |
@@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void) | |||
54 | 56 | ||
55 | low = (int)__pa_symbol(&wall_clock); | 57 | low = (int)__pa_symbol(&wall_clock); |
56 | high = ((u64)__pa_symbol(&wall_clock) >> 32); | 58 | high = ((u64)__pa_symbol(&wall_clock) >> 32); |
57 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | 59 | |
60 | native_write_msr(msr_kvm_wall_clock, low, high); | ||
58 | 61 | ||
59 | vcpu_time = &get_cpu_var(hv_clock); | 62 | vcpu_time = &get_cpu_var(hv_clock); |
60 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); | 63 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); |
@@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt) | |||
130 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | 133 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); |
131 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | 134 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", |
132 | cpu, high, low, txt); | 135 | cpu, high, low, txt); |
133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | 136 | |
137 | return native_write_msr_safe(msr_kvm_system_time, low, high); | ||
134 | } | 138 | } |
135 | 139 | ||
136 | #ifdef CONFIG_X86_LOCAL_APIC | 140 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
165 | #ifdef CONFIG_KEXEC | 169 | #ifdef CONFIG_KEXEC |
166 | static void kvm_crash_shutdown(struct pt_regs *regs) | 170 | static void kvm_crash_shutdown(struct pt_regs *regs) |
167 | { | 171 | { |
168 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 172 | native_write_msr(msr_kvm_system_time, 0, 0); |
169 | native_machine_crash_shutdown(regs); | 173 | native_machine_crash_shutdown(regs); |
170 | } | 174 | } |
171 | #endif | 175 | #endif |
172 | 176 | ||
173 | static void kvm_shutdown(void) | 177 | static void kvm_shutdown(void) |
174 | { | 178 | { |
175 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 179 | native_write_msr(msr_kvm_system_time, 0, 0); |
176 | native_machine_shutdown(); | 180 | native_machine_shutdown(); |
177 | } | 181 | } |
178 | 182 | ||
@@ -181,27 +185,37 @@ void __init kvmclock_init(void) | |||
181 | if (!kvm_para_available()) | 185 | if (!kvm_para_available()) |
182 | return; | 186 | return; |
183 | 187 | ||
184 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 188 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { |
185 | if (kvm_register_clock("boot clock")) | 189 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; |
186 | return; | 190 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; |
187 | pv_time_ops.sched_clock = kvm_clock_read; | 191 | } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) |
188 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 192 | return; |
189 | x86_platform.get_wallclock = kvm_get_wallclock; | 193 | |
190 | x86_platform.set_wallclock = kvm_set_wallclock; | 194 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x", |
195 | msr_kvm_system_time, msr_kvm_wall_clock); | ||
196 | |||
197 | if (kvm_register_clock("boot clock")) | ||
198 | return; | ||
199 | pv_time_ops.sched_clock = kvm_clock_read; | ||
200 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | ||
201 | x86_platform.get_wallclock = kvm_get_wallclock; | ||
202 | x86_platform.set_wallclock = kvm_set_wallclock; | ||
191 | #ifdef CONFIG_X86_LOCAL_APIC | 203 | #ifdef CONFIG_X86_LOCAL_APIC |
192 | x86_cpuinit.setup_percpu_clockev = | 204 | x86_cpuinit.setup_percpu_clockev = |
193 | kvm_setup_secondary_clock; | 205 | kvm_setup_secondary_clock; |
194 | #endif | 206 | #endif |
195 | #ifdef CONFIG_SMP | 207 | #ifdef CONFIG_SMP |
196 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 208 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
197 | #endif | 209 | #endif |
198 | machine_ops.shutdown = kvm_shutdown; | 210 | machine_ops.shutdown = kvm_shutdown; |
199 | #ifdef CONFIG_KEXEC | 211 | #ifdef CONFIG_KEXEC |
200 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 212 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
201 | #endif | 213 | #endif |
202 | kvm_get_preset_lpj(); | 214 | kvm_get_preset_lpj(); |
203 | clocksource_register(&kvm_clock); | 215 | clocksource_register(&kvm_clock); |
204 | pv_info.paravirt_enabled = 1; | 216 | pv_info.paravirt_enabled = 1; |
205 | pv_info.name = "KVM"; | 217 | pv_info.name = "KVM"; |
206 | } | 218 | |
219 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
220 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | ||
207 | } | 221 | } |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 03801f2f761f..239427ca02af 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -31,8 +31,16 @@ struct pvclock_shadow_time { | |||
31 | u32 tsc_to_nsec_mul; | 31 | u32 tsc_to_nsec_mul; |
32 | int tsc_shift; | 32 | int tsc_shift; |
33 | u32 version; | 33 | u32 version; |
34 | u8 flags; | ||
34 | }; | 35 | }; |
35 | 36 | ||
37 | static u8 valid_flags __read_mostly = 0; | ||
38 | |||
39 | void pvclock_set_flags(u8 flags) | ||
40 | { | ||
41 | valid_flags = flags; | ||
42 | } | ||
43 | |||
36 | /* | 44 | /* |
37 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | 45 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, |
38 | * yielding a 64-bit result. | 46 | * yielding a 64-bit result. |
@@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | |||
91 | dst->system_timestamp = src->system_time; | 99 | dst->system_timestamp = src->system_time; |
92 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | 100 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; |
93 | dst->tsc_shift = src->tsc_shift; | 101 | dst->tsc_shift = src->tsc_shift; |
102 | dst->flags = src->flags; | ||
94 | rmb(); /* test version after fetching data */ | 103 | rmb(); /* test version after fetching data */ |
95 | } while ((src->version & 1) || (dst->version != src->version)); | 104 | } while ((src->version & 1) || (dst->version != src->version)); |
96 | 105 | ||
@@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | |||
109 | return pv_tsc_khz; | 118 | return pv_tsc_khz; |
110 | } | 119 | } |
111 | 120 | ||
121 | static atomic64_t last_value = ATOMIC64_INIT(0); | ||
122 | |||
112 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 123 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
113 | { | 124 | { |
114 | struct pvclock_shadow_time shadow; | 125 | struct pvclock_shadow_time shadow; |
115 | unsigned version; | 126 | unsigned version; |
116 | cycle_t ret, offset; | 127 | cycle_t ret, offset; |
128 | u64 last; | ||
117 | 129 | ||
118 | do { | 130 | do { |
119 | version = pvclock_get_time_values(&shadow, src); | 131 | version = pvclock_get_time_values(&shadow, src); |
@@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | |||
123 | barrier(); | 135 | barrier(); |
124 | } while (version != src->version); | 136 | } while (version != src->version); |
125 | 137 | ||
138 | if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && | ||
139 | (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) | ||
140 | return ret; | ||
141 | |||
142 | /* | ||
143 | * Assumption here is that last_value, a global accumulator, always goes | ||
144 | * forward. If we are less than that, we should not be much smaller. | ||
145 | * We assume there is an error margin we're inside, and then the correction | ||
146 | * does not sacrifice accuracy. | ||
147 | * | ||
148 | * For reads: global may have changed between test and return, | ||
149 | * but this means someone else poked the clock at a later time. | ||
150 | * We just need to make sure we are not seeing a backwards event. | ||
151 | * | ||
152 | * For updates: last_value = ret is not enough, since two vcpus could be | ||
153 | * updating at the same time, and one of them could be slightly behind, | ||
154 | * making the assumption that last_value always goes forward fail to hold. | ||
155 | */ | ||
156 | last = atomic64_read(&last_value); | ||
157 | do { | ||
158 | if (ret < last) | ||
159 | return last; | ||
160 | last = atomic64_cmpxchg(&last_value, last, ret); | ||
161 | } while (unlikely(last != ret)); | ||
162 | |||
126 | return ret; | 163 | return ret; |
127 | } | 164 | } |
128 | 165 | ||
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index cc2c60474fd0..c2f1b26141e2 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -46,6 +46,7 @@ | |||
46 | 46 | ||
47 | /* Global pointer to shared data; NULL means no measured launch. */ | 47 | /* Global pointer to shared data; NULL means no measured launch. */ |
48 | struct tboot *tboot __read_mostly; | 48 | struct tboot *tboot __read_mostly; |
49 | EXPORT_SYMBOL(tboot); | ||
49 | 50 | ||
50 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ | 51 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ |
51 | #define AP_WAIT_TIMEOUT 1 | 52 | #define AP_WAIT_TIMEOUT 1 |