diff options
author | Radim Krčmář <rkrcmar@redhat.com> | 2015-09-18 11:54:29 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2015-10-01 09:06:42 -0400 |
commit | 72c930dcfc2b49404ee9e20f6c868402e9c71166 (patch) | |
tree | 83ff98a7c359fef9f294bbca18529bca8caf7a27 | |
parent | 1cea0ce68ed76490ffa64a9e2a7a40104efe9352 (diff) |
x86: kvmclock: abolish PVCLOCK_COUNTS_FROM_ZERO
Newer KVM won't be exposing PVCLOCK_COUNTS_FROM_ZERO anymore.
The purpose of that flag was to start counting system time from 0 when
the KVM clock has been initialized.
We can achieve the same by selecting one read as the initial point.
A simple subtraction will work unless the KVM clock count overflows
earlier (has smaller width) than scheduler's cycle count. We should be
safe till x86_128.
Because PVCLOCK_COUNTS_FROM_ZERO was enabled only on new hypervisors,
setting sched clock as stable based on PVCLOCK_TSC_STABLE_BIT might
regress on older ones.
I presume we don't need to change kvm_clock_read instead of introducing
kvm_sched_clock_read. A problem could arise in case sched_clock is
expected to return the same value as get_cycles, but we should have
merged those clocks in that case.
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- | arch/x86/kernel/kvmclock.c | 46 |
1 file changed, 35 insertions, 11 deletions
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2c7aafa70702..2bd81e302427 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -32,6 +32,7 @@ | |||
32 | static int kvmclock = 1; | 32 | static int kvmclock = 1; |
33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | 33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; |
34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | 34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; |
35 | static cycle_t kvm_sched_clock_offset; | ||
35 | 36 | ||
36 | static int parse_no_kvmclock(char *arg) | 37 | static int parse_no_kvmclock(char *arg) |
37 | { | 38 | { |
@@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs) | |||
92 | return kvm_clock_read(); | 93 | return kvm_clock_read(); |
93 | } | 94 | } |
94 | 95 | ||
96 | static cycle_t kvm_sched_clock_read(void) | ||
97 | { | ||
98 | return kvm_clock_read() - kvm_sched_clock_offset; | ||
99 | } | ||
100 | |||
101 | static inline void kvm_sched_clock_init(bool stable) | ||
102 | { | ||
103 | if (!stable) { | ||
104 | pv_time_ops.sched_clock = kvm_clock_read; | ||
105 | return; | ||
106 | } | ||
107 | |||
108 | kvm_sched_clock_offset = kvm_clock_read(); | ||
109 | pv_time_ops.sched_clock = kvm_sched_clock_read; | ||
110 | set_sched_clock_stable(); | ||
111 | |||
112 | printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", | ||
113 | kvm_sched_clock_offset); | ||
114 | |||
115 | BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) > | ||
116 | sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time)); | ||
117 | } | ||
118 | |||
95 | /* | 119 | /* |
96 | * If we don't do that, there is the possibility that the guest | 120 | * If we don't do that, there is the possibility that the guest |
97 | * will calibrate under heavy load - thus, getting a lower lpj - | 121 | * will calibrate under heavy load - thus, getting a lower lpj - |
@@ -248,7 +272,17 @@ void __init kvmclock_init(void) | |||
248 | memblock_free(mem, size); | 272 | memblock_free(mem, size); |
249 | return; | 273 | return; |
250 | } | 274 | } |
251 | pv_time_ops.sched_clock = kvm_clock_read; | 275 | |
276 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
277 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | ||
278 | |||
279 | cpu = get_cpu(); | ||
280 | vcpu_time = &hv_clock[cpu].pvti; | ||
281 | flags = pvclock_read_flags(vcpu_time); | ||
282 | |||
283 | kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); | ||
284 | put_cpu(); | ||
285 | |||
252 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 286 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; |
253 | x86_platform.get_wallclock = kvm_get_wallclock; | 287 | x86_platform.get_wallclock = kvm_get_wallclock; |
254 | x86_platform.set_wallclock = kvm_set_wallclock; | 288 | x86_platform.set_wallclock = kvm_set_wallclock; |
@@ -265,16 +299,6 @@ void __init kvmclock_init(void) | |||
265 | kvm_get_preset_lpj(); | 299 | kvm_get_preset_lpj(); |
266 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); | 300 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); |
267 | pv_info.name = "KVM"; | 301 | pv_info.name = "KVM"; |
268 | |||
269 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
270 | pvclock_set_flags(~0); | ||
271 | |||
272 | cpu = get_cpu(); | ||
273 | vcpu_time = &hv_clock[cpu].pvti; | ||
274 | flags = pvclock_read_flags(vcpu_time); | ||
275 | if (flags & PVCLOCK_COUNTS_FROM_ZERO) | ||
276 | set_sched_clock_stable(); | ||
277 | put_cpu(); | ||
278 | } | 302 | } |
279 | 303 | ||
280 | int __init kvm_setup_vsyscall_timeinfo(void) | 304 | int __init kvm_setup_vsyscall_timeinfo(void) |