aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Radim Krčmář <rkrcmar@redhat.com> 2015-09-18 11:54:29 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2015-10-01 09:06:42 -0400
commit: 72c930dcfc2b49404ee9e20f6c868402e9c71166 (patch)
tree: 83ff98a7c359fef9f294bbca18529bca8caf7a27
parent: 1cea0ce68ed76490ffa64a9e2a7a40104efe9352 (diff)
x86: kvmclock: abolish PVCLOCK_COUNTS_FROM_ZERO
Newer KVM won't be exposing PVCLOCK_COUNTS_FROM_ZERO anymore. The purpose of that flag was to start counting system time from 0 when the KVM clock has been initialized. We can achieve the same by selecting one read as the initial point. A simple subtraction will work unless the KVM clock count overflows earlier (has smaller width) than scheduler's cycle count. We should be safe till x86_128. Because PVCLOCK_COUNTS_FROM_ZERO was enabled only on new hypervisors, setting sched clock as stable based on PVCLOCK_TSC_STABLE_BIT might regress on older ones. I presume we don't need to change kvm_clock_read instead of introducing kvm_sched_clock_read. A problem could arise in case sched_clock is expected to return the same value as get_cycles, but we should have merged those clocks in that case. Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> Acked-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- arch/x86/kernel/kvmclock.c 46
1 files changed, 35 insertions, 11 deletions
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 2c7aafa70702..2bd81e302427 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -32,6 +32,7 @@
32static int kvmclock = 1; 32static int kvmclock = 1;
33static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; 33static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
34static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; 34static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
35static cycle_t kvm_sched_clock_offset;
35 36
36static int parse_no_kvmclock(char *arg) 37static int parse_no_kvmclock(char *arg)
37{ 38{
@@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
92 return kvm_clock_read(); 93 return kvm_clock_read();
93} 94}
94 95
/*
 * sched_clock callback used when the KVM clock is considered stable.
 *
 * Returns the current kvm_clock_read() value minus kvm_sched_clock_offset.
 * The offset is a kvm_clock_read() sample taken in kvm_sched_clock_init(),
 * so the value returned here counts up from (approximately) zero at that
 * point rather than from the hypervisor's own time base.
 */
96static cycle_t kvm_sched_clock_read(void)
97{
98 return kvm_clock_read() - kvm_sched_clock_offset;
99}
100
/*
 * Select which function backs pv_time_ops.sched_clock.
 *
 * @stable: when false, sched_clock is the raw kvm_clock_read() and the
 *          sched clock is not marked stable. When true, the current
 *          kvm_clock_read() value is saved as kvm_sched_clock_offset,
 *          sched_clock becomes kvm_sched_clock_read() (reading minus
 *          that offset), and set_sched_clock_stable() is called.
 *
 * The only caller visible in this diff passes
 * (flags & PVCLOCK_TSC_STABLE_BIT).
 */
101static inline void kvm_sched_clock_init(bool stable)
102{
103 if (!stable) {
104 pv_time_ops.sched_clock = kvm_clock_read;
105 return;
106 }
107
/* Take one reading as the zero point for all later sched_clock reads. */
108 kvm_sched_clock_offset = kvm_clock_read();
109 pv_time_ops.sched_clock = kvm_sched_clock_read;
110 set_sched_clock_stable();
111
112 printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
113 kvm_sched_clock_offset);
114
/*
 * Compile-time check: the offset variable must not be wider than the
 * pvclock system_time field. Per the commit message, the subtraction
 * only works if the KVM clock count does not overflow earlier than the
 * scheduler's cycle count.
 */
115 BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
116 sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
117}
118
95/* 119/*
96 * If we don't do that, there is the possibility that the guest 120 * If we don't do that, there is the possibility that the guest
97 * will calibrate under heavy load - thus, getting a lower lpj - 121 * will calibrate under heavy load - thus, getting a lower lpj -
@@ -248,7 +272,17 @@ void __init kvmclock_init(void)
248 memblock_free(mem, size); 272 memblock_free(mem, size);
249 return; 273 return;
250 } 274 }
251 pv_time_ops.sched_clock = kvm_clock_read; 275
276 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
277 pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
278
279 cpu = get_cpu();
280 vcpu_time = &hv_clock[cpu].pvti;
281 flags = pvclock_read_flags(vcpu_time);
282
283 kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
284 put_cpu();
285
252 x86_platform.calibrate_tsc = kvm_get_tsc_khz; 286 x86_platform.calibrate_tsc = kvm_get_tsc_khz;
253 x86_platform.get_wallclock = kvm_get_wallclock; 287 x86_platform.get_wallclock = kvm_get_wallclock;
254 x86_platform.set_wallclock = kvm_set_wallclock; 288 x86_platform.set_wallclock = kvm_set_wallclock;
@@ -265,16 +299,6 @@ void __init kvmclock_init(void)
265 kvm_get_preset_lpj(); 299 kvm_get_preset_lpj();
266 clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); 300 clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
267 pv_info.name = "KVM"; 301 pv_info.name = "KVM";
268
269 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
270 pvclock_set_flags(~0);
271
272 cpu = get_cpu();
273 vcpu_time = &hv_clock[cpu].pvti;
274 flags = pvclock_read_flags(vcpu_time);
275 if (flags & PVCLOCK_COUNTS_FROM_ZERO)
276 set_sched_clock_stable();
277 put_cpu();
278} 302}
279 303
280int __init kvm_setup_vsyscall_timeinfo(void) 304int __init kvm_setup_vsyscall_timeinfo(void)