aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2008-04-01 13:45:18 -0400
committerIngo Molnar <mingo@elte.hu>2008-04-04 12:36:49 -0400
commit47001d603375f857a7fab0e9c095d964a1ea0039 (patch)
tree8cf2893bc3c4e1259cdf98150724fed820e810c8
parentc946c7de49a9ba50bc205d6359b41bbc8f01174c (diff)
x86: tsc prevent time going backwards
We already catch most of the TSC problems by sanity checks, but there is a subtle bug which has been in the code for ever. This can cause time jumps in the range of hours. This was reported in: http://lkml.org/lkml/2007/8/23/96 and http://lkml.org/lkml/2008/3/31/23 I was able to reproduce the problem with a gettimeofday loop test on a dual core and a quad core machine which both have sychronized TSCs. The TSCs seems not to be perfectly in sync though, but the kernel is not able to detect the slight delta in the sync check. Still there exists an extremly small window where this delta can be observed with a real big time jump. So far I was only able to reproduce this with the vsyscall gettimeofday implementation, but in theory this might be observable with the syscall based version as well. CPU 0 updates the clock source variables under xtime/vyscall lock and CPU1, where the TSC is slighty behind CPU0, is reading the time right after the seqlock was unlocked. The clocksource reference data was updated with the TSC from CPU0 and the value which is read from TSC on CPU1 is less than the reference data. This results in a huge delta value due to the unsigned subtraction of the TSC value and the reference value. This algorithm can not be changed due to the support of wrapping clock sources like pm timer. The huge delta is converted to nanoseconds and added to xtime, which is then observable by the caller. The next gettimeofday call on CPU1 will show the correct time again as now the TSC has advanced above the reference value. To prevent this TSC specific wreckage we need to compare the TSC value against the reference value and return the latter when it is larger than the actual TSC value. I pondered to mark the TSC unstable when the readout is smaller than the reference value, but this would render an otherwise good and fast clocksource unusable without a real good reason. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/tsc_32.c15
-rw-r--r--arch/x86/kernel/tsc_64.c23
2 files changed, 34 insertions, 4 deletions
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index f14cfd9d1f94..d7498b34c8e9 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -287,14 +287,27 @@ core_initcall(cpufreq_tsc);
287/* clock source code */ 287/* clock source code */
288 288
289static unsigned long current_tsc_khz = 0; 289static unsigned long current_tsc_khz = 0;
290static struct clocksource clocksource_tsc;
290 291
292/*
293 * We compare the TSC to the cycle_last value in the clocksource
294 * structure to avoid a nasty time-warp issue. This can be observed in
295 * a very small window right after one CPU updated cycle_last under
296 * xtime lock and the other CPU reads a TSC value which is smaller
297 * than the cycle_last reference value due to a TSC which is slighty
298 * behind. This delta is nowhere else observable, but in that case it
299 * results in a forward time jump in the range of hours due to the
300 * unsigned delta calculation of the time keeping core code, which is
301 * necessary to support wrapping clocksources like pm timer.
302 */
291static cycle_t read_tsc(void) 303static cycle_t read_tsc(void)
292{ 304{
293 cycle_t ret; 305 cycle_t ret;
294 306
295 rdtscll(ret); 307 rdtscll(ret);
296 308
297 return ret; 309 return ret >= clocksource_tsc.cycle_last ?
310 ret : clocksource_tsc.cycle_last;
298} 311}
299 312
300static struct clocksource clocksource_tsc = { 313static struct clocksource clocksource_tsc = {
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 947554ddabb6..01fc9f0c39e2 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -11,6 +11,7 @@
11#include <asm/hpet.h> 11#include <asm/hpet.h>
12#include <asm/timex.h> 12#include <asm/timex.h>
13#include <asm/timer.h> 13#include <asm/timer.h>
14#include <asm/vgtod.h>
14 15
15static int notsc __initdata = 0; 16static int notsc __initdata = 0;
16 17
@@ -290,18 +291,34 @@ int __init notsc_setup(char *s)
290 291
291__setup("notsc", notsc_setup); 292__setup("notsc", notsc_setup);
292 293
294static struct clocksource clocksource_tsc;
293 295
294/* clock source code: */ 296/*
297 * We compare the TSC to the cycle_last value in the clocksource
298 * structure to avoid a nasty time-warp. This can be observed in a
299 * very small window right after one CPU updated cycle_last under
300 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
301 * is smaller than the cycle_last reference value due to a TSC which
302 * is slighty behind. This delta is nowhere else observable, but in
303 * that case it results in a forward time jump in the range of hours
304 * due to the unsigned delta calculation of the time keeping core
305 * code, which is necessary to support wrapping clocksources like pm
306 * timer.
307 */
295static cycle_t read_tsc(void) 308static cycle_t read_tsc(void)
296{ 309{
297 cycle_t ret = (cycle_t)get_cycles(); 310 cycle_t ret = (cycle_t)get_cycles();
298 return ret; 311
312 return ret >= clocksource_tsc.cycle_last ?
313 ret : clocksource_tsc.cycle_last;
299} 314}
300 315
301static cycle_t __vsyscall_fn vread_tsc(void) 316static cycle_t __vsyscall_fn vread_tsc(void)
302{ 317{
303 cycle_t ret = (cycle_t)vget_cycles(); 318 cycle_t ret = (cycle_t)vget_cycles();
304 return ret; 319
320 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
321 ret : __vsyscall_gtod_data.clock.cycle_last;
305} 322}
306 323
307static struct clocksource clocksource_tsc = { 324static struct clocksource clocksource_tsc = {