author     John Stultz <john.stultz@linaro.org>    2015-06-11 18:54:55 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2015-06-12 05:15:49 -0400
commit     833f32d763028c1bb371c64f457788b933773b3e (patch)
tree       e49045ff3592b68bbce6c155375092b81eb5abed
parent     90bf361ceae28dee50a584c3dd4c1a96178d982c (diff)
time: Prevent early expiry of hrtimers[CLOCK_REALTIME] at the leap second edge
Currently, leapsecond adjustments are done at tick time. As a result, the
leapsecond was applied at the first timer tick *after* the leapsecond
(~1-10ms late depending on HZ), rather than exactly on the second edge.

This is in part historical, from back when we were always tick based, but
correcting it has been avoided since it adds extra conditional checks in
the gettime fastpath, which has performance overhead.

However, it was recently pointed out that ABS_TIME CLOCK_REALTIME timers
set for right after the leapsecond could fire a second early, since some
timers may be expired before we trigger the timekeeping timer, which then
applies the leapsecond.

This isn't quite as bad as it sounds, since behaviorally it is similar to
what is possible with ntpd-made leapsecond adjustments done without using
the kernel discipline, where, due to latencies, timers may fire just prior
to the settimeofday call. (Also, one should note that all applications
using CLOCK_REALTIME timers should always be careful, since they are prone
to quirks from settimeofday() disturbances.)

However, the purpose of having the kernel do the leap adjustment is to
avoid such latencies, so I think this is worth fixing.

So in order to properly keep those timers from firing a second early, this
patch modifies the ntp and timekeeping logic so that we keep enough state
for the ktime_get_update_offsets_now() accessor, which provides the hrtimer
core with the current time, to check for and apply the leapsecond
adjustment on the second edge. This prevents the hrtimer core from expiring
timers too early.

This patch does not modify any other time read path, so no additional
overhead is incurred. However, this also means that the leapsecond
continues to be applied at tick time for all other read paths.

Apologies to Richard Cochran, who pushed for similar changes years ago,
which I resisted due to concerns about the performance overhead.

While I suspect this isn't extremely critical, folks who care about strict
leapsecond correctness will likely want to watch this. Potentially a
-stable candidate eventually.

Originally-suggested-by: Richard Cochran <richardcochran@gmail.com>
Reported-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Reported-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jiri Bohac <jbohac@suse.cz>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/1434063297-28657-4-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
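To make the failure mode concrete, here is a minimal user-space sketch
(illustrative only, not part of the patch; the handler, the local
SECS_PER_DAY constant and the output format are assumptions, and only
standard POSIX timer calls are used). It arms an absolute CLOCK_REALTIME
timer for one second past the upcoming midnight UTC, i.e. just past a
potential leap-second edge, and prints when it actually fires; without this
change, such a timer could be expired a second early while a leap second is
pending. The kernel's timers selftests (e.g. leap-a-day.c) exercise similar
scenarios more thoroughly.

#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define SECS_PER_DAY 86400

static void handler(int sig)
{
    struct timespec now;

    /* Report when the timer actually fired. */
    clock_gettime(CLOCK_REALTIME, &now);
    printf("timer fired at %lld.%09ld UTC\n",
           (long long)now.tv_sec, now.tv_nsec);
}

int main(void)
{
    struct sigevent se = {
        .sigev_notify = SIGEV_SIGNAL,
        .sigev_signo  = SIGALRM,
    };
    struct itimerspec its = { 0 };
    struct timespec now;
    timer_t tid;

    signal(SIGALRM, handler);
    if (timer_create(CLOCK_REALTIME, &se, &tid))
        return 1;

    /*
     * Absolute expiry: one second after the upcoming midnight UTC,
     * i.e. just past a potential leap-second insertion point.
     */
    clock_gettime(CLOCK_REALTIME, &now);
    its.it_value.tv_sec = now.tv_sec + SECS_PER_DAY -
                          (now.tv_sec % SECS_PER_DAY) + 1;

    timer_settime(tid, TIMER_ABSTIME, &its, NULL);
    pause();
    return 0;
}

(On older glibc this needs -lrt at link time; the program only demonstrates
anything interesting when run across a leap-second event.)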
-rw-r--r--  include/linux/time64.h               |  1
-rw-r--r--  include/linux/timekeeper_internal.h  |  2
-rw-r--r--  kernel/time/ntp.c                    | 42
-rw-r--r--  kernel/time/ntp_internal.h           |  1
-rw-r--r--  kernel/time/timekeeping.c            | 23
5 files changed, 61 insertions, 8 deletions
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 12d4e82b0276..77b5df2acd2a 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -29,6 +29,7 @@ struct timespec64 {
 #define FSEC_PER_SEC	1000000000000000LL
 
 /* Located here for timespec[64]_valid_strict */
+#define TIME64_MAX			((s64)~((u64)1 << 63))
 #define KTIME_MAX			((s64)~((u64)1 << 63))
 #define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
 
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e1f5a1136554..25247220b4b7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -50,6 +50,7 @@ struct tk_read_base {
  * @offs_tai:		Offset clock monotonic -> clock tai
  * @tai_offset:		The current UTC to TAI offset in seconds
  * @clock_was_set_seq:	The sequence number of clock was set events
+ * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
  * @raw_time:		Monotonic raw base time in timespec64 format
  * @cycle_interval:	Number of clock cycles in one NTP interval
  * @xtime_interval:	Number of clock shifted nano seconds in one NTP
@@ -90,6 +91,7 @@ struct timekeeper {
 	ktime_t			offs_tai;
 	s32			tai_offset;
 	unsigned int		clock_was_set_seq;
+	ktime_t			next_leap_ktime;
 	struct timespec64	raw_time;
 
 	/* The following members are for timekeeping internal use */
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7aa216188450..033743e3647a 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -77,6 +77,9 @@ static long time_adjust;
 /* constant (boot-param configurable) NTP tick adjustment (upscaled)	*/
 static s64			ntp_tick_adj;
 
+/* second value of the next pending leapsecond, or TIME64_MAX if no leap */
+static time64_t			ntp_next_leap_sec = TIME64_MAX;
+
 #ifdef CONFIG_NTP_PPS
 
 /*
@@ -350,6 +353,7 @@ void ntp_clear(void)
 	tick_length	= tick_length_base;
 	time_offset	= 0;
 
+	ntp_next_leap_sec = TIME64_MAX;
 	/* Clear PPS state variables */
 	pps_clear();
 }
@@ -360,6 +364,21 @@ u64 ntp_tick_length(void)
 	return tick_length;
 }
 
+/**
+ * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t
+ *
+ * Provides the time of the next leapsecond against CLOCK_REALTIME in
+ * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending.
+ */
+ktime_t ntp_get_next_leap(void)
+{
+	ktime_t ret;
+
+	if ((time_state == TIME_INS) && (time_status & STA_INS))
+		return ktime_set(ntp_next_leap_sec, 0);
+	ret.tv64 = KTIME_MAX;
+	return ret;
+}
 
 /*
  * this routine handles the overflow of the microsecond field
@@ -383,15 +402,21 @@ int second_overflow(unsigned long secs)
 	 */
 	switch (time_state) {
 	case TIME_OK:
-		if (time_status & STA_INS)
+		if (time_status & STA_INS) {
 			time_state = TIME_INS;
-		else if (time_status & STA_DEL)
+			ntp_next_leap_sec = secs + SECS_PER_DAY -
+						(secs % SECS_PER_DAY);
+		} else if (time_status & STA_DEL) {
 			time_state = TIME_DEL;
+			ntp_next_leap_sec = secs + SECS_PER_DAY -
+						 ((secs+1) % SECS_PER_DAY);
+		}
 		break;
 	case TIME_INS:
-		if (!(time_status & STA_INS))
+		if (!(time_status & STA_INS)) {
+			ntp_next_leap_sec = TIME64_MAX;
 			time_state = TIME_OK;
-		else if (secs % SECS_PER_DAY == 0) {
+		} else if (secs % SECS_PER_DAY == 0) {
 			leap = -1;
 			time_state = TIME_OOP;
 			printk(KERN_NOTICE
@@ -399,19 +424,21 @@ int second_overflow(unsigned long secs)
 		}
 		break;
 	case TIME_DEL:
-		if (!(time_status & STA_DEL))
+		if (!(time_status & STA_DEL)) {
+			ntp_next_leap_sec = TIME64_MAX;
 			time_state = TIME_OK;
-		else if ((secs + 1) % SECS_PER_DAY == 0) {
+		} else if ((secs + 1) % SECS_PER_DAY == 0) {
 			leap = 1;
+			ntp_next_leap_sec = TIME64_MAX;
 			time_state = TIME_WAIT;
 			printk(KERN_NOTICE
 			       "Clock: deleting leap second 23:59:59 UTC\n");
 		}
 		break;
 	case TIME_OOP:
+		ntp_next_leap_sec = TIME64_MAX;
 		time_state = TIME_WAIT;
 		break;
-
 	case TIME_WAIT:
 		if (!(time_status & (STA_INS | STA_DEL)))
 			time_state = TIME_OK;
@@ -548,6 +575,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec64 *ts)
 	if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
 		time_state = TIME_OK;
 		time_status = STA_UNSYNC;
+		ntp_next_leap_sec = TIME64_MAX;
 		/* restart PPS frequency calibration */
 		pps_reset_freq_interval();
 	}
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index bbd102ad9df7..65430504ca26 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -5,6 +5,7 @@ extern void ntp_init(void);
 extern void ntp_clear(void);
 /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
 extern u64 ntp_tick_length(void);
+extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(unsigned long secs);
 extern int ntp_validate_timex(struct timex *);
 extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 849b93265904..5d67ffb7e317 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -540,6 +540,17 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
 
 /*
+ * tk_update_leap_state - helper to update the next_leap_ktime
+ */
+static inline void tk_update_leap_state(struct timekeeper *tk)
+{
+	tk->next_leap_ktime = ntp_get_next_leap();
+	if (tk->next_leap_ktime.tv64 != KTIME_MAX)
+		/* Convert to monotonic time */
+		tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
+}
+
+/*
  * Update the ktime_t based scalar nsec members of the timekeeper
  */
 static inline void tk_update_ktime_data(struct timekeeper *tk)
@@ -580,6 +591,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 		ntp_clear();
 	}
 
+	tk_update_leap_state(tk);
 	tk_update_ktime_data(tk);
 
 	update_vsyscall(tk);
@@ -1956,15 +1968,22 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
 
 		base = tk->tkr_mono.base;
 		nsecs = timekeeping_get_ns(&tk->tkr_mono);
+		base = ktime_add_ns(base, nsecs);
+
 		if (*cwsseq != tk->clock_was_set_seq) {
 			*cwsseq = tk->clock_was_set_seq;
 			*offs_real = tk->offs_real;
 			*offs_boot = tk->offs_boot;
 			*offs_tai = tk->offs_tai;
 		}
+
+		/* Handle leapsecond insertion adjustments */
+		if (unlikely(base.tv64 >= tk->next_leap_ktime.tv64))
+			*offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
+
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	return ktime_add_ns(base, nsecs);
+	return base;
 }
 
 /**
@@ -2006,6 +2025,8 @@ int do_adjtimex(struct timex *txc)
 		__timekeeping_set_tai_offset(tk, tai);
 		timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 	}
+	tk_update_leap_state(tk);
+
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 