diff options
author | John Stultz <john.stultz@linaro.org> | 2012-03-15 16:04:03 -0400 |
---|---|---|
committer | John Stultz <john.stultz@linaro.org> | 2012-03-22 22:43:43 -0400 |
commit | 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d (patch) | |
tree | 007df06a9cf0d4d2b72ed7dd8d646e853de80e9b /kernel/time/ntp.c | |
parent | 57779dc2b3b75bee05ef5d1ada47f615f7a13932 (diff) |
ntp: Fix leap-second hrtimer livelock
Since commit 7dffa3c673fbcf835cd7be80bb4aec8ad3f51168 the ntp
subsystem has used an hrtimer for triggering the leapsecond
adjustment. However, this can cause a potential livelock.
Thomas diagnosed this as the following pattern:
CPU 0                                      CPU 1
do_adjtimex()
  spin_lock_irq(&ntp_lock);
    process_adjtimex_modes();              timer_interrupt()
      process_adj_status();                  do_timer()
        ntp_start_leap_timer();                write_lock(&xtime_lock);
          hrtimer_start();                       update_wall_time();
            hrtimer_reprogram();                   ntp_tick_length()
              tick_program_event()                   spin_lock(&ntp_lock);
                clockevents_program_event()
                  ktime_get()
                    seq = read_seqbegin(xtime_lock);
This patch tries to avoid the problem by reverting back to not using
an hrtimer to inject leapseconds, and instead we handle the leapsecond
processing in the second_overflow() function.
The downside to this change is that on systems that support highres
timers, the leap second processing will occur on a HZ tick boundary
(i.e., ~1-10ms, depending on HZ) after the leap second instead of
possibly sooner (~34us in my tests w/ x86_64 lapic).
This patch applies on top of tip/timers/core.
CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Diagnosed-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Diffstat (limited to 'kernel/time/ntp.c')
-rw-r--r-- | kernel/time/ntp.c | 128 |
1 files changed, 41 insertions, 87 deletions
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 6e039b144daf..3d17ebd47fa2 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -34,8 +34,6 @@ unsigned long tick_nsec; | |||
34 | static u64 tick_length; | 34 | static u64 tick_length; |
35 | static u64 tick_length_base; | 35 | static u64 tick_length_base; |
36 | 36 | ||
37 | static struct hrtimer leap_timer; | ||
38 | |||
39 | #define MAX_TICKADJ 500LL /* usecs */ | 37 | #define MAX_TICKADJ 500LL /* usecs */ |
40 | #define MAX_TICKADJ_SCALED \ | 38 | #define MAX_TICKADJ_SCALED \ |
41 | (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) | 39 | (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) |
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void) | |||
381 | 379 | ||
382 | 380 | ||
383 | /* | 381 | /* |
384 | * Leap second processing. If in leap-insert state at the end of the | 382 | * this routine handles the overflow of the microsecond field |
385 | * day, the system clock is set back one second; if in leap-delete | 383 | * |
386 | * state, the system clock is set ahead one second. | 384 | * The tricky bits of code to handle the accurate clock support |
385 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
386 | * They were originally developed for SUN and DEC kernels. | ||
387 | * All the kudos should go to Dave for this stuff. | ||
388 | * | ||
389 | * Also handles leap second processing, and returns leap offset | ||
387 | */ | 390 | */ |
388 | static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) | 391 | int second_overflow(unsigned long secs) |
389 | { | 392 | { |
390 | enum hrtimer_restart res = HRTIMER_NORESTART; | 393 | s64 delta; |
391 | unsigned long flags; | ||
392 | int leap = 0; | 394 | int leap = 0; |
395 | unsigned long flags; | ||
393 | 396 | ||
394 | spin_lock_irqsave(&ntp_lock, flags); | 397 | spin_lock_irqsave(&ntp_lock, flags); |
398 | |||
399 | /* | ||
400 | * Leap second processing. If in leap-insert state at the end of the | ||
401 | * day, the system clock is set back one second; if in leap-delete | ||
402 | * state, the system clock is set ahead one second. | ||
403 | */ | ||
395 | switch (time_state) { | 404 | switch (time_state) { |
396 | case TIME_OK: | 405 | case TIME_OK: |
406 | if (time_status & STA_INS) | ||
407 | time_state = TIME_INS; | ||
408 | else if (time_status & STA_DEL) | ||
409 | time_state = TIME_DEL; | ||
397 | break; | 410 | break; |
398 | case TIME_INS: | 411 | case TIME_INS: |
399 | leap = -1; | 412 | if (secs % 86400 == 0) { |
400 | time_state = TIME_OOP; | 413 | leap = -1; |
401 | printk(KERN_NOTICE | 414 | time_state = TIME_OOP; |
402 | "Clock: inserting leap second 23:59:60 UTC\n"); | 415 | printk(KERN_NOTICE |
403 | hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); | 416 | "Clock: inserting leap second 23:59:60 UTC\n"); |
404 | res = HRTIMER_RESTART; | 417 | } |
405 | break; | 418 | break; |
406 | case TIME_DEL: | 419 | case TIME_DEL: |
407 | leap = 1; | 420 | if ((secs + 1) % 86400 == 0) { |
408 | time_tai--; | 421 | leap = 1; |
409 | time_state = TIME_WAIT; | 422 | time_tai--; |
410 | printk(KERN_NOTICE | 423 | time_state = TIME_WAIT; |
411 | "Clock: deleting leap second 23:59:59 UTC\n"); | 424 | printk(KERN_NOTICE |
425 | "Clock: deleting leap second 23:59:59 UTC\n"); | ||
426 | } | ||
412 | break; | 427 | break; |
413 | case TIME_OOP: | 428 | case TIME_OOP: |
414 | time_tai++; | 429 | time_tai++; |
415 | time_state = TIME_WAIT; | 430 | time_state = TIME_WAIT; |
416 | /* fall through */ | 431 | break; |
432 | |||
417 | case TIME_WAIT: | 433 | case TIME_WAIT: |
418 | if (!(time_status & (STA_INS | STA_DEL))) | 434 | if (!(time_status & (STA_INS | STA_DEL))) |
419 | time_state = TIME_OK; | 435 | time_state = TIME_OK; |
420 | break; | 436 | break; |
421 | } | 437 | } |
422 | spin_unlock_irqrestore(&ntp_lock, flags); | ||
423 | |||
424 | /* | ||
425 | * We have to call this outside of the ntp_lock to keep | ||
426 | * the proper locking hierarchy | ||
427 | */ | ||
428 | if (leap) | ||
429 | timekeeping_leap_insert(leap); | ||
430 | |||
431 | return res; | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | * this routine handles the overflow of the microsecond field | ||
436 | * | ||
437 | * The tricky bits of code to handle the accurate clock support | ||
438 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
439 | * They were originally developed for SUN and DEC kernels. | ||
440 | * All the kudos should go to Dave for this stuff. | ||
441 | */ | ||
442 | void second_overflow(void) | ||
443 | { | ||
444 | s64 delta; | ||
445 | unsigned long flags; | ||
446 | 438 | ||
447 | spin_lock_irqsave(&ntp_lock, flags); | ||
448 | 439 | ||
449 | /* Bump the maxerror field */ | 440 | /* Bump the maxerror field */ |
450 | time_maxerror += MAXFREQ / NSEC_PER_USEC; | 441 | time_maxerror += MAXFREQ / NSEC_PER_USEC; |
@@ -481,8 +472,13 @@ void second_overflow(void) | |||
481 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) | 472 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) |
482 | << NTP_SCALE_SHIFT; | 473 | << NTP_SCALE_SHIFT; |
483 | time_adjust = 0; | 474 | time_adjust = 0; |
475 | |||
476 | |||
477 | |||
484 | out: | 478 | out: |
485 | spin_unlock_irqrestore(&ntp_lock, flags); | 479 | spin_unlock_irqrestore(&ntp_lock, flags); |
480 | |||
481 | return leap; | ||
486 | } | 482 | } |
487 | 483 | ||
488 | #ifdef CONFIG_GENERIC_CMOS_UPDATE | 484 | #ifdef CONFIG_GENERIC_CMOS_UPDATE |
@@ -544,27 +540,6 @@ static void notify_cmos_timer(void) | |||
544 | static inline void notify_cmos_timer(void) { } | 540 | static inline void notify_cmos_timer(void) { } |
545 | #endif | 541 | #endif |
546 | 542 | ||
547 | /* | ||
548 | * Start the leap seconds timer: | ||
549 | */ | ||
550 | static inline void ntp_start_leap_timer(struct timespec *ts) | ||
551 | { | ||
552 | long now = ts->tv_sec; | ||
553 | |||
554 | if (time_status & STA_INS) { | ||
555 | time_state = TIME_INS; | ||
556 | now += 86400 - now % 86400; | ||
557 | hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||
558 | |||
559 | return; | ||
560 | } | ||
561 | |||
562 | if (time_status & STA_DEL) { | ||
563 | time_state = TIME_DEL; | ||
564 | now += 86400 - (now + 1) % 86400; | ||
565 | hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||
566 | } | ||
567 | } | ||
568 | 543 | ||
569 | /* | 544 | /* |
570 | * Propagate a new txc->status value into the NTP state: | 545 | * Propagate a new txc->status value into the NTP state: |
@@ -589,22 +564,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) | |||
589 | time_status &= STA_RONLY; | 564 | time_status &= STA_RONLY; |
590 | time_status |= txc->status & ~STA_RONLY; | 565 | time_status |= txc->status & ~STA_RONLY; |
591 | 566 | ||
592 | switch (time_state) { | ||
593 | case TIME_OK: | ||
594 | ntp_start_leap_timer(ts); | ||
595 | break; | ||
596 | case TIME_INS: | ||
597 | case TIME_DEL: | ||
598 | time_state = TIME_OK; | ||
599 | ntp_start_leap_timer(ts); | ||
600 | case TIME_WAIT: | ||
601 | if (!(time_status & (STA_INS | STA_DEL))) | ||
602 | time_state = TIME_OK; | ||
603 | break; | ||
604 | case TIME_OOP: | ||
605 | hrtimer_restart(&leap_timer); | ||
606 | break; | ||
607 | } | ||
608 | } | 567 | } |
609 | /* | 568 | /* |
610 | * Called with the xtime lock held, so we can access and modify | 569 | * Called with the xtime lock held, so we can access and modify |
@@ -686,9 +645,6 @@ int do_adjtimex(struct timex *txc) | |||
686 | (txc->tick < 900000/USER_HZ || | 645 | (txc->tick < 900000/USER_HZ || |
687 | txc->tick > 1100000/USER_HZ)) | 646 | txc->tick > 1100000/USER_HZ)) |
688 | return -EINVAL; | 647 | return -EINVAL; |
689 | |||
690 | if (txc->modes & ADJ_STATUS && time_state != TIME_OK) | ||
691 | hrtimer_cancel(&leap_timer); | ||
692 | } | 648 | } |
693 | 649 | ||
694 | if (txc->modes & ADJ_SETOFFSET) { | 650 | if (txc->modes & ADJ_SETOFFSET) { |
@@ -1010,6 +966,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); | |||
1010 | void __init ntp_init(void) | 966 | void __init ntp_init(void) |
1011 | { | 967 | { |
1012 | ntp_clear(); | 968 | ntp_clear(); |
1013 | hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | ||
1014 | leap_timer.function = ntp_leap_second; | ||
1015 | } | 969 | } |