diff options
author | John Stultz <john.stultz@linaro.org> | 2012-03-15 16:04:03 -0400 |
---|---|---|
committer | John Stultz <john.stultz@linaro.org> | 2012-03-22 22:43:43 -0400 |
commit | 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d (patch) | |
tree | 007df06a9cf0d4d2b72ed7dd8d646e853de80e9b /kernel | |
parent | 57779dc2b3b75bee05ef5d1ada47f615f7a13932 (diff) |
ntp: Fix leap-second hrtimer livelock
Since commit 7dffa3c673fbcf835cd7be80bb4aec8ad3f51168 the ntp
subsystem has used an hrtimer for triggering the leapsecond
adjustment. However, this can cause a potential livelock.
Thomas diagnosed this as the following pattern:
CPU 0                                   CPU 1
do_adjtimex()
  spin_lock_irq(&ntp_lock);
    process_adjtimex_modes();           timer_interrupt()
      process_adj_status();               do_timer()
        ntp_start_leap_timer();             write_lock(&xtime_lock);
          hrtimer_start();                    update_wall_time();
            hrtimer_reprogram();                ntp_tick_length()
              tick_program_event()                spin_lock(&ntp_lock);
                clockevents_program_event()
                  ktime_get()
                    seq = read_seqbegin(xtime_lock);
This patch tries to avoid the problem by reverting back to not using
an hrtimer to inject leapseconds, and instead we handle the leapsecond
processing in the second_overflow() function.
The downside to this change is that on systems that support highres
timers, the leap second processing will occur on a HZ tick boundary,
(i.e., ~1-10ms, depending on HZ) after the leap second instead of
possibly sooner (~34us in my tests w/ x86_64 lapic).
This patch applies on top of tip/timers/core.
CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Diagnosed-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/time/ntp.c | 128 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 20 |
2 files changed, 47 insertions, 101 deletions
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 6e039b144daf..3d17ebd47fa2 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -34,8 +34,6 @@ unsigned long tick_nsec; | |||
34 | static u64 tick_length; | 34 | static u64 tick_length; |
35 | static u64 tick_length_base; | 35 | static u64 tick_length_base; |
36 | 36 | ||
37 | static struct hrtimer leap_timer; | ||
38 | |||
39 | #define MAX_TICKADJ 500LL /* usecs */ | 37 | #define MAX_TICKADJ 500LL /* usecs */ |
40 | #define MAX_TICKADJ_SCALED \ | 38 | #define MAX_TICKADJ_SCALED \ |
41 | (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) | 39 | (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) |
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void) | |||
381 | 379 | ||
382 | 380 | ||
383 | /* | 381 | /* |
384 | * Leap second processing. If in leap-insert state at the end of the | 382 | * this routine handles the overflow of the microsecond field |
385 | * day, the system clock is set back one second; if in leap-delete | 383 | * |
386 | * state, the system clock is set ahead one second. | 384 | * The tricky bits of code to handle the accurate clock support |
385 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
386 | * They were originally developed for SUN and DEC kernels. | ||
387 | * All the kudos should go to Dave for this stuff. | ||
388 | * | ||
389 | * Also handles leap second processing, and returns leap offset | ||
387 | */ | 390 | */ |
388 | static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) | 391 | int second_overflow(unsigned long secs) |
389 | { | 392 | { |
390 | enum hrtimer_restart res = HRTIMER_NORESTART; | 393 | s64 delta; |
391 | unsigned long flags; | ||
392 | int leap = 0; | 394 | int leap = 0; |
395 | unsigned long flags; | ||
393 | 396 | ||
394 | spin_lock_irqsave(&ntp_lock, flags); | 397 | spin_lock_irqsave(&ntp_lock, flags); |
398 | |||
399 | /* | ||
400 | * Leap second processing. If in leap-insert state at the end of the | ||
401 | * day, the system clock is set back one second; if in leap-delete | ||
402 | * state, the system clock is set ahead one second. | ||
403 | */ | ||
395 | switch (time_state) { | 404 | switch (time_state) { |
396 | case TIME_OK: | 405 | case TIME_OK: |
406 | if (time_status & STA_INS) | ||
407 | time_state = TIME_INS; | ||
408 | else if (time_status & STA_DEL) | ||
409 | time_state = TIME_DEL; | ||
397 | break; | 410 | break; |
398 | case TIME_INS: | 411 | case TIME_INS: |
399 | leap = -1; | 412 | if (secs % 86400 == 0) { |
400 | time_state = TIME_OOP; | 413 | leap = -1; |
401 | printk(KERN_NOTICE | 414 | time_state = TIME_OOP; |
402 | "Clock: inserting leap second 23:59:60 UTC\n"); | 415 | printk(KERN_NOTICE |
403 | hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); | 416 | "Clock: inserting leap second 23:59:60 UTC\n"); |
404 | res = HRTIMER_RESTART; | 417 | } |
405 | break; | 418 | break; |
406 | case TIME_DEL: | 419 | case TIME_DEL: |
407 | leap = 1; | 420 | if ((secs + 1) % 86400 == 0) { |
408 | time_tai--; | 421 | leap = 1; |
409 | time_state = TIME_WAIT; | 422 | time_tai--; |
410 | printk(KERN_NOTICE | 423 | time_state = TIME_WAIT; |
411 | "Clock: deleting leap second 23:59:59 UTC\n"); | 424 | printk(KERN_NOTICE |
425 | "Clock: deleting leap second 23:59:59 UTC\n"); | ||
426 | } | ||
412 | break; | 427 | break; |
413 | case TIME_OOP: | 428 | case TIME_OOP: |
414 | time_tai++; | 429 | time_tai++; |
415 | time_state = TIME_WAIT; | 430 | time_state = TIME_WAIT; |
416 | /* fall through */ | 431 | break; |
432 | |||
417 | case TIME_WAIT: | 433 | case TIME_WAIT: |
418 | if (!(time_status & (STA_INS | STA_DEL))) | 434 | if (!(time_status & (STA_INS | STA_DEL))) |
419 | time_state = TIME_OK; | 435 | time_state = TIME_OK; |
420 | break; | 436 | break; |
421 | } | 437 | } |
422 | spin_unlock_irqrestore(&ntp_lock, flags); | ||
423 | |||
424 | /* | ||
425 | * We have to call this outside of the ntp_lock to keep | ||
426 | * the proper locking hierarchy | ||
427 | */ | ||
428 | if (leap) | ||
429 | timekeeping_leap_insert(leap); | ||
430 | |||
431 | return res; | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | * this routine handles the overflow of the microsecond field | ||
436 | * | ||
437 | * The tricky bits of code to handle the accurate clock support | ||
438 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
439 | * They were originally developed for SUN and DEC kernels. | ||
440 | * All the kudos should go to Dave for this stuff. | ||
441 | */ | ||
442 | void second_overflow(void) | ||
443 | { | ||
444 | s64 delta; | ||
445 | unsigned long flags; | ||
446 | 438 | ||
447 | spin_lock_irqsave(&ntp_lock, flags); | ||
448 | 439 | ||
449 | /* Bump the maxerror field */ | 440 | /* Bump the maxerror field */ |
450 | time_maxerror += MAXFREQ / NSEC_PER_USEC; | 441 | time_maxerror += MAXFREQ / NSEC_PER_USEC; |
@@ -481,8 +472,13 @@ void second_overflow(void) | |||
481 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) | 472 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) |
482 | << NTP_SCALE_SHIFT; | 473 | << NTP_SCALE_SHIFT; |
483 | time_adjust = 0; | 474 | time_adjust = 0; |
475 | |||
476 | |||
477 | |||
484 | out: | 478 | out: |
485 | spin_unlock_irqrestore(&ntp_lock, flags); | 479 | spin_unlock_irqrestore(&ntp_lock, flags); |
480 | |||
481 | return leap; | ||
486 | } | 482 | } |
487 | 483 | ||
488 | #ifdef CONFIG_GENERIC_CMOS_UPDATE | 484 | #ifdef CONFIG_GENERIC_CMOS_UPDATE |
@@ -544,27 +540,6 @@ static void notify_cmos_timer(void) | |||
544 | static inline void notify_cmos_timer(void) { } | 540 | static inline void notify_cmos_timer(void) { } |
545 | #endif | 541 | #endif |
546 | 542 | ||
547 | /* | ||
548 | * Start the leap seconds timer: | ||
549 | */ | ||
550 | static inline void ntp_start_leap_timer(struct timespec *ts) | ||
551 | { | ||
552 | long now = ts->tv_sec; | ||
553 | |||
554 | if (time_status & STA_INS) { | ||
555 | time_state = TIME_INS; | ||
556 | now += 86400 - now % 86400; | ||
557 | hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||
558 | |||
559 | return; | ||
560 | } | ||
561 | |||
562 | if (time_status & STA_DEL) { | ||
563 | time_state = TIME_DEL; | ||
564 | now += 86400 - (now + 1) % 86400; | ||
565 | hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||
566 | } | ||
567 | } | ||
568 | 543 | ||
569 | /* | 544 | /* |
570 | * Propagate a new txc->status value into the NTP state: | 545 | * Propagate a new txc->status value into the NTP state: |
@@ -589,22 +564,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) | |||
589 | time_status &= STA_RONLY; | 564 | time_status &= STA_RONLY; |
590 | time_status |= txc->status & ~STA_RONLY; | 565 | time_status |= txc->status & ~STA_RONLY; |
591 | 566 | ||
592 | switch (time_state) { | ||
593 | case TIME_OK: | ||
594 | ntp_start_leap_timer(ts); | ||
595 | break; | ||
596 | case TIME_INS: | ||
597 | case TIME_DEL: | ||
598 | time_state = TIME_OK; | ||
599 | ntp_start_leap_timer(ts); | ||
600 | case TIME_WAIT: | ||
601 | if (!(time_status & (STA_INS | STA_DEL))) | ||
602 | time_state = TIME_OK; | ||
603 | break; | ||
604 | case TIME_OOP: | ||
605 | hrtimer_restart(&leap_timer); | ||
606 | break; | ||
607 | } | ||
608 | } | 567 | } |
609 | /* | 568 | /* |
610 | * Called with the xtime lock held, so we can access and modify | 569 | * Called with the xtime lock held, so we can access and modify |
@@ -686,9 +645,6 @@ int do_adjtimex(struct timex *txc) | |||
686 | (txc->tick < 900000/USER_HZ || | 645 | (txc->tick < 900000/USER_HZ || |
687 | txc->tick > 1100000/USER_HZ)) | 646 | txc->tick > 1100000/USER_HZ)) |
688 | return -EINVAL; | 647 | return -EINVAL; |
689 | |||
690 | if (txc->modes & ADJ_STATUS && time_state != TIME_OK) | ||
691 | hrtimer_cancel(&leap_timer); | ||
692 | } | 648 | } |
693 | 649 | ||
694 | if (txc->modes & ADJ_SETOFFSET) { | 650 | if (txc->modes & ADJ_SETOFFSET) { |
@@ -1010,6 +966,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); | |||
1010 | void __init ntp_init(void) | 966 | void __init ntp_init(void) |
1011 | { | 967 | { |
1012 | ntp_clear(); | 968 | ntp_clear(); |
1013 | hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | ||
1014 | leap_timer.function = ntp_leap_second; | ||
1015 | } | 969 | } |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b53da5ecbea2..5d76e09ddd3d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp) | |||
184 | } | 184 | } |
185 | 185 | ||
186 | 186 | ||
187 | void timekeeping_leap_insert(int leapsecond) | ||
188 | { | ||
189 | unsigned long flags; | ||
190 | |||
191 | write_seqlock_irqsave(&timekeeper.lock, flags); | ||
192 | timekeeper.xtime.tv_sec += leapsecond; | ||
193 | timekeeper.wall_to_monotonic.tv_sec -= leapsecond; | ||
194 | timekeeping_update(false); | ||
195 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | ||
196 | |||
197 | } | ||
198 | |||
199 | /** | 187 | /** |
200 | * timekeeping_forward_now - update clock to the current time | 188 | * timekeeping_forward_now - update clock to the current time |
201 | * | 189 | * |
@@ -969,9 +957,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | |||
969 | 957 | ||
970 | timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; | 958 | timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; |
971 | while (timekeeper.xtime_nsec >= nsecps) { | 959 | while (timekeeper.xtime_nsec >= nsecps) { |
960 | int leap; | ||
972 | timekeeper.xtime_nsec -= nsecps; | 961 | timekeeper.xtime_nsec -= nsecps; |
973 | timekeeper.xtime.tv_sec++; | 962 | timekeeper.xtime.tv_sec++; |
974 | second_overflow(); | 963 | leap = second_overflow(timekeeper.xtime.tv_sec); |
964 | timekeeper.xtime.tv_sec += leap; | ||
975 | } | 965 | } |
976 | 966 | ||
977 | /* Accumulate raw time */ | 967 | /* Accumulate raw time */ |
@@ -1082,9 +1072,11 @@ static void update_wall_time(void) | |||
1082 | * xtime.tv_nsec isn't larger then NSEC_PER_SEC | 1072 | * xtime.tv_nsec isn't larger then NSEC_PER_SEC |
1083 | */ | 1073 | */ |
1084 | if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { | 1074 | if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { |
1075 | int leap; | ||
1085 | timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; | 1076 | timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; |
1086 | timekeeper.xtime.tv_sec++; | 1077 | timekeeper.xtime.tv_sec++; |
1087 | second_overflow(); | 1078 | leap = second_overflow(timekeeper.xtime.tv_sec); |
1079 | timekeeper.xtime.tv_sec += leap; | ||
1088 | } | 1080 | } |
1089 | 1081 | ||
1090 | timekeeping_update(false); | 1082 | timekeeping_update(false); |