aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: John Stultz <john.stultz@linaro.org> 2012-03-15 16:04:03 -0400
committer: John Stultz <john.stultz@linaro.org> 2012-03-22 22:43:43 -0400
commit: 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d (patch)
tree: 007df06a9cf0d4d2b72ed7dd8d646e853de80e9b /kernel
parent: 57779dc2b3b75bee05ef5d1ada47f615f7a13932 (diff)
ntp: Fix leap-second hrtimer livelock
Since commit 7dffa3c673fbcf835cd7be80bb4aec8ad3f51168 the ntp subsystem has used an hrtimer for triggering the leapsecond adjustment. However, this can cause a potential livelock.

Thomas diagnosed this as the following pattern:

CPU 0                                   CPU 1
do_adjtimex()
  spin_lock_irq(&ntp_lock);
    process_adjtimex_modes();           timer_interrupt()
      process_adj_status();               do_timer()
        ntp_start_leap_timer();             write_lock(&xtime_lock);
          hrtimer_start();                  update_wall_time();
            hrtimer_reprogram();              ntp_tick_length()
              tick_program_event()              spin_lock(&ntp_lock);
                clockevents_program_event()
                  ktime_get()
                    seq = read_seqbegin(xtime_lock);

This patch tries to avoid the problem by reverting back to not using an hrtimer to inject leapseconds, and instead we handle the leapsecond processing in the second_overflow() function.

The downside to this change is that on systems that support highres timers, the leap second processing will occur on a HZ tick boundary (i.e. ~1-10ms, depending on HZ) after the leap second instead of possibly sooner (~34us in my tests w/ x86_64 lapic).

This patch applies on top of tip/timers/core.

CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Diagnosed-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/time/ntp.c 128
-rw-r--r-- kernel/time/timekeeping.c 20
2 files changed, 47 insertions, 101 deletions
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 6e039b144daf..3d17ebd47fa2 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -34,8 +34,6 @@ unsigned long tick_nsec;
34static u64 tick_length; 34static u64 tick_length;
35static u64 tick_length_base; 35static u64 tick_length_base;
36 36
37static struct hrtimer leap_timer;
38
39#define MAX_TICKADJ 500LL /* usecs */ 37#define MAX_TICKADJ 500LL /* usecs */
40#define MAX_TICKADJ_SCALED \ 38#define MAX_TICKADJ_SCALED \
41 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) 39 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void)
381 379
382 380
383/* 381/*
384 * Leap second processing. If in leap-insert state at the end of the 382 * this routine handles the overflow of the microsecond field
385 * day, the system clock is set back one second; if in leap-delete 383 *
386 * state, the system clock is set ahead one second. 384 * The tricky bits of code to handle the accurate clock support
385 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
386 * They were originally developed for SUN and DEC kernels.
387 * All the kudos should go to Dave for this stuff.
388 *
389 * Also handles leap second processing, and returns leap offset
387 */ 390 */
388static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) 391int second_overflow(unsigned long secs)
389{ 392{
390 enum hrtimer_restart res = HRTIMER_NORESTART; 393 s64 delta;
391 unsigned long flags;
392 int leap = 0; 394 int leap = 0;
395 unsigned long flags;
393 396
394 spin_lock_irqsave(&ntp_lock, flags); 397 spin_lock_irqsave(&ntp_lock, flags);
398
399 /*
400 * Leap second processing. If in leap-insert state at the end of the
401 * day, the system clock is set back one second; if in leap-delete
402 * state, the system clock is set ahead one second.
403 */
395 switch (time_state) { 404 switch (time_state) {
396 case TIME_OK: 405 case TIME_OK:
406 if (time_status & STA_INS)
407 time_state = TIME_INS;
408 else if (time_status & STA_DEL)
409 time_state = TIME_DEL;
397 break; 410 break;
398 case TIME_INS: 411 case TIME_INS:
399 leap = -1; 412 if (secs % 86400 == 0) {
400 time_state = TIME_OOP; 413 leap = -1;
401 printk(KERN_NOTICE 414 time_state = TIME_OOP;
402 "Clock: inserting leap second 23:59:60 UTC\n"); 415 printk(KERN_NOTICE
403 hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); 416 "Clock: inserting leap second 23:59:60 UTC\n");
404 res = HRTIMER_RESTART; 417 }
405 break; 418 break;
406 case TIME_DEL: 419 case TIME_DEL:
407 leap = 1; 420 if ((secs + 1) % 86400 == 0) {
408 time_tai--; 421 leap = 1;
409 time_state = TIME_WAIT; 422 time_tai--;
410 printk(KERN_NOTICE 423 time_state = TIME_WAIT;
411 "Clock: deleting leap second 23:59:59 UTC\n"); 424 printk(KERN_NOTICE
425 "Clock: deleting leap second 23:59:59 UTC\n");
426 }
412 break; 427 break;
413 case TIME_OOP: 428 case TIME_OOP:
414 time_tai++; 429 time_tai++;
415 time_state = TIME_WAIT; 430 time_state = TIME_WAIT;
416 /* fall through */ 431 break;
432
417 case TIME_WAIT: 433 case TIME_WAIT:
418 if (!(time_status & (STA_INS | STA_DEL))) 434 if (!(time_status & (STA_INS | STA_DEL)))
419 time_state = TIME_OK; 435 time_state = TIME_OK;
420 break; 436 break;
421 } 437 }
422 spin_unlock_irqrestore(&ntp_lock, flags);
423
424 /*
425 * We have to call this outside of the ntp_lock to keep
426 * the proper locking hierarchy
427 */
428 if (leap)
429 timekeeping_leap_insert(leap);
430
431 return res;
432}
433
434/*
435 * this routine handles the overflow of the microsecond field
436 *
437 * The tricky bits of code to handle the accurate clock support
438 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
439 * They were originally developed for SUN and DEC kernels.
440 * All the kudos should go to Dave for this stuff.
441 */
442void second_overflow(void)
443{
444 s64 delta;
445 unsigned long flags;
446 438
447 spin_lock_irqsave(&ntp_lock, flags);
448 439
449 /* Bump the maxerror field */ 440 /* Bump the maxerror field */
450 time_maxerror += MAXFREQ / NSEC_PER_USEC; 441 time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -481,8 +472,13 @@ void second_overflow(void)
481 tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) 472 tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
482 << NTP_SCALE_SHIFT; 473 << NTP_SCALE_SHIFT;
483 time_adjust = 0; 474 time_adjust = 0;
475
476
477
484out: 478out:
485 spin_unlock_irqrestore(&ntp_lock, flags); 479 spin_unlock_irqrestore(&ntp_lock, flags);
480
481 return leap;
486} 482}
487 483
488#ifdef CONFIG_GENERIC_CMOS_UPDATE 484#ifdef CONFIG_GENERIC_CMOS_UPDATE
@@ -544,27 +540,6 @@ static void notify_cmos_timer(void)
544static inline void notify_cmos_timer(void) { } 540static inline void notify_cmos_timer(void) { }
545#endif 541#endif
546 542
547/*
548 * Start the leap seconds timer:
549 */
550static inline void ntp_start_leap_timer(struct timespec *ts)
551{
552 long now = ts->tv_sec;
553
554 if (time_status & STA_INS) {
555 time_state = TIME_INS;
556 now += 86400 - now % 86400;
557 hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
558
559 return;
560 }
561
562 if (time_status & STA_DEL) {
563 time_state = TIME_DEL;
564 now += 86400 - (now + 1) % 86400;
565 hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
566 }
567}
568 543
569/* 544/*
570 * Propagate a new txc->status value into the NTP state: 545 * Propagate a new txc->status value into the NTP state:
@@ -589,22 +564,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
589 time_status &= STA_RONLY; 564 time_status &= STA_RONLY;
590 time_status |= txc->status & ~STA_RONLY; 565 time_status |= txc->status & ~STA_RONLY;
591 566
592 switch (time_state) {
593 case TIME_OK:
594 ntp_start_leap_timer(ts);
595 break;
596 case TIME_INS:
597 case TIME_DEL:
598 time_state = TIME_OK;
599 ntp_start_leap_timer(ts);
600 case TIME_WAIT:
601 if (!(time_status & (STA_INS | STA_DEL)))
602 time_state = TIME_OK;
603 break;
604 case TIME_OOP:
605 hrtimer_restart(&leap_timer);
606 break;
607 }
608} 567}
609/* 568/*
610 * Called with the xtime lock held, so we can access and modify 569 * Called with the xtime lock held, so we can access and modify
@@ -686,9 +645,6 @@ int do_adjtimex(struct timex *txc)
686 (txc->tick < 900000/USER_HZ || 645 (txc->tick < 900000/USER_HZ ||
687 txc->tick > 1100000/USER_HZ)) 646 txc->tick > 1100000/USER_HZ))
688 return -EINVAL; 647 return -EINVAL;
689
690 if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
691 hrtimer_cancel(&leap_timer);
692 } 648 }
693 649
694 if (txc->modes & ADJ_SETOFFSET) { 650 if (txc->modes & ADJ_SETOFFSET) {
@@ -1010,6 +966,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
1010void __init ntp_init(void) 966void __init ntp_init(void)
1011{ 967{
1012 ntp_clear(); 968 ntp_clear();
1013 hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
1014 leap_timer.function = ntp_leap_second;
1015} 969}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b53da5ecbea2..5d76e09ddd3d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
184} 184}
185 185
186 186
187void timekeeping_leap_insert(int leapsecond)
188{
189 unsigned long flags;
190
191 write_seqlock_irqsave(&timekeeper.lock, flags);
192 timekeeper.xtime.tv_sec += leapsecond;
193 timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
194 timekeeping_update(false);
195 write_sequnlock_irqrestore(&timekeeper.lock, flags);
196
197}
198
199/** 187/**
200 * timekeeping_forward_now - update clock to the current time 188 * timekeeping_forward_now - update clock to the current time
201 * 189 *
@@ -969,9 +957,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
969 957
970 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; 958 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
971 while (timekeeper.xtime_nsec >= nsecps) { 959 while (timekeeper.xtime_nsec >= nsecps) {
960 int leap;
972 timekeeper.xtime_nsec -= nsecps; 961 timekeeper.xtime_nsec -= nsecps;
973 timekeeper.xtime.tv_sec++; 962 timekeeper.xtime.tv_sec++;
974 second_overflow(); 963 leap = second_overflow(timekeeper.xtime.tv_sec);
964 timekeeper.xtime.tv_sec += leap;
975 } 965 }
976 966
977 /* Accumulate raw time */ 967 /* Accumulate raw time */
@@ -1082,9 +1072,11 @@ static void update_wall_time(void)
1082 * xtime.tv_nsec isn't larger then NSEC_PER_SEC 1072 * xtime.tv_nsec isn't larger then NSEC_PER_SEC
1083 */ 1073 */
1084 if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { 1074 if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
1075 int leap;
1085 timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; 1076 timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
1086 timekeeper.xtime.tv_sec++; 1077 timekeeper.xtime.tv_sec++;
1087 second_overflow(); 1078 leap = second_overflow(timekeeper.xtime.tv_sec);
1079 timekeeper.xtime.tv_sec += leap;
1088 } 1080 }
1089 1081
1090 timekeeping_update(false); 1082 timekeeping_update(false);