Diffstat (limited to 'kernel/time/clocksource.c')

 kernel/time/clocksource.c | 173 ++++++++++-----------
 1 file changed, 80 insertions(+), 93 deletions(-)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4892352f0e49..15facb1b9c60 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
 	schedule_work(&watchdog_work);
 }
 
-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
-	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
-	       cs->name, delta);
-	__clocksource_unstable(cs);
-}
-
 /**
  * clocksource_mark_unstable - mark clocksource unstable via watchdog
  * @cs:		clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 static void clocksource_watchdog(unsigned long data)
 {
 	struct clocksource *cs;
-	cycle_t csnow, wdnow, delta;
+	cycle_t csnow, wdnow, cslast, wdlast, delta;
 	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
 
@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
 
 		delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
 		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+		wdlast = cs->wd_last; /* save these in case we print them */
+		cslast = cs->cs_last;
 		cs->cs_last = csnow;
 		cs->wd_last = wdnow;
 
@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
 
 		/* Check the deviation from the watchdog clocksource. */
 		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
-			clocksource_unstable(cs, cs_nsec - wd_nsec);
+			pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
+			pr_warn("	'%s' wd_now: %llx wd_last: %llx mask: %llx\n",
+				watchdog->name, wdnow, wdlast, watchdog->mask);
+			pr_warn("	'%s' cs_now: %llx cs_last: %llx mask: %llx\n",
+				cs->name, csnow, cslast, cs->mask);
+			__clocksource_unstable(cs);
 			continue;
 		}
 
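The hunk above replaces the terse "unstable" message with the raw counter values the watchdog compared. The comparison itself rests on a masked two's-complement delta plus a mult/shift cycles-to-nanoseconds conversion. Below is a minimal user-space sketch of that math; the helper names only mirror (and are not) the kernel's clocksource_delta() and clocksource_cyc2ns(), and the mult/shift/mask values are made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Two's-complement subtraction for counters narrower than 64 bits. */
static uint64_t delta(uint64_t now, uint64_t last, uint64_t mask)
{
	return (now - last) & mask;
}

/* Cycles -> nanoseconds: ns = (cycles * mult) >> shift. */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	uint64_t mask = 0xffffffff;             /* hypothetical 32-bit counter */
	uint64_t last = 0xfffffff0, now = 0x10; /* counter wrapped in between */
	uint32_t mult = 0x6400000, shift = 22;  /* ~25 ns/cycle, i.e. 40 MHz */

	/* Masked subtraction absorbs the wrap: delta is 0x20, not huge. */
	uint64_t d = delta(now, last, mask);
	printf("delta: %llu cycles -> %llu ns\n",
	       (unsigned long long)d,
	       (unsigned long long)cyc2ns(d, mult, shift));
	return 0;
}

With these assumed values the wrapped counter yields a delta of 32 cycles, or 800 ns; it is this per-interval nanosecond figure that gets compared against the watchdog's interval and WATCHDOG_THRESHOLD.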
@@ -469,26 +469,25 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
  * @shift:	cycle to nanosecond divisor (power of two)
  * @maxadj:	maximum adjustment value to mult (~11%)
  * @mask:	bitmask for two's complement subtraction of non 64 bit counters
+ * @max_cyc:	maximum cycle value before potential overflow (does not include
+ *		any safety margin)
+ *
+ * NOTE: This function includes a safety margin of 50%, in other words, we
+ * return half the number of nanoseconds the hardware counter can technically
+ * cover. This is done so that we can potentially detect problems caused by
+ * delayed timers or bad hardware, which might result in time intervals that
+ * are larger than what the math used can handle without overflows.
  */
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
 	u64 max_nsecs, max_cycles;
 
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
-	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
-	 * which is equivalent to the below.
-	 * max_cycles < (2^63)/(mult + maxadj)
-	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
-	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
-	 * max_cycles < 2^(63 - log2(mult + maxadj))
-	 * max_cycles < 1 << (63 - log2(mult + maxadj))
-	 * Please note that we add 1 to the result of the log2 to account for
-	 * any rounding errors, ensure the above inequality is satisfied and
-	 * no overflow will occur.
+	 * cyc2ns() function without overflowing a 64-bit result.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+	max_cycles = ULLONG_MAX;
+	do_div(max_cycles, mult+maxadj);
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +498,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
 	max_cycles = min(max_cycles, mask);
 	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
 
+	/* return the max_cycles value as well if requested */
+	if (max_cyc)
+		*max_cyc = max_cycles;
+
+	/* Return 50% of the actual maximum, so we can detect bad values */
+	max_nsecs >>= 1;
+
 	return max_nsecs;
 }
 
 /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs:		Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs:		Pointer to clocksource to be updated
  *
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
 {
-	u64 max_nsecs;
-
-	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
-					  cs->mask);
-	/*
-	 * To ensure that the clocksource does not wrap whilst we are idle,
-	 * limit the time the clocksource can be deferred by 12.5%. Please
-	 * note a margin of 12.5% is used because this can be computed with
-	 * a shift, versus say 10% which would require division.
-	 */
-	return max_nsecs - (max_nsecs >> 3);
+	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+						cs->maxadj, cs->mask,
+						&cs->max_cycles);
 }
 
 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
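The two hunks above swap the power-of-two overflow bound for an exact ULLONG_MAX/(mult+maxadj) division, and trade the old 12.5% deferment margin for a 50% one. A worked example of the new math, as a user-space sketch under assumed values (a hypothetical 32-bit, 40 MHz counter; do_div() replaced by plain 64-bit division, maxadj approximated rather than taken from clocksource_max_adjustment()):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mult = 0x6400000, shift = 22;	/* ~25 ns/cycle (40 MHz) */
	uint32_t maxadj = mult / 9;		/* roughly the ~11% adjustment cap */
	uint64_t mask = 0xffffffff;		/* 32-bit counter */

	/* Largest cycle count whose cyc2ns() product fits in 64 bits. */
	uint64_t max_cycles = UINT64_MAX / (mult + maxadj);

	/* The counter wraps at mask, which is the tighter bound here. */
	if (max_cycles > mask)
		max_cycles = mask;

	/* Convert with the smallest plausible mult (worst case). */
	uint64_t max_nsecs = (max_cycles * (mult - maxadj)) >> shift;

	/* 50% safety margin, as in the patched clocks_calc_max_nsecs(). */
	max_nsecs >>= 1;

	printf("max_cycles: %#llx max_idle_ns: %llu\n",
	       (unsigned long long)max_cycles,
	       (unsigned long long)max_nsecs);
	return 0;
}

For these values the 64-bit bound (~1.6e11 cycles) exceeds the counter width, so max_cycles clamps to the 32-bit mask and max_idle_ns comes out to roughly 48 seconds after halving, illustrating why a delayed timer twice the idle limit still cannot overflow the conversion.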
@@ -648,7 +646,7 @@ static void clocksource_enqueue(struct clocksource *cs)
 }
 
 /**
- * __clocksource_updatefreq_scale - Used update clocksource with new freq
+ * __clocksource_update_freq_scale - Used to update clocksource with new freq
  * @cs:		clocksource to be registered
  * @scale:	Scale factor multiplied against freq to get clocksource hz
  * @freq:	clocksource frequency (cycles per second) divided by scale
@@ -656,48 +654,64 @@ static void clocksource_enqueue(struct clocksource *cs)
  * This should only be called from the clocksource->enable() method.
  *
  * This *SHOULD NOT* be called directly! Please use the
- * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
+ * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
+ * functions.
  */
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	u64 sec;
+
 	/*
-	 * Calc the maximum number of seconds which we can run before
-	 * wrapping around. For clocksources which have a mask > 32bit
-	 * we need to limit the max sleep time to have a good
-	 * conversion precision. 10 minutes is still a reasonable
-	 * amount. That results in a shift value of 24 for a
-	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
-	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
-	 * margin as we do in clocksource_max_deferment()
+	 * Default clocksources are *special* and self-define their mult/shift.
+	 * But, you're not special, so you should specify a freq value.
 	 */
-	sec = (cs->mask - (cs->mask >> 3));
-	do_div(sec, freq);
-	do_div(sec, scale);
-	if (!sec)
-		sec = 1;
-	else if (sec > 600 && cs->mask > UINT_MAX)
-		sec = 600;
-
-	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-			       NSEC_PER_SEC / scale, sec * scale);
-
+	if (freq) {
+		/*
+		 * Calc the maximum number of seconds which we can run before
+		 * wrapping around. For clocksources which have a mask > 32-bit
+		 * we need to limit the max sleep time to have a good
+		 * conversion precision. 10 minutes is still a reasonable
+		 * amount. That results in a shift value of 24 for a
+		 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
+		 * ~ 0.06ppm granularity for NTP.
+		 */
+		sec = cs->mask;
+		do_div(sec, freq);
+		do_div(sec, scale);
+		if (!sec)
+			sec = 1;
+		else if (sec > 600 && cs->mask > UINT_MAX)
+			sec = 600;
+
+		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+				       NSEC_PER_SEC / scale, sec * scale);
+	}
 	/*
-	 * for clocksources that have large mults, to avoid overflow.
-	 * Since mult may be adjusted by ntp, add an safety extra margin
-	 *
+	 * Ensure clocksources that have large 'mult' values don't overflow
+	 * when adjusted.
 	 */
 	cs->maxadj = clocksource_max_adjustment(cs);
-	while ((cs->mult + cs->maxadj < cs->mult)
-		|| (cs->mult - cs->maxadj > cs->mult)) {
+	while (freq && ((cs->mult + cs->maxadj < cs->mult)
+		|| (cs->mult - cs->maxadj > cs->mult))) {
 		cs->mult >>= 1;
 		cs->shift--;
 		cs->maxadj = clocksource_max_adjustment(cs);
 	}
 
-	cs->max_idle_ns = clocksource_max_deferment(cs);
+	/*
+	 * Only warn for *special* clocksources that self-define
+	 * their mult/shift values and don't specify a freq.
+	 */
+	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+		"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
+		cs->name);
+
+	clocksource_update_max_deferment(cs);
+
+	pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
+		cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
 }
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
 
 /**
  * __clocksource_register_scale - Used to install new clocksources
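The freq branch above picks mult/shift so that cycles * mult >> shift approximates nanoseconds over a window of at most 'sec' seconds. A rough user-space illustration of that relationship, simplified in that the shift is fixed by hand rather than derived from the window as the real clocks_calc_mult_shift() does; the 40 MHz frequency is an arbitrary example:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint32_t freq = 40000000;	/* hypothetical 40 MHz clocksource */
	uint32_t shift = 22;		/* example; real code derives it */
	uint32_t mult = (uint32_t)((NSEC_PER_SEC << shift) / freq);

	/* One second's worth of cycles should convert back to ~1e9 ns. */
	uint64_t ns = ((uint64_t)freq * mult) >> shift;
	printf("mult: %#x shift: %u -> 1s converts to %llu ns\n",
	       mult, shift, (unsigned long long)ns);
	return 0;
}

A larger shift buys finer fractional-nanosecond precision in mult, but shrinks the cycle count that fits before the 64-bit multiply overflows, which is exactly the trade-off the 600-second cap on 'sec' manages for wide, fast counters.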
@@ -714,7 +728,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 
 	/* Initialize mult/shift and max_idle_ns */
-	__clocksource_updatefreq_scale(cs, scale, freq);
+	__clocksource_update_freq_scale(cs, scale, freq);
 
 	/* Add clocksource to the clocksource list */
 	mutex_lock(&clocksource_mutex);
@@ -726,33 +740,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 }
 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 
-
-/**
- * clocksource_register - Used to install new clocksources
- * @cs: clocksource to be registered
- *
- * Returns -EBUSY if registration fails, zero otherwise.
- */
-int clocksource_register(struct clocksource *cs)
-{
-	/* calculate max adjustment for given mult/shift */
-	cs->maxadj = clocksource_max_adjustment(cs);
-	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
-		"Clocksource %s might overflow on 11%% adjustment\n",
-		cs->name);
-
-	/* calculate max idle time permitted for this clocksource */
-	cs->max_idle_ns = clocksource_max_deferment(cs);
-
-	mutex_lock(&clocksource_mutex);
-	clocksource_enqueue(cs);
-	clocksource_enqueue_watchdog(cs);
-	clocksource_select();
-	mutex_unlock(&clocksource_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(clocksource_register);
-
 static void __clocksource_change_rating(struct clocksource *cs, int rating)
 {
 	list_del(&cs->list);
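With clocksource_register() removed above, drivers take the scaled registration path, typically through the clocksource_register_hz()/clocksource_register_khz() wrappers around __clocksource_register_scale(). A hedged sketch of what driver-side registration looks like after this change; the device name, read callback, rating, and frequency are invented for illustration, while the struct fields and clocksource_register_hz() itself are the real API:

#include <linux/clocksource.h>
#include <linux/init.h>

/* Hypothetical hardware counter read; examplehw_read_counter() is
 * a stand-in for an MMIO read of a free-running counter. */
static cycle_t examplehw_read(struct clocksource *cs)
{
	return examplehw_read_counter();
}

static struct clocksource example_cs = {
	.name	= "examplehw",
	.rating	= 300,
	.read	= examplehw_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init examplehw_clocksource_init(void)
{
	/*
	 * No hand-computed mult/shift: __clocksource_update_freq_scale()
	 * now derives them from the frequency (40 MHz is an arbitrary
	 * example value) and fills in maxadj, max_idle_ns and max_cycles.
	 */
	return clocksource_register_hz(&example_cs, 40000000);
}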