 include/linux/clocksource.h |  3
 kernel/hrtimer.c            |  6
 kernel/time/clocksource.c   | 58
 kernel/time/timekeeping.c   | 92
 4 files changed, 145 insertions(+), 14 deletions(-)
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 139c4db55f17..c86c940d1de3 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -156,6 +156,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * @mult:               cycle to nanosecond multiplier
  * @shift:              cycle to nanosecond divisor (power of two)
  * @max_idle_ns:        max idle time permitted by the clocksource (nsecs)
+ * @maxadj:             maximum adjustment value to mult (~11%)
  * @flags:              flags describing special properties
  * @archdata:           arch-specific data
  * @suspend:            suspend function for the clocksource, if necessary
@@ -172,7 +173,7 @@ struct clocksource {
         u32 mult;
         u32 shift;
         u64 max_idle_ns;
-
+        u32 maxadj;
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
         struct arch_clocksource_data archdata;
 #endif
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 422e567eecf6..ae34bf51682b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -885,10 +885,13 @@ static void __remove_hrtimer(struct hrtimer *timer,
                              struct hrtimer_clock_base *base,
                              unsigned long newstate, int reprogram)
 {
+        struct timerqueue_node *next_timer;
         if (!(timer->state & HRTIMER_STATE_ENQUEUED))
                 goto out;
 
-        if (&timer->node == timerqueue_getnext(&base->active)) {
+        next_timer = timerqueue_getnext(&base->active);
+        timerqueue_del(&base->active, &timer->node);
+        if (&timer->node == next_timer) {
 #ifdef CONFIG_HIGH_RES_TIMERS
                 /* Reprogram the clock event device. if enabled */
                 if (reprogram && hrtimer_hres_active()) {
@@ -901,7 +904,6 @@ static void __remove_hrtimer(struct hrtimer *timer,
                 }
 #endif
         }
-        timerqueue_del(&base->active, &timer->node);
 if (!timerqueue_getnext(&base->active))
                 base->cpu_base->active_bases &= ~(1 << base->index);
 out:
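The subtle part of the hrtimer change is ordering: the reprogram path inside the CONFIG_HIGH_RES_TIMERS block looks up the next expiring timer, so the node must already be off the queue when that lookup happens. Below is a toy sketch of the pattern, with a plain sorted list standing in for the kernel's timerqueue; all names and values are made up for illustration.

#include <stdio.h>

struct node { long expires; struct node *next; };

/* stand-in for timerqueue_getnext(): earliest-expiring entry */
static struct node *getnext(struct node *head)
{
        return head;
}

/* stand-in for timerqueue_del(): unlink n from the sorted list */
static struct node *del(struct node *head, struct node *n)
{
        struct node **pp = &head;

        while (*pp && *pp != n)
                pp = &(*pp)->next;
        if (*pp)
                *pp = n->next;
        return head;
}

int main(void)
{
        struct node b = { 200, NULL };
        struct node a = { 100, &b };
        struct node *head = &a;
        /* remember the old head, then delete: the order the new code uses */
        struct node *next_timer = getnext(head);

        head = del(head, &a);
        if (&a == next_timer) {
                /* the removed timer was the next event, so reprogram
                 * against the queue as it looks after the removal */
                struct node *nn = getnext(head);

                printf("reprogram to %ld\n", nn ? nn->expires : -1L);
        }
        return 0;
}

Deleting first and comparing against the saved next_timer pointer means the reprogram step always sees a queue that already reflects the removal.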
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cf52fda2e096..cfc65e1eb9fb 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -492,6 +492,22 @@ void clocksource_touch_watchdog(void)
 }
 
 /**
+ * clocksource_max_adjustment - Returns max adjustment amount
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u32 clocksource_max_adjustment(struct clocksource *cs)
+{
+        u64 ret;
+        /*
+         * We won't try to correct for more than 11% adjustments (110,000 ppm)
+         */
+        ret = (u64)cs->mult * 11;
+        do_div(ret, 100);
+        return (u32)ret;
+}
+
+/**
  * clocksource_max_deferment - Returns max time the clocksource can be deferred
  * @cs:         Pointer to clocksource
  *
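As a point of reference, here is a minimal userspace sketch of what the 11% bound works out to, and of the mult + maxadj wraparound test that clocksource_register() (further down) warns about. do_div() is kernel-only, so plain 64-bit division stands in, and the sample mult is made up:

#include <stdint.h>
#include <stdio.h>

/* 11% of mult, computed in 64 bits so the multiply cannot overflow */
static uint32_t max_adjustment(uint32_t mult)
{
        return (uint32_t)(((uint64_t)mult * 11) / 100);
}

int main(void)
{
        uint32_t mult = 4194304000u;    /* made up, deliberately large */
        uint32_t maxadj = max_adjustment(mult);

        printf("mult=%u maxadj=%u (~11%%)\n", mult, maxadj);
        /* u32 wraparound here is what clocksource_register() warns about */
        if (mult + maxadj < mult)
                printf("mult would overflow on an 11%% NTP adjustment\n");
        return 0;
}

With this mult the sum passes 2^32 and the guard fires; the halving loop added to __clocksource_updatefreq_scale() below exists to escape exactly that situation.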
@@ -503,25 +519,28 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
         /*
          * Calculate the maximum number of cycles that we can pass to the
          * cyc2ns function without overflowing a 64-bit signed result. The
-         * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
-         * is equivalent to the below.
-         * max_cycles < (2^63)/cs->mult
-         * max_cycles < 2^(log2((2^63)/cs->mult))
-         * max_cycles < 2^(log2(2^63) - log2(cs->mult))
-         * max_cycles < 2^(63 - log2(cs->mult))
-         * max_cycles < 1 << (63 - log2(cs->mult))
+         * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
+         * which is equivalent to the below.
+         * max_cycles < (2^63)/(cs->mult + cs->maxadj)
+         * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
+         * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
+         * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
+         * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
          * Please note that we add 1 to the result of the log2 to account for
          * any rounding errors, ensure the above inequality is satisfied and
          * no overflow will occur.
          */
-        max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+        max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
 
         /*
          * The actual maximum number of cycles we can defer the clocksource is
          * determined by the minimum of max_cycles and cs->mask.
+         * Note: Here we subtract the maxadj to make sure we don't sleep for
+         * too long if there's a large negative adjustment.
          */
         max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
-        max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+        max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
+                                        cs->shift);
 
         /*
          * To ensure that the clocksource does not wrap whilst we are idle,
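The deferment bound is likewise easy to model in userspace. Here is a sketch with hand-rolled stand-ins for the kernel's ilog2() and clocksource_cyc2ns(), using illustrative mult/shift values rather than any real clocksource's:

#include <stdint.h>
#include <stdio.h>

/* floor(log2(x)), a stand-in for the kernel's ilog2() */
static int ilog2_u32(uint32_t x)
{
        int l = -1;

        while (x) {
                l++;
                x >>= 1;
        }
        return l;
}

/* (cycles * mult) >> shift, as clocksource_cyc2ns() computes it */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
        return (cycles * mult) >> shift;
}

int main(void)
{
        uint32_t mult = 10485760, shift = 24;   /* illustrative only */
        uint32_t maxadj = (uint32_t)(((uint64_t)mult * 11) / 100);
        uint64_t mask = 0xffffffffull;          /* 32-bit counter */
        uint64_t max_cycles, max_nsecs;

        /* the +1 on the ilog2 result absorbs its rounding down */
        max_cycles = 1ull << (63 - (ilog2_u32(mult + maxadj) + 1));
        if (max_cycles > mask)
                max_cycles = mask;

        /* worst case: NTP has slewed mult all the way down by maxadj */
        max_nsecs = cyc2ns(max_cycles, mult - maxadj, shift);
        printf("max_cycles=%llu max_nsecs=%llu\n",
               (unsigned long long)max_cycles,
               (unsigned long long)max_nsecs);
        return 0;
}

For these numbers the counter mask, not the multiplier, is the binding limit, and max_nsecs lands a little under 2.4 seconds; as the context line above hints, the function then shaves off a further margin before returning.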
@@ -640,7 +659,6 @@ static void clocksource_enqueue(struct clocksource *cs)
 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
         u64 sec;
-
         /*
          * Calc the maximum number of seconds which we can run before
          * wrapping around. For clocksources which have a mask > 32bit
@@ -661,6 +679,20 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 
         clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
                                NSEC_PER_SEC / scale, sec * scale);
+
+        /*
+         * For clocksources that have large mults, we need headroom to
+         * avoid overflow: since mult may be adjusted by ntp, add an
+         * extra safety margin.
+         */
+        cs->maxadj = clocksource_max_adjustment(cs);
+        while ((cs->mult + cs->maxadj < cs->mult)
+                || (cs->mult - cs->maxadj > cs->mult)) {
+                cs->mult >>= 1;
+                cs->shift--;
+                cs->maxadj = clocksource_max_adjustment(cs);
+        }
+
         cs->max_idle_ns = clocksource_max_deferment(cs);
 }
 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
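The halving loop added here is a resolution-for-headroom trade: dropping one bit from both mult and shift leaves the ns-per-cycle ratio mult/2^shift unchanged while shrinking the absolute adjustment range. A quick demonstration with a deliberately oversized, hypothetical mult/shift pair:

#include <stdint.h>
#include <stdio.h>

static uint32_t max_adjustment(uint32_t mult)
{
        return (uint32_t)(((uint64_t)mult * 11) / 100);
}

int main(void)
{
        uint32_t mult = 4000000000u, shift = 31;        /* hypothetical */
        uint32_t maxadj = max_adjustment(mult);

        /* same overflow/underflow test as __clocksource_updatefreq_scale() */
        while ((mult + maxadj < mult) || (mult - maxadj > mult)) {
                mult >>= 1;
                shift--;
                maxadj = max_adjustment(mult);
        }
        /* 4000000000/2^31 == 2000000000/2^30: the ratio is preserved */
        printf("mult=%u shift=%u maxadj=%u\n", mult, shift, maxadj);
        return 0;
}

On this input the loop runs once and settles at mult=2000000000, shift=30, at which point an 11% slew can no longer wrap the u32.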
@@ -701,6 +733,12 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
  */
 int clocksource_register(struct clocksource *cs)
 {
+        /* calculate max adjustment for given mult/shift */
+        cs->maxadj = clocksource_max_adjustment(cs);
+        WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+                "Clocksource %s might overflow on 11%% adjustment\n",
+                cs->name);
+
         /* calculate max idle time permitted for this clocksource */
         cs->max_idle_ns = clocksource_max_deferment(cs);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2b021b0e8507..237841378c03 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -249,6 +249,8 @@ ktime_t ktime_get(void)
                 secs = xtime.tv_sec + wall_to_monotonic.tv_sec;
                 nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec;
                 nsecs += timekeeping_get_ns();
+                /* If arch requires, add in gettimeoffset() */
+                nsecs += arch_gettimeoffset();
 
         } while (read_seqretry(&xtime_lock, seq));
         /*
@@ -280,6 +282,8 @@ void ktime_get_ts(struct timespec *ts)
                 *ts = xtime;
                 tomono = wall_to_monotonic;
                 nsecs = timekeeping_get_ns();
+                /* If arch requires, add in gettimeoffset() */
+                nsecs += arch_gettimeoffset();
 
         } while (read_seqretry(&xtime_lock, seq));
 
@@ -802,14 +806,44 @@ static void timekeeping_adjust(s64 offset)
         s64 error, interval = timekeeper.cycle_interval;
         int adj;
 
+        /*
+         * The point of this is to check if the error is greater than half
+         * an interval.
+         *
+         * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
+         *
+         * Note we subtract one in the shift, so that error is really error*2.
+         * This "saves" dividing (shifting) interval twice, but keeps the
+         * (error > interval) comparison as still measuring if error is
+         * larger than half an interval.
+         *
+         * Note: It does not "save" on aggravation when reading the code.
+         */
         error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1);
         if (error > interval) {
+                /*
+                 * We now divide error by 4 (via shift), which checks if
+                 * the error is greater than twice the interval.
+                 * If it is greater, we need a bigadjust; if it's smaller,
+                 * we can adjust by 1.
+                 */
                 error >>= 2;
+                /*
+                 * XXX - In update_wall_time, we round up to the next
+                 * nanosecond, and store the amount rounded up into
+                 * the error. This causes the likely below to be unlikely.
+                 *
+                 * The proper fix is to avoid rounding up by using
+                 * the high precision timekeeper.xtime_nsec instead of
+                 * xtime.tv_nsec everywhere. Fixing this will take some
+                 * time.
+                 */
                 if (likely(error <= interval))
                         adj = 1;
                 else
                         adj = timekeeping_bigadjust(error, &interval, &offset);
         } else if (error < -interval) {
+                /* See comment above, this is just switched for the negative */
                 error >>= 2;
                 if (likely(error >= -interval)) {
                         adj = -1;
@@ -817,9 +851,65 @@ static void timekeeping_adjust(s64 offset)
                         offset = -offset;
                 } else
                         adj = timekeeping_bigadjust(error, &interval, &offset);
-        } else
+        } else /* No adjustment needed */
                 return;
 
+        WARN_ONCE(timekeeper.clock->maxadj &&
+                        (timekeeper.mult + adj > timekeeper.clock->mult +
+                                                timekeeper.clock->maxadj),
+                        "Adjusting %s more than 11%% (%ld vs %ld)\n",
+                        timekeeper.clock->name, (long)timekeeper.mult + adj,
+                        (long)timekeeper.clock->mult +
+                                                timekeeper.clock->maxadj);
+        /*
+         * So the following can be confusing.
+         *
+         * To keep things simple, let's assume adj == 1 for now.
+         *
+         * When adj != 1, remember that the interval and offset values
+         * have been appropriately scaled so the math is the same.
+         *
+         * The basic idea here is that we're increasing the multiplier
+         * by one, this causes the xtime_interval to be incremented by
+         * one cycle_interval. This is because:
+         *      xtime_interval = cycle_interval * mult
+         * So if mult is being incremented by one:
+         *      xtime_interval = cycle_interval * (mult + 1)
+         * It's the same as:
+         *      xtime_interval = (cycle_interval * mult) + cycle_interval
+         * Which can be shortened to:
+         *      xtime_interval += cycle_interval
+         *
+         * So offset stores the non-accumulated cycles. Thus the current
+         * time (in shifted nanoseconds) is:
+         *      now = (offset * adj) + xtime_nsec
+         * Now, even though we're adjusting the clock frequency, we have
+         * to keep time consistent. In other words, we can't jump back
+         * in time, and we also want to avoid jumping forward in time.
+         *
+         * So given the same offset value, we need the time to be the same
+         * both before and after the freq adjustment.
+         *      now = (offset * adj_1) + xtime_nsec_1
+         *      now = (offset * adj_2) + xtime_nsec_2
+         * So:
+         *      (offset * adj_1) + xtime_nsec_1 =
+         *              (offset * adj_2) + xtime_nsec_2
+         * And we know:
+         *      adj_2 = adj_1 + 1
+         * So:
+         *      (offset * adj_1) + xtime_nsec_1 =
+         *              (offset * (adj_1+1)) + xtime_nsec_2
+         *      (offset * adj_1) + xtime_nsec_1 =
+         *              (offset * adj_1) + offset + xtime_nsec_2
+         * Canceling the sides:
+         *      xtime_nsec_1 = offset + xtime_nsec_2
+         * Which gives us:
+         *      xtime_nsec_2 = xtime_nsec_1 - offset
+         * Which simplifies to:
+         *      xtime_nsec -= offset
+         *
+         * XXX - TODO: Doc ntp_error calculation.
+         */
         timekeeper.mult += adj;
         timekeeper.xtime_interval += interval;
         timekeeper.xtime_nsec -= offset;
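The algebra in that closing comment is easy to check numerically. A minimal sketch (magnitudes made up and far smaller than real shifted-nanosecond values) verifying that bumping the multiplier while subtracting offset from xtime_nsec leaves the derived time unchanged:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t offset = 12345;         /* non-accumulated cycles (made up) */
        uint64_t mult = 1000000;         /* pre-adjustment multiplier */
        uint64_t xtime_nsec = 999999999; /* shifted-nanoseconds accumulator */
        uint64_t before, after;

        /* time before the frequency adjustment: (offset * mult) + xtime_nsec */
        before = offset * mult + xtime_nsec;

        /* apply adj = +1 exactly as timekeeping_adjust() does */
        mult += 1;
        xtime_nsec -= offset;

        /* time after the adjustment: must be identical */
        after = offset * mult + xtime_nsec;

        printf("before=%llu after=%llu %s\n",
               (unsigned long long)before, (unsigned long long)after,
               before == after ? "(consistent)" : "(BUG)");
        return 0;
}

Both values print as 13344999999: the frequency changes, but time neither jumps forward nor back at the adjustment point.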
