author		john stultz <johnstul@us.ibm.com>	2009-10-02 19:17:53 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-10-05 07:51:48 -0400
commit		a092ff0f90cae22b2ac8028ecd2c6f6c1a9e4601
tree		79ec451d0bcdf6c08e0bc210b4beed694fbbf4a9
parent		8a0382f6fceaf0c6479e582e1054f36333ea3d24
time: Implement logarithmic time accumulation
Accumulating one tick at a time works well unless we're using NOHZ.
Then it can be an issue, since we may have to run through the loop
a few thousand times, which can increase timer-interrupt-caused
latency.

The current solution was to accumulate in half-second intervals
with NOHZ. This kept the number of loops down, however it did
slightly change how we make NTP adjustments. While not an issue
for NTPd users, as NTPd makes adjustments over a longer period of
time, other adjtimex() users have noticed the half-second
granularity with which we can apply frequency changes to the clock.

For instance, if an application tries to apply a 100ppm frequency
correction for 20ms to correct a 2us offset, with NOHZ it gets
either no correction or a 50us correction.

Now, there will always be some granularity error when applying
frequency corrections. However, users sensitive to this error have
seen a 50-500x increase with NOHZ compared to running without NOHZ.

So I figured I'd try another approach than simply increasing the
interval. My approach is to consume the time interval
logarithmically. This reduces the number of times through the loop
needed, keeping latency down, while still preserving the original
granularity error for adjtimex() changes.

Further, this change allows us to remove the xtime_cache code
(patch to follow), as xtime is always within one tick of the
current time, instead of the half-second updates it saw before.

An earlier version of this patch has been shipping to x86 users in
the RedHat MRG releases for a while without issue, but I've
reworked this version to be even more careful about avoiding
possible overflows if the shift value gets too large.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: John Kacur <jkacur@redhat.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <1254525473.7741.88.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
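To see why the logarithmic approach keeps the loop short, consider
the following standalone userspace sketch. It is a toy model, not
kernel code: "interval" and "offset" are assumed stand-ins for
timekeeper.cycle_interval and the pending cycle count, and
ilog2_u64() stands in for the kernel's ilog2().

/*
 * logaccum_demo.c - minimal sketch of logarithmic accumulation.
 * Build: gcc -O2 logaccum_demo.c -o logaccum_demo
 */
#include <stdio.h>
#include <stdint.h>

/* floor(log2(x)) for x > 0, analogous to the kernel's ilog2() */
static int ilog2_u64(uint64_t x)
{
	return 63 - __builtin_clzll(x);
}

int main(void)
{
	uint64_t interval = 1000;			/* cycles per tick (assumed) */
	uint64_t offset = 5000 * interval + 123;	/* ~5000 ticks pending */
	int shift, iterations = 0;

	/*
	 * Largest power-of-two multiple of the interval that could fit
	 * in the offset; this may overshoot by one, which the guard in
	 * the loop absorbs. (The kernel additionally clamps this with
	 * maxshift so tick_length << shift cannot overflow.)
	 */
	shift = ilog2_u64(offset) - ilog2_u64(interval);

	while (offset >= interval) {
		/* Consume one interval << shift chunk if it fits, as
		 * logarithmic_accumulation() does, then halve the chunk */
		if (offset >= (interval << shift))
			offset -= interval << shift;
		if (shift > 0)
			shift--;
		iterations++;
	}

	/* prints: loop iterations: 11, leftover cycles: 123 */
	printf("loop iterations: %d, leftover cycles: %llu\n",
	       iterations, (unsigned long long)offset);
	return 0;
}

With about 5000 ticks pending, the loop runs 11 times instead of
roughly 5000, and each consumed chunk still feeds the NTP error
accounting at the original per-interval granularity.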
-rw-r--r--	include/linux/timex.h		4
-rw-r--r--	kernel/time/timekeeping.c	85
2 files changed, 60 insertions(+), 29 deletions(-)
diff --git a/include/linux/timex.h b/include/linux/timex.h
index e6967d10d9e5..0c0ef7d4db7c 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -261,11 +261,7 @@ static inline int ntp_synced(void)
 
 #define NTP_SCALE_SHIFT		32
 
-#ifdef CONFIG_NO_HZ
-#define NTP_INTERVAL_FREQ  (2)
-#else
 #define NTP_INTERVAL_FREQ  (HZ)
-#endif
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
 /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fb0f46fa1ecd..5fdd78e0858a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -721,6 +721,51 @@ static void timekeeping_adjust(s64 offset)
 					timekeeper.ntp_error_shift;
 }
 
+
+/**
+ * logarithmic_accumulation - shifted accumulation of cycles
+ *
+ * This function accumulates a shifted interval of cycles into
+ * a shifted interval of nanoseconds, allowing for an O(log)
+ * accumulation loop.
+ *
+ * Returns the unconsumed cycles.
+ */
+static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
+{
+	u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
+
+	/* If the offset is smaller than a shifted interval, do nothing */
+	if (offset < timekeeper.cycle_interval << shift)
+		return offset;
+
+	/* Accumulate one shifted interval */
+	offset -= timekeeper.cycle_interval << shift;
+	timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
+
+	timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+	while (timekeeper.xtime_nsec >= nsecps) {
+		timekeeper.xtime_nsec -= nsecps;
+		xtime.tv_sec++;
+		second_overflow();
+	}
+
+	/* Accumulate into raw time */
+	raw_time.tv_nsec += timekeeper.raw_interval << shift;
+	while (raw_time.tv_nsec >= NSEC_PER_SEC) {
+		raw_time.tv_nsec -= NSEC_PER_SEC;
+		raw_time.tv_sec++;
+	}
+
+	/* Accumulate error between NTP and clock interval */
+	timekeeper.ntp_error += tick_length << shift;
+	timekeeper.ntp_error -= timekeeper.xtime_interval <<
+				(timekeeper.ntp_error_shift + shift);
+
+	return offset;
+}
+
+
 /**
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
@@ -731,6 +776,7 @@ void update_wall_time(void)
 	struct clocksource *clock;
 	cycle_t offset;
 	u64 nsecs;
+	int shift = 0, maxshift;
 
 	/* Make sure we're fully resumed: */
 	if (unlikely(timekeeping_suspended))
@@ -744,33 +790,22 @@ void update_wall_time(void)
 #endif
 	timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
 
-	/* normally this loop will run just once, however in the
-	 * case of lost or late ticks, it will accumulate correctly.
+	/*
+	 * With NO_HZ we may have to accumulate many cycle_intervals
+	 * (think "ticks") worth of time at once. To do this efficiently,
+	 * we calculate the largest doubling multiple of cycle_intervals
+	 * that is smaller than the offset. We then accumulate that
+	 * chunk in one go, and then try to consume the next smaller
+	 * doubled multiple.
 	 */
+	shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+	shift = max(0, shift);
+	/* Bound shift to one less than what overflows tick_length */
+	maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+	shift = min(shift, maxshift);
 	while (offset >= timekeeper.cycle_interval) {
-		u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
-
-		/* accumulate one interval */
-		offset -= timekeeper.cycle_interval;
-		clock->cycle_last += timekeeper.cycle_interval;
-
-		timekeeper.xtime_nsec += timekeeper.xtime_interval;
-		if (timekeeper.xtime_nsec >= nsecps) {
-			timekeeper.xtime_nsec -= nsecps;
-			xtime.tv_sec++;
-			second_overflow();
-		}
-
-		raw_time.tv_nsec += timekeeper.raw_interval;
-		if (raw_time.tv_nsec >= NSEC_PER_SEC) {
-			raw_time.tv_nsec -= NSEC_PER_SEC;
-			raw_time.tv_sec++;
-		}
-
-		/* accumulate error between NTP and clock interval */
-		timekeeper.ntp_error += tick_length;
-		timekeeper.ntp_error -= timekeeper.xtime_interval <<
-					timekeeper.ntp_error_shift;
+		offset = logarithmic_accumulation(offset, shift);
+		shift--;
 	}
 
 	/* correct the clock when NTP error is too big */
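The maxshift computation in the hunk above bounds the shift so that
tick_length << shift cannot overflow 64 bits: it counts the unused
high bits of tick_length and keeps one to spare. A standalone sketch
of that bound, using an assumed tick_length of roughly
(NSEC_PER_SEC/HZ) << NTP_SCALE_SHIFT for HZ=1000 (the real kernel
value also carries NTP frequency adjustments):

/*
 * maxshift_demo.c - standalone check of the overflow bound.
 * Build: gcc -O2 maxshift_demo.c -o maxshift_demo
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t tick_length = 1000000ULL << 32;	/* assumed, ~2^51 */
	int ilog2_tl = 63 - __builtin_clzll(tick_length);
	int maxshift;

	/* Number of unused high bits, minus one spare bit: shifting by
	 * maxshift keeps tick_length << shift below 2^63, so adding it
	 * into the signed 64-bit ntp_error cannot overflow. */
	maxshift = (8 * (int)sizeof(tick_length) - (ilog2_tl + 1)) - 1;

	/* prints: ilog2(tick_length) = 51, maxshift = 11 */
	printf("ilog2(tick_length) = %d, maxshift = %d\n",
	       ilog2_tl, maxshift);
	return 0;
}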