aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Stultz <johnstul@us.ibm.com>2010-07-27 20:00:00 -0400
committerJohn Stultz <john.stultz@linaro.org>2010-12-02 19:48:37 -0500
commit08ec0c58fb8a05d3191d5cb6f5d6f81adb419798 (patch)
tree9dd292575c8b349300ba837a8103cbd9235a5b4f
parentb0f969009f647cd473c5e559aeec9c4229d12f87 (diff)
x86: Improve TSC calibration using a delayed workqueue
Boot to boot, the TSC calibration may vary by quite a large amount. While normal variance of 50-100ppm can easily be seen, the quick calibration code only requires 500ppm accuracy, which is the limit of what NTP can correct for. This can cause problems for systems being used as NTP servers, as every time they reboot it can take hours for them to calculate the new drift error caused by the calibration. The classic trade-off here is calibration accuracy vs slow boot times, as during the calibration nothing else can run. This patch uses a delayed workqueue to calibrate the TSC over the period of a second. This allows very accurate calibration (in my tests only varying by 1 kHz or 0.4ppm boot to boot). Additionally, this refined calibration step does not block the boot process, and only delays the TSC clocksource registration by a few seconds in early boot. If the refined calibration strays more than 1% from the early boot calibration value, the system will fall back to the already calculated early boot calibration. Credit to Andi Kleen who suggested using a timer quite a while back, but I dismissed it thinking the timer calibration would be done after the clocksource was registered (which would break things). Forgive me for my short-sightedness. This patch has worked very well in my testing, but TSC hardware is quite varied so it would probably be good to get some extended testing, possibly pushing inclusion out to 2.6.39. Signed-off-by: John Stultz <johnstul@us.ibm.com> LKML-Reference: <1289003985-29060-1-git-send-email-johnstul@us.ibm.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> CC: Thomas Gleixner <tglx@linutronix.de> CC: Ingo Molnar <mingo@elte.hu> CC: Martin Schwidefsky <schwidefsky@de.ibm.com> CC: Clark Williams <williams@redhat.com> CC: Andi Kleen <andi@firstfloor.org>
-rw-r--r--arch/x86/kernel/tsc.c86
1 files changed, 83 insertions, 3 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index bb64beb301d9..dc1393e7cbfb 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -888,7 +888,82 @@ __cpuinit int unsynchronized_tsc(void)
888 return 0; 888 return 0;
889} 889}
890 890
891static void __init init_tsc_clocksource(void) 891
892static void tsc_refine_calibration_work(struct work_struct *work);
893static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); /* delayed work item whose handler is the refinement function below */
894/**
895 * tsc_refine_calibration_work - Further refine tsc freq calibration
896 * @work: ignored.
897 *
898 * This function uses delayed work over a period of a
899 * second to further refine the TSC freq value. Since this is
900 * timer based, instead of loop based, we don't block the boot
901 * process while this longer calibration is done.
902 *
903 * If there are any calibration anomalies (too many SMIs, etc),
904 * or the refined calibration is off by more than 1% of the fast
905 * early calibration, we throw out the new calibration and use the
906 * early calibration.
907 */
908static void tsc_refine_calibration_work(struct work_struct *work)
909{
910 static u64 tsc_start = -1, ref_start; /* static: state survives between the first and second run; -1 means "start sample not taken yet" */
911 static int hpet; /* latched on the first run so both samples use the same reference */
912 u64 tsc_stop, ref_stop, delta;
913 unsigned long freq;
914
915 /* Don't bother refining TSC on unstable systems */
916 if (check_tsc_unstable())
917 goto out;
918
919 /*
920 * Since the work is started early in boot, we may be
921 * delayed the first time we expire. So set the workqueue
922 * again once we know timers are working.
923 */
924 if (tsc_start == -1) { /* NOTE(review): -1 converted to u64 equals ULLONG_MAX, the value the SMI check below tests; a start sample of ULLONG_MAX would presumably re-enter this branch on the next run - confirm intended */
925 /*
926 * Only set hpet once, to avoid mixing hardware
927 * if the hpet becomes enabled later.
928 */
929 hpet = is_hpet_enabled();
930 schedule_delayed_work(&tsc_irqwork, HZ); /* queue the second run HZ ticks from now (the one-second window described above) */
931 tsc_start = tsc_read_refs(&ref_start, hpet); /* take the start sample of TSC and reference counter */
932 return; /* first pass ends here; the clocksource is not registered yet */
933 }
934
935 tsc_stop = tsc_read_refs(&ref_stop, hpet); /* second pass: take the stop sample with the same reference selection */
936
937 /* hpet or pmtimer available ? */
938 if (!hpet && !ref_start && !ref_stop)
939 goto out;
940
941 /* Check, whether the sampling was disturbed by an SMI */
942 if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
943 goto out;
944
945 delta = tsc_stop - tsc_start; /* TSC ticks elapsed between the two samples */
946 delta *= 1000000LL; /* scale before handing to the calc_*_ref helpers; presumably matches their expected units - verify against those helpers */
947 if (hpet)
948 freq = calc_hpet_ref(delta, ref_start, ref_stop);
949 else
950 freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
951
952 /* Make sure we're within 1% */
953 if (abs(tsc_khz - freq) > tsc_khz/100) /* NOTE(review): freq is unsigned long; whether abs() sees a signed difference depends on tsc_khz's type and the abs() definition, neither visible here - confirm */
954 goto out;
955
956 tsc_khz = freq; /* accept the refined value */
957 printk(KERN_INFO "Refined TSC clocksource calibration: "
958 "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
959 (unsigned long)tsc_khz % 1000);
960
961out:
962 clocksource_register_khz(&clocksource_tsc, tsc_khz); /* reached on every path except the first-pass return; tsc_khz is the refined value if accepted above, otherwise unchanged */
963}
964
965
966static int __init init_tsc_clocksource(void)
892{ 967{
893 if (tsc_clocksource_reliable) 968 if (tsc_clocksource_reliable)
894 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 969 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
@@ -897,8 +972,14 @@ static void __init init_tsc_clocksource(void)
897 clocksource_tsc.rating = 0; 972 clocksource_tsc.rating = 0;
898 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 973 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
899 } 974 }
900 clocksource_register_khz(&clocksource_tsc, tsc_khz); 975 schedule_delayed_work(&tsc_irqwork, 0);
976 return 0;
901} 977}
978/*
979 * We use device_initcall here, to ensure we run after the hpet
980 * is fully initialized, which may occur at fs_initcall time.
981 */
982device_initcall(init_tsc_clocksource);
902 983
903void __init tsc_init(void) 984void __init tsc_init(void)
904{ 985{
@@ -952,6 +1033,5 @@ void __init tsc_init(void)
952 mark_tsc_unstable("TSCs unsynchronized"); 1033 mark_tsc_unstable("TSCs unsynchronized");
953 1034
954 check_system_tsc_reliable(); 1035 check_system_tsc_reliable();
955 init_tsc_clocksource();
956} 1036}
957 1037