aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Vacek <neelx@redhat.com>2018-11-05 12:10:40 -0500
committerThomas Gleixner <tglx@linutronix.de>2018-11-06 15:53:15 -0500
commita786ef152cdcfebc923a67f63c7815806eefcf81 (patch)
tree3a29cf70452df09bb81b51b867e1ca5e37f25e09
parent651022382c7f8da46cb4872a545ee1da6d097d2a (diff)
x86/tsc: Make calibration refinement more robust
The threshold in tsc_read_refs() is constant, which may favor slower CPUs but may not be optimal for a simple reading of the reference on faster ones. Hence make it proportional to tsc_khz, when available, to compensate for this.

The threshold guards against any disturbance like IRQs, NMIs, SMIs or CPU stealing by the host on guest systems, so rename it accordingly and fix the comments as well.

Also, on some systems there is noticeable DMI bus contention at some point during boot, keeping the readout failing (observed in about one in ~300 boots when testing). In that case, also retry the second readout instead of simply bailing out unrefined. Usually, the next second the readout returns fast just fine without any issues.

Signed-off-by: Daniel Vacek <neelx@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/1541437840-29293-1-git-send-email-neelx@redhat.com
-rw-r--r--arch/x86/kernel/tsc.c30
1 file changed, 16 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e9f777bfed40..3fae23834069 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -297,15 +297,16 @@ static int __init tsc_setup(char *str)
297 297
298__setup("tsc=", tsc_setup); 298__setup("tsc=", tsc_setup);
299 299
300#define MAX_RETRIES 5 300#define MAX_RETRIES 5
301#define SMI_TRESHOLD 50000 301#define TSC_DEFAULT_THRESHOLD 0x20000
302 302
303/* 303/*
304 * Read TSC and the reference counters. Take care of SMI disturbance 304 * Read TSC and the reference counters. Take care of any disturbances
305 */ 305 */
306static u64 tsc_read_refs(u64 *p, int hpet) 306static u64 tsc_read_refs(u64 *p, int hpet)
307{ 307{
308 u64 t1, t2; 308 u64 t1, t2;
309 u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
309 int i; 310 int i;
310 311
311 for (i = 0; i < MAX_RETRIES; i++) { 312 for (i = 0; i < MAX_RETRIES; i++) {
@@ -315,7 +316,7 @@ static u64 tsc_read_refs(u64 *p, int hpet)
315 else 316 else
316 *p = acpi_pm_read_early(); 317 *p = acpi_pm_read_early();
317 t2 = get_cycles(); 318 t2 = get_cycles();
318 if ((t2 - t1) < SMI_TRESHOLD) 319 if ((t2 - t1) < thresh)
319 return t2; 320 return t2;
320 } 321 }
321 return ULLONG_MAX; 322 return ULLONG_MAX;
@@ -703,15 +704,15 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
703 * zero. In each wait loop iteration we read the TSC and check 704 * zero. In each wait loop iteration we read the TSC and check
704 * the delta to the previous read. We keep track of the min 705 * the delta to the previous read. We keep track of the min
705 * and max values of that delta. The delta is mostly defined 706 * and max values of that delta. The delta is mostly defined
706 * by the IO time of the PIT access, so we can detect when a 707 * by the IO time of the PIT access, so we can detect when
707 * SMI/SMM disturbance happened between the two reads. If the 708 * any disturbance happened between the two reads. If the
708 * maximum time is significantly larger than the minimum time, 709 * maximum time is significantly larger than the minimum time,
709 * then we discard the result and have another try. 710 * then we discard the result and have another try.
710 * 711 *
711 * 2) Reference counter. If available we use the HPET or the 712 * 2) Reference counter. If available we use the HPET or the
712 * PMTIMER as a reference to check the sanity of that value. 713 * PMTIMER as a reference to check the sanity of that value.
713 * We use separate TSC readouts and check inside of the 714 * We use separate TSC readouts and check inside of the
714 * reference read for a SMI/SMM disturbance. We dicard 715 * reference read for any possible disturbance. We dicard
715 * disturbed values here as well. We do that around the PIT 716 * disturbed values here as well. We do that around the PIT
716 * calibration delay loop as we have to wait for a certain 717 * calibration delay loop as we have to wait for a certain
717 * amount of time anyway. 718 * amount of time anyway.
@@ -744,7 +745,7 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
744 if (ref1 == ref2) 745 if (ref1 == ref2)
745 continue; 746 continue;
746 747
747 /* Check, whether the sampling was disturbed by an SMI */ 748 /* Check, whether the sampling was disturbed */
748 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) 749 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
749 continue; 750 continue;
750 751
@@ -1268,7 +1269,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
1268 */ 1269 */
1269static void tsc_refine_calibration_work(struct work_struct *work) 1270static void tsc_refine_calibration_work(struct work_struct *work)
1270{ 1271{
1271 static u64 tsc_start = -1, ref_start; 1272 static u64 tsc_start = ULLONG_MAX, ref_start;
1272 static int hpet; 1273 static int hpet;
1273 u64 tsc_stop, ref_stop, delta; 1274 u64 tsc_stop, ref_stop, delta;
1274 unsigned long freq; 1275 unsigned long freq;
@@ -1283,14 +1284,15 @@ static void tsc_refine_calibration_work(struct work_struct *work)
1283 * delayed the first time we expire. So set the workqueue 1284 * delayed the first time we expire. So set the workqueue
1284 * again once we know timers are working. 1285 * again once we know timers are working.
1285 */ 1286 */
1286 if (tsc_start == -1) { 1287 if (tsc_start == ULLONG_MAX) {
1288restart:
1287 /* 1289 /*
1288 * Only set hpet once, to avoid mixing hardware 1290 * Only set hpet once, to avoid mixing hardware
1289 * if the hpet becomes enabled later. 1291 * if the hpet becomes enabled later.
1290 */ 1292 */
1291 hpet = is_hpet_enabled(); 1293 hpet = is_hpet_enabled();
1292 schedule_delayed_work(&tsc_irqwork, HZ);
1293 tsc_start = tsc_read_refs(&ref_start, hpet); 1294 tsc_start = tsc_read_refs(&ref_start, hpet);
1295 schedule_delayed_work(&tsc_irqwork, HZ);
1294 return; 1296 return;
1295 } 1297 }
1296 1298
@@ -1300,9 +1302,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
1300 if (ref_start == ref_stop) 1302 if (ref_start == ref_stop)
1301 goto out; 1303 goto out;
1302 1304
1303 /* Check, whether the sampling was disturbed by an SMI */ 1305 /* Check, whether the sampling was disturbed */
1304 if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) 1306 if (tsc_stop == ULLONG_MAX)
1305 goto out; 1307 goto restart;
1306 1308
1307 delta = tsc_stop - tsc_start; 1309 delta = tsc_stop - tsc_start;
1308 delta *= 1000000LL; 1310 delta *= 1000000LL;