aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2016-12-13 08:14:17 -0500
committerThomas Gleixner <tglx@linutronix.de>2016-12-15 05:44:29 -0500
commit5bae156241e05d25171b18ee43e49f103c3f8097 (patch)
tree62f1033ced7a668a129800112f3d4c958008228f
parent6a369583178d0b89c2c3919c4456ee22fee0f249 (diff)
x86/tsc: Force TSC_ADJUST register to value >= zero
Roland reported that his DELL T5810 sports a value add BIOS which completely wreckages the TSC. The squirmware [(TM) Ingo Molnar] boots with random negative TSC_ADJUST values, different on all CPUs. That renders the TSC useless because the synchronization check fails. Roland tested the new TSC_ADJUST mechanism. While it manages to readjust the TSCs he needs to disable the TSC deadline timer, otherwise the machine just stops booting. Deeper investigation unearthed that the TSC deadline timer is sensitive to the TSC_ADJUST value. Writing TSC_ADJUST to a negative value results in an interrupt storm caused by the TSC deadline timer. This does not make any sense and it's hard to imagine what kind of hardware wreckage is behind that misfeature, but it's reliably reproducible on other systems which have TSC_ADJUST and TSC deadline timer. While it would be understandable that a big enough negative value which moves the resulting TSC readout into the negative space could have the described effect, this happens even with an adjust value of -1, which keeps the TSC readout definitely in the positive space. The compare register for the TSC deadline timer is set to a positive value larger than the TSC, but despite not having reached the deadline the interrupt is raised immediately. If this happens on the boot CPU, then the machine dies silently because this setup happens before the NMI watchdog is armed. Further experiments showed that any other adjustment of TSC_ADJUST works as expected as long as it stays in the positive range. The direction of the adjustment has no influence either. See the lkml link for further analysis. Yet another proof for the theory that timers are designed by janitors and the underlying (obviously undocumented) mechanisms which allow BIOSes to wreckage them are considered a feature. Well done Intel - NOT! 
To address this wreckage add the following sanity measures: - If the TSC_ADJUST value on the boot cpu is not 0, set it to 0 - If the TSC_ADJUST value on any cpu is negative, set it to 0 - Prevent the cross package synchronization mechanism from setting negative TSC_ADJUST values. Reported-and-tested-by: Roland Scheidegger <rscheidegger_lists@hispeed.ch> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Bruce Schlobohm <bruce.schlobohm@intel.com> Cc: Kevin Stanton <kevin.b.stanton@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Allen Hung <allen_hung@dell.com> Cc: Borislav Petkov <bp@alien8.de> Link: http://lkml.kernel.org/r/20161213131211.397588033@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/include/asm/tsc.h4
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/tsc_sync.c55
3 files changed, 42 insertions, 19 deletions
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 372ad0cd1357..abb1fdcc545a 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -46,12 +46,12 @@ extern int tsc_clocksource_reliable;
46 * all CPUs/cores: 46 * all CPUs/cores:
47 */ 47 */
48#ifdef CONFIG_X86_TSC 48#ifdef CONFIG_X86_TSC
49extern bool tsc_store_and_check_tsc_adjust(void); 49extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
50extern void tsc_verify_tsc_adjust(bool resume); 50extern void tsc_verify_tsc_adjust(bool resume);
51extern void check_tsc_sync_source(int cpu); 51extern void check_tsc_sync_source(int cpu);
52extern void check_tsc_sync_target(void); 52extern void check_tsc_sync_target(void);
53#else 53#else
54static inline bool tsc_store_and_check_tsc_adjust(void) { return false; } 54static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
55static inline void tsc_verify_tsc_adjust(bool resume) { } 55static inline void tsc_verify_tsc_adjust(bool resume) { }
56static inline void check_tsc_sync_source(int cpu) { } 56static inline void check_tsc_sync_source(int cpu) { }
57static inline void check_tsc_sync_target(void) { } 57static inline void check_tsc_sync_target(void) { }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index bfb541a5bb48..0aed75a1e31b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1386,7 +1386,7 @@ void __init tsc_init(void)
1386 if (unsynchronized_tsc()) 1386 if (unsynchronized_tsc())
1387 mark_tsc_unstable("TSCs unsynchronized"); 1387 mark_tsc_unstable("TSCs unsynchronized");
1388 else 1388 else
1389 tsc_store_and_check_tsc_adjust(); 1389 tsc_store_and_check_tsc_adjust(true);
1390 1390
1391 check_system_tsc_reliable(); 1391 check_system_tsc_reliable();
1392 1392
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 94f2ce5fb159..9151f0ce6a42 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -58,8 +58,33 @@ void tsc_verify_tsc_adjust(bool resume)
58 } 58 }
59} 59}
60 60
61static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
62 unsigned int cpu, bool bootcpu)
63{
64 /*
65 * First online CPU in a package stores the boot value in the
66 * adjustment value. This value might change later via the sync
67 * mechanism. If that fails we still can yell about boot values not
68 * being consistent.
69 *
70 * On the boot cpu we just force set the ADJUST value to 0 if it's
71 * non zero. We don't do that on non boot cpus because physical
72 * hotplug should have set the ADJUST register to a value > 0 so
73 * the TSC is in sync with the already running cpus.
74 *
75 * But we always force positive ADJUST values. Otherwise the TSC
76 * deadline timer creates an interrupt storm. Sigh!
77 */
78 if ((bootcpu && bootval != 0) || (!bootcpu && bootval < 0)) {
79 pr_warn("TSC ADJUST: CPU%u: %lld force to 0\n", cpu, bootval);
80 wrmsrl(MSR_IA32_TSC_ADJUST, 0);
81 bootval = 0;
82 }
83 cur->adjusted = bootval;
84}
85
61#ifndef CONFIG_SMP 86#ifndef CONFIG_SMP
62bool __init tsc_store_and_check_tsc_adjust(void) 87bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
63{ 88{
64 struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust); 89 struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
65 s64 bootval; 90 s64 bootval;
@@ -69,9 +94,8 @@ bool __init tsc_store_and_check_tsc_adjust(void)
69 94
70 rdmsrl(MSR_IA32_TSC_ADJUST, bootval); 95 rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
71 cur->bootval = bootval; 96 cur->bootval = bootval;
72 cur->adjusted = bootval;
73 cur->nextcheck = jiffies + HZ; 97 cur->nextcheck = jiffies + HZ;
74 pr_info("TSC ADJUST: Boot CPU0: %lld\n", bootval); 98 tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(), bootcpu);
75 return false; 99 return false;
76} 100}
77 101
@@ -80,7 +104,7 @@ bool __init tsc_store_and_check_tsc_adjust(void)
80/* 104/*
81 * Store and check the TSC ADJUST MSR if available 105 * Store and check the TSC ADJUST MSR if available
82 */ 106 */
83bool tsc_store_and_check_tsc_adjust(void) 107bool tsc_store_and_check_tsc_adjust(bool bootcpu)
84{ 108{
85 struct tsc_adjust *ref, *cur = this_cpu_ptr(&tsc_adjust); 109 struct tsc_adjust *ref, *cur = this_cpu_ptr(&tsc_adjust);
86 unsigned int refcpu, cpu = smp_processor_id(); 110 unsigned int refcpu, cpu = smp_processor_id();
@@ -98,22 +122,16 @@ bool tsc_store_and_check_tsc_adjust(void)
98 /* 122 /*
99 * Check whether this CPU is the first in a package to come up. In 123 * Check whether this CPU is the first in a package to come up. In
100 * this case do not check the boot value against another package 124 * this case do not check the boot value against another package
101 * because the package might have been physically hotplugged, where 125 * because the new package might have been physically hotplugged,
102 * TSC_ADJUST is expected to be different. When called on the boot 126 * where TSC_ADJUST is expected to be different. When called on the
103 * CPU topology_core_cpumask() might not be available yet. 127 * boot CPU topology_core_cpumask() might not be available yet.
104 */ 128 */
105 mask = topology_core_cpumask(cpu); 129 mask = topology_core_cpumask(cpu);
106 refcpu = mask ? cpumask_any_but(mask, cpu) : nr_cpu_ids; 130 refcpu = mask ? cpumask_any_but(mask, cpu) : nr_cpu_ids;
107 131
108 if (refcpu >= nr_cpu_ids) { 132 if (refcpu >= nr_cpu_ids) {
109 /* 133 tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(),
110 * First online CPU in a package stores the boot value in 134 bootcpu);
111 * the adjustment value. This value might change later via
112 * the sync mechanism. If that fails we still can yell
113 * about boot values not being consistent.
114 */
115 cur->adjusted = bootval;
116 pr_info_once("TSC ADJUST: Boot CPU%u: %lld\n", cpu, bootval);
117 return false; 135 return false;
118 } 136 }
119 137
@@ -366,7 +384,7 @@ void check_tsc_sync_target(void)
366 * Store, verify and sanitize the TSC adjust register. If 384 * Store, verify and sanitize the TSC adjust register. If
367 * successful skip the test. 385 * successful skip the test.
368 */ 386 */
369 if (tsc_store_and_check_tsc_adjust()) { 387 if (tsc_store_and_check_tsc_adjust(false)) {
370 atomic_inc(&skip_test); 388 atomic_inc(&skip_test);
371 return; 389 return;
372 } 390 }
@@ -429,8 +447,13 @@ retry:
429 * that the warp is not longer detectable when the observed warp 447 * that the warp is not longer detectable when the observed warp
430 * value is used. In the worst case the adjustment needs to go 448 * value is used. In the worst case the adjustment needs to go
431 * through a 3rd run for fine tuning. 449 * through a 3rd run for fine tuning.
450 *
451 * But we must make sure that the value doesn't become negative
452 * otherwise TSC deadline timer will create an interrupt storm.
432 */ 453 */
433 cur->adjusted += cur_max_warp; 454 cur->adjusted += cur_max_warp;
455 if (cur->adjusted < 0)
456 cur->adjusted = 0;
434 457
435 pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n", 458 pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
436 cpu, cur_max_warp, cur->adjusted); 459 cpu, cur_max_warp, cur->adjusted);