author     Linus Torvalds <torvalds@linux-foundation.org>  2016-12-18 16:59:10 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-12-18 16:59:10 -0500
commit     f7dd3b1734ea335fea01f103d48b3de26ea0d335 (patch)
tree       02284dfc866bfab2d277d05512129dfcf182bb65
parent     1bbb05f52055c8b2fc1cbb2ac272b011593172f9 (diff)
parent     8c9b9d87b855226a823b41a77a05f42324497603 (diff)
Merge branch 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner:
 "This is the last functional update from the tip tree for 4.10. It got
  delayed due to a newly reported and analyzed variant of BIOS bug and
  the resulting wreckage:

   - Separation of TSC being marked reliable and the fact that the
     platform provides the TSC frequency via CPUID/MSRs, and making use
     of it for GOLDMONT.

   - TSC adjust MSR validation and sanitizing:

     The TSC adjust MSR contains the offset to the hardware counter. The
     sum of the adjust MSR and the counter is the TSC value which is read
     via RDTSC.

     On at least two machines from different vendors the BIOS sets the
     TSC adjust MSR to negative values. This happens on cold and warm
     boot. While on cold boot the offset is a few milliseconds, on warm
     boot it basically compensates the power-on time of the system. The
     BIOSes are not even using the adjust MSR to set all CPUs in the
     package to the same offset. The offsets are different, which renders
     the TSC unusable.

     What's worse is that the TSC deadline timer has a HW feature^Wbug.
     It malfunctions when the TSC adjust value is negative or greater
     than or equal to 0x80000000, resulting in silent boot failures,
     hard lockups or non-firing timers. This looks like some hardware
     internal 32/64-bit issue with a sign extension problem. Intel has
     been silent so far on the issue.

     The update contains sanity checks and keeps the adjust register
     within working limits and in sync on the package.

     As it looks like this disease is spreading via BIOS crapware, we
     need to address this urgently as the boot failures are hard to
     debug for users"

* 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Limit the adjust value further
  x86/tsc: Annotate printouts as firmware bug
  x86/tsc: Force TSC_ADJUST register to value >= zero
  x86/tsc: Validate TSC_ADJUST after resume
  x86/tsc: Validate cpumask pointer before accessing it
  x86/tsc: Fix broken CONFIG_X86_TSC=n build
  x86/tsc: Try to adjust TSC if sync test fails
  x86/tsc: Prepare warp test for TSC adjustment
  x86/tsc: Move sync cleanup to a safe place
  x86/tsc: Sync test only for the first cpu in a package
  x86/tsc: Verify TSC_ADJUST from idle
  x86/tsc: Store and check TSC ADJUST MSR
  x86/tsc: Detect random warps
  x86/tsc: Use X86_FEATURE_TSC_ADJUST in detect_art()
  x86/tsc: Finalize the split of the TSC_RELIABLE flag
  x86/tsc: Set TSC_KNOWN_FREQ and TSC_RELIABLE flags on Intel Atom SoCs
  x86/tsc: Mark Intel ATOM_GOLDMONT TSC reliable
  x86/tsc: Mark TSC frequency determined by CPUID as known
  x86/tsc: Add X86_FEATURE_TSC_KNOWN_FREQ flag
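For reference, the MSR/counter relationship and the sanitizing limits described above reduce to the following minimal C sketch. It is illustrative only and not part of the series; the helper names are made up here:

/*
 * Illustrative sketch, not kernel code: how the TSC relates to the
 * TSC_ADJUST MSR and which adjust values the sanitizing accepts.
 */
#include <stdbool.h>
#include <stdint.h>

/* RDTSC returns the hardware counter plus the signed TSC_ADJUST offset. */
static inline uint64_t tsc_value(uint64_t hw_counter, int64_t tsc_adjust)
{
        return hw_counter + (uint64_t)tsc_adjust;
}

/*
 * Boot-time sanitizing as described above: the boot CPU forces any
 * non-zero adjust value to 0; every CPU forces negative values and
 * values above 0x7FFFFFFF to 0, because those break the TSC deadline
 * timer. Positive values on non-boot CPUs are kept (physical hotplug).
 */
static inline int64_t sanitize_tsc_adjust(int64_t bootval, bool bootcpu)
{
        if ((bootcpu && bootval != 0) || bootval < 0 || bootval > 0x7FFFFFFF)
                return 0;
        return bootval;
}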
-rw-r--r--  arch/x86/include/asm/cpufeatures.h     1
-rw-r--r--  arch/x86/include/asm/tsc.h             9
-rw-r--r--  arch/x86/kernel/Makefile               2
-rw-r--r--  arch/x86/kernel/process.c              1
-rw-r--r--  arch/x86/kernel/tsc.c                 42
-rw-r--r--  arch/x86/kernel/tsc_msr.c             19
-rw-r--r--  arch/x86/kernel/tsc_sync.c           290
-rw-r--r--  arch/x86/platform/intel-mid/mfld.c     9
-rw-r--r--  arch/x86/platform/intel-mid/mrfld.c    8
-rw-r--r--  arch/x86/power/cpu.c                   1
10 files changed, 355 insertions, 27 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 59ac427960d4..6ccbf1aaa7ce 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -105,6 +105,7 @@
 #define X86_FEATURE_AMD_DCM        ( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF     ( 3*32+28) /* APERFMPERF */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3           ( 4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 33b6365c22fe..abb1fdcc545a 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -45,8 +45,17 @@ extern int tsc_clocksource_reliable;
  * Boot-time check whether the TSCs are synchronized across
  * all CPUs/cores:
  */
+#ifdef CONFIG_X86_TSC
+extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
+extern void tsc_verify_tsc_adjust(bool resume);
 extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
+#else
+static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
+static inline void tsc_verify_tsc_adjust(bool resume) { }
+static inline void check_tsc_sync_source(int cpu) { }
+static inline void check_tsc_sync_target(void) { }
+#endif
 
 extern int notsc_setup(char *);
 extern void tsc_save_sched_clock_state(void);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 05110c1097ae..581386c7e429 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -75,7 +75,7 @@ apm-y := apm_32.o
 obj-$(CONFIG_APM)          += apm.o
 obj-$(CONFIG_SMP)          += smp.o
 obj-$(CONFIG_SMP)          += smpboot.o
-obj-$(CONFIG_SMP)          += tsc_sync.o
+obj-$(CONFIG_X86_TSC)      += tsc_sync.o
 obj-$(CONFIG_SMP)          += setup_percpu.o
 obj-$(CONFIG_X86_MPPARSE)  += mpparse.o
 obj-y                      += apic/
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 43c36d8a6ae2..37363e46b1f0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -235,6 +235,7 @@ static inline void play_dead(void)
 
 void arch_cpu_idle_enter(void)
 {
+        tsc_verify_tsc_adjust(false);
         local_touch_nmi();
 }
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 46b2f41f8b05..0aed75a1e31b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -702,6 +702,20 @@ unsigned long native_calibrate_tsc(void)
                 }
         }
 
+        /*
+         * TSC frequency determined by CPUID is a "hardware reported"
+         * frequency and is the most accurate one so far we have. This
+         * is considered a known frequency.
+         */
+        setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+        /*
+         * For Atom SoCs TSC is the only reliable clocksource.
+         * Mark TSC reliable so no watchdog on it.
+         */
+        if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
+                setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+
         return crystal_khz * ebx_numerator / eax_denominator;
 }
 
@@ -1043,18 +1057,20 @@ static void detect_art(void)
         if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
                 return;
 
-        cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
-              &art_to_tsc_numerator, unused, unused+1);
-
-        /* Don't enable ART in a VM, non-stop TSC required */
+        /* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */
         if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
             !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
-            art_to_tsc_denominator < ART_MIN_DENOMINATOR)
+            !boot_cpu_has(X86_FEATURE_TSC_ADJUST))
                 return;
 
-        if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset))
+        cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
+              &art_to_tsc_numerator, unused, unused+1);
+
+        if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
                 return;
 
+        rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);
+
         /* Make this sticky over multiple CPU init calls */
         setup_force_cpu_cap(X86_FEATURE_ART);
 }
@@ -1064,6 +1080,11 @@ static void detect_art(void)
 
 static struct clocksource clocksource_tsc;
 
+static void tsc_resume(struct clocksource *cs)
+{
+        tsc_verify_tsc_adjust(true);
+}
+
 /*
  * We used to compare the TSC to the cycle_last value in the clocksource
  * structure to avoid a nasty time-warp. This can be observed in a
@@ -1096,6 +1117,7 @@ static struct clocksource clocksource_tsc = {
         .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
                                   CLOCK_SOURCE_MUST_VERIFY,
         .archdata               = { .vclock_mode = VCLOCK_TSC },
+        .resume                 = tsc_resume,
 };
 
 void mark_tsc_unstable(char *reason)
@@ -1283,10 +1305,10 @@ static int __init init_tsc_clocksource(void)
                 clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
 
         /*
-         * Trust the results of the earlier calibration on systems
-         * exporting a reliable TSC.
+         * When TSC frequency is known (retrieved via MSR or CPUID), we skip
+         * the refined calibration and directly register it as a clocksource.
          */
-        if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+        if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
                 clocksource_register_khz(&clocksource_tsc, tsc_khz);
                 return 0;
         }
@@ -1363,6 +1385,8 @@ void __init tsc_init(void)
 
         if (unsynchronized_tsc())
                 mark_tsc_unstable("TSCs unsynchronized");
+        else
+                tsc_store_and_check_tsc_adjust(true);
 
         check_system_tsc_reliable();
 
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 0fe720d64fef..19afdbd7d0a7 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -100,5 +100,24 @@ unsigned long cpu_khz_from_msr(void)
 #ifdef CONFIG_X86_LOCAL_APIC
         lapic_timer_frequency = (freq * 1000) / HZ;
 #endif
+
+        /*
+         * TSC frequency determined by MSR is always considered "known"
+         * because it is reported by HW.
+         * Another fact is that on MSR capable platforms, PIT/HPET is
+         * generally not available so calibration won't work at all.
+         */
+        setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+        /*
+         * Unfortunately there is no way for hardware to tell whether the
+         * TSC is reliable. We were told by silicon design team that TSC
+         * on Atom SoCs are always "reliable". TSC is also the only
+         * reliable clocksource on these SoCs (HPET is either not present
+         * or not functional) so mark TSC reliable which removes the
+         * requirement for a watchdog clocksource.
+         */
+        setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+
         return res;
 }
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 78083bf23ed1..d0db011051a5 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -14,18 +14,166 @@
  * ( The serial nature of the boot logic and the CPU hotplug lock
  *   protects against more than 2 CPUs entering this code. )
  */
+#include <linux/topology.h>
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
 #include <asm/tsc.h>
 
+struct tsc_adjust {
+        s64             bootval;
+        s64             adjusted;
+        unsigned long   nextcheck;
+        bool            warned;
+};
+
+static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
+
+void tsc_verify_tsc_adjust(bool resume)
+{
+        struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust);
+        s64 curval;
+
+        if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+                return;
+
+        /* Rate limit the MSR check */
+        if (!resume && time_before(jiffies, adj->nextcheck))
+                return;
+
+        adj->nextcheck = jiffies + HZ;
+
+        rdmsrl(MSR_IA32_TSC_ADJUST, curval);
+        if (adj->adjusted == curval)
+                return;
+
+        /* Restore the original value */
+        wrmsrl(MSR_IA32_TSC_ADJUST, adj->adjusted);
+
+        if (!adj->warned || resume) {
+                pr_warn(FW_BUG "TSC ADJUST differs: CPU%u %lld --> %lld. Restoring\n",
+                        smp_processor_id(), adj->adjusted, curval);
+                adj->warned = true;
+        }
+}
+
+static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
+                                   unsigned int cpu, bool bootcpu)
+{
+        /*
+         * First online CPU in a package stores the boot value in the
+         * adjustment value. This value might change later via the sync
+         * mechanism. If that fails we still can yell about boot values not
+         * being consistent.
+         *
+         * On the boot cpu we just force set the ADJUST value to 0 if it's
+         * non zero. We don't do that on non boot cpus because physical
+         * hotplug should have set the ADJUST register to a value > 0 so
+         * the TSC is in sync with the already running cpus.
+         *
+         * But we always force positive ADJUST values. Otherwise the TSC
+         * deadline timer creates an interrupt storm. We also have to
+         * prevent values > 0x7FFFFFFF as those wreckage the timer as well.
+         */
+        if ((bootcpu && bootval != 0) || (!bootcpu && bootval < 0) ||
+            (bootval > 0x7FFFFFFF)) {
+                pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu,
+                        bootval);
+                wrmsrl(MSR_IA32_TSC_ADJUST, 0);
+                bootval = 0;
+        }
+        cur->adjusted = bootval;
+}
+
+#ifndef CONFIG_SMP
+bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
+{
+        struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
+        s64 bootval;
+
+        if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+                return false;
+
+        rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
+        cur->bootval = bootval;
+        cur->nextcheck = jiffies + HZ;
+        tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(), bootcpu);
+        return false;
+}
+
+#else /* !CONFIG_SMP */
+
+/*
+ * Store and check the TSC ADJUST MSR if available
+ */
+bool tsc_store_and_check_tsc_adjust(bool bootcpu)
+{
+        struct tsc_adjust *ref, *cur = this_cpu_ptr(&tsc_adjust);
+        unsigned int refcpu, cpu = smp_processor_id();
+        struct cpumask *mask;
+        s64 bootval;
+
+        if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+                return false;
+
+        rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
+        cur->bootval = bootval;
+        cur->nextcheck = jiffies + HZ;
+        cur->warned = false;
+
+        /*
+         * Check whether this CPU is the first in a package to come up. In
+         * this case do not check the boot value against another package
+         * because the new package might have been physically hotplugged,
+         * where TSC_ADJUST is expected to be different. When called on the
+         * boot CPU topology_core_cpumask() might not be available yet.
+         */
+        mask = topology_core_cpumask(cpu);
+        refcpu = mask ? cpumask_any_but(mask, cpu) : nr_cpu_ids;
+
+        if (refcpu >= nr_cpu_ids) {
+                tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(),
+                                       bootcpu);
+                return false;
+        }
+
+        ref = per_cpu_ptr(&tsc_adjust, refcpu);
+        /*
+         * Compare the boot value and complain if it differs in the
+         * package.
+         */
+        if (bootval != ref->bootval) {
+                pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n",
+                        refcpu, ref->bootval, cpu, bootval);
+        }
+        /*
+         * The TSC_ADJUST values in a package must be the same. If the boot
+         * value on this newly upcoming CPU differs from the adjustment
+         * value of the already online CPU in this package, set it to that
+         * adjusted value.
+         */
+        if (bootval != ref->adjusted) {
+                pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n",
+                        refcpu, ref->adjusted, cpu, bootval);
+                cur->adjusted = ref->adjusted;
+                wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted);
+        }
+        /*
+         * We have the TSCs forced to be in sync on this package. Skip sync
+         * test:
+         */
+        return true;
+}
+
 /*
  * Entry/exit counters that make sure that both CPUs
  * run the measurement code at once:
  */
 static atomic_t start_count;
 static atomic_t stop_count;
+static atomic_t skip_test;
+static atomic_t test_runs;
 
 /*
  * We use a raw spinlock in this exceptional case, because
@@ -37,15 +185,16 @@ static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 static cycles_t last_tsc;
 static cycles_t max_warp;
 static int nr_warps;
+static int random_warps;
 
 /*
  * TSC-warp measurement loop running on both CPUs. This is not called
  * if there is no TSC.
  */
-static void check_tsc_warp(unsigned int timeout)
+static cycles_t check_tsc_warp(unsigned int timeout)
 {
-        cycles_t start, now, prev, end;
-        int i;
+        cycles_t start, now, prev, end, cur_max_warp = 0;
+        int i, cur_warps = 0;
 
         start = rdtsc_ordered();
         /*
@@ -85,13 +234,22 @@ static void check_tsc_warp(unsigned int timeout)
                 if (unlikely(prev > now)) {
                         arch_spin_lock(&sync_lock);
                         max_warp = max(max_warp, prev - now);
+                        cur_max_warp = max_warp;
+                        /*
+                         * Check whether this bounces back and forth. Only
+                         * one CPU should observe time going backwards.
+                         */
+                        if (cur_warps != nr_warps)
+                                random_warps++;
                         nr_warps++;
+                        cur_warps = nr_warps;
                         arch_spin_unlock(&sync_lock);
                 }
         }
         WARN(!(now-start),
              "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
                 now-start, end-start);
+        return cur_max_warp;
 }
 
 /*
@@ -136,15 +294,26 @@ void check_tsc_sync_source(int cpu)
         }
 
         /*
-         * Reset it - in case this is a second bootup:
+         * Set the maximum number of test runs to
+         *  1 if the CPU does not provide the TSC_ADJUST MSR
+         *  3 if the MSR is available, so the target can try to adjust
          */
-        atomic_set(&stop_count, 0);
-
+        if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+                atomic_set(&test_runs, 1);
+        else
+                atomic_set(&test_runs, 3);
+retry:
         /*
-         * Wait for the target to arrive:
+         * Wait for the target to start or to skip the test:
          */
-        while (atomic_read(&start_count) != cpus-1)
+        while (atomic_read(&start_count) != cpus - 1) {
+                if (atomic_read(&skip_test) > 0) {
+                        atomic_set(&skip_test, 0);
+                        return;
+                }
                 cpu_relax();
+        }
+
         /*
          * Trigger the target to continue into the measurement too:
          */
@@ -155,21 +324,35 @@ void check_tsc_sync_source(int cpu)
         while (atomic_read(&stop_count) != cpus-1)
                 cpu_relax();
 
-        if (nr_warps) {
+        /*
+         * If the test was successful set the number of runs to zero and
+         * stop. If not, decrement the number of runs an check if we can
+         * retry. In case of random warps no retry is attempted.
+         */
+        if (!nr_warps) {
+                atomic_set(&test_runs, 0);
+
+                pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
+                        smp_processor_id(), cpu);
+
+        } else if (atomic_dec_and_test(&test_runs) || random_warps) {
+                /* Force it to 0 if random warps brought us here */
+                atomic_set(&test_runs, 0);
+
                 pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
                         smp_processor_id(), cpu);
                 pr_warning("Measured %Ld cycles TSC warp between CPUs, "
                            "turning off TSC clock.\n", max_warp);
+                if (random_warps)
+                        pr_warning("TSC warped randomly between CPUs\n");
                 mark_tsc_unstable("check_tsc_sync_source failed");
-        } else {
-                pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
-                        smp_processor_id(), cpu);
         }
 
         /*
          * Reset it - just in case we boot another CPU later:
          */
         atomic_set(&start_count, 0);
+        random_warps = 0;
         nr_warps = 0;
         max_warp = 0;
         last_tsc = 0;
@@ -178,6 +361,12 @@ void check_tsc_sync_source(int cpu)
          * Let the target continue with the bootup:
          */
         atomic_inc(&stop_count);
+
+        /*
+         * Retry, if there is a chance to do so.
+         */
+        if (atomic_read(&test_runs) > 0)
+                goto retry;
 }
 
 /*
@@ -185,6 +374,9 @@ void check_tsc_sync_source(int cpu)
  */
 void check_tsc_sync_target(void)
 {
+        struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
+        unsigned int cpu = smp_processor_id();
+        cycles_t cur_max_warp, gbl_max_warp;
         int cpus = 2;
 
         /* Also aborts if there is no TSC. */
@@ -192,6 +384,16 @@ void check_tsc_sync_target(void)
                 return;
 
         /*
+         * Store, verify and sanitize the TSC adjust register. If
+         * successful skip the test.
+         */
+        if (tsc_store_and_check_tsc_adjust(false)) {
+                atomic_inc(&skip_test);
+                return;
+        }
+
+retry:
+        /*
          * Register this CPU's participation and wait for the
          * source CPU to start the measurement:
          */
@@ -199,7 +401,12 @@ void check_tsc_sync_target(void)
         while (atomic_read(&start_count) != cpus)
                 cpu_relax();
 
-        check_tsc_warp(loop_timeout(smp_processor_id()));
+        cur_max_warp = check_tsc_warp(loop_timeout(cpu));
+
+        /*
+         * Store the maximum observed warp value for a potential retry:
+         */
+        gbl_max_warp = max_warp;
 
         /*
          * Ok, we are done:
@@ -211,4 +418,61 @@ void check_tsc_sync_target(void)
          */
         while (atomic_read(&stop_count) != cpus)
                 cpu_relax();
+
+        /*
+         * Reset it for the next sync test:
+         */
+        atomic_set(&stop_count, 0);
+
+        /*
+         * Check the number of remaining test runs. If not zero, the test
+         * failed and a retry with adjusted TSC is possible. If zero the
+         * test was either successful or failed terminally.
+         */
+        if (!atomic_read(&test_runs))
+                return;
+
+        /*
+         * If the warp value of this CPU is 0, then the other CPU
+         * observed time going backwards so this TSC was ahead and
+         * needs to move backwards.
+         */
+        if (!cur_max_warp)
+                cur_max_warp = -gbl_max_warp;
+
+        /*
+         * Add the result to the previous adjustment value.
+         *
+         * The adjustement value is slightly off by the overhead of the
+         * sync mechanism (observed values are ~200 TSC cycles), but this
+         * really depends on CPU, node distance and frequency. So
+         * compensating for this is hard to get right. Experiments show
+         * that the warp is not longer detectable when the observed warp
+         * value is used. In the worst case the adjustment needs to go
+         * through a 3rd run for fine tuning.
+         */
+        cur->adjusted += cur_max_warp;
+
+        /*
+         * TSC deadline timer stops working or creates an interrupt storm
+         * with adjust values < 0 and > x07ffffff.
+         *
+         * To allow adjust values > 0x7FFFFFFF we need to disable the
+         * deadline timer and use the local APIC timer, but that requires
+         * more intrusive changes and we do not have any useful information
+         * from Intel about the underlying HW wreckage yet.
+         */
+        if (cur->adjusted < 0)
+                cur->adjusted = 0;
+        if (cur->adjusted > 0x7FFFFFFF)
+                cur->adjusted = 0x7FFFFFFF;
+
+        pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
+                cpu, cur_max_warp, cur->adjusted);
+
+        wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted);
+        goto retry;
+
 }
+
+#endif /* CONFIG_SMP */
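The retry path added above compensates TSC_ADJUST by the observed warp and clamps the result into the range the TSC deadline timer tolerates. A minimal standalone sketch of that arithmetic, with made-up names and illustrative only (the real logic lives in check_tsc_sync_target() above):

#include <stdint.h>

/*
 * Illustrative sketch, not kernel code: the compensation applied before
 * a sync retry. A non-zero warp observed locally means this TSC is
 * behind and moves forward; a warp seen only by the other CPU means
 * this TSC is ahead and moves backwards by the global maximum warp.
 */
static int64_t compensate_adjust(int64_t adjusted, int64_t cur_max_warp,
                                 int64_t gbl_max_warp)
{
        if (!cur_max_warp)
                cur_max_warp = -gbl_max_warp;

        adjusted += cur_max_warp;

        /* Keep the value within the limits the deadline timer tolerates. */
        if (adjusted < 0)
                adjusted = 0;
        if (adjusted > 0x7FFFFFFF)
                adjusted = 0x7FFFFFFF;

        return adjusted;
}

For example, starting from adjusted = 0, a locally observed warp of 700 cycles yields 700; if only the source CPU saw the 700 cycle warp, the result is clamped to 0 because the adjust value must not go negative.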
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index 1eb47b6298c2..e793fe509971 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -49,8 +49,13 @@ static unsigned long __init mfld_calibrate_tsc(void)
                 fast_calibrate = ratio * fsb;
                 pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
                 lapic_timer_frequency = fsb * 1000 / HZ;
-                /* mark tsc clocksource as reliable */
-                set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+
+                /*
+                 * TSC on Intel Atom SoCs is reliable and of known frequency.
+                 * See tsc_msr.c for details.
+                 */
+                setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+                setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
 
                 return fast_calibrate;
         }
diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c
index 59253db41bbc..e0607c77a1bd 100644
--- a/arch/x86/platform/intel-mid/mrfld.c
+++ b/arch/x86/platform/intel-mid/mrfld.c
@@ -78,8 +78,12 @@ static unsigned long __init tangier_calibrate_tsc(void)
                 pr_debug("Setting lapic_timer_frequency = %d\n",
                          lapic_timer_frequency);
 
-                /* mark tsc clocksource as reliable */
-                set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+                /*
+                 * TSC on Intel Atom SoCs is reliable and of known frequency.
+                 * See tsc_msr.c for details.
+                 */
+                setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+                setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
 
                 return fast_calibrate;
         }
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 53cace2ec0e2..66ade16c7693 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -252,6 +252,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
         fix_processor_context();
 
         do_fpu_end();
+        tsc_verify_tsc_adjust(true);
         x86_platform.restore_sched_clock_state();
         mtrr_bp_restore();
         perf_restore_debug_store();