aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/tsc.c
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2008-09-02 18:54:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-09-02 23:35:56 -0400
commitfbb16e243887332dd5754e48ffe5b963378f3cd2 (patch)
tree774bf58f1d23db0c3f167ff30fb6cdeb674d6980 /arch/x86/kernel/tsc.c
parent8b76f46a2db29407fed66cf4aca19d61b3dcb3e1 (diff)
[x86] Fix TSC calibration issues
Larry Finger reported at http://lkml.org/lkml/2008/9/1/90: An ancient laptop of mine started throwing errors from b43legacy when I started using 2.6.27 on it. This has been bisected to commit bfc0f59 "x86: merge tsc calibration". The unification of the TSC code adopted mostly the 64bit code, which prefers PMTIMER/HPET over the PIT calibration. Larry's system has an AMD K6 CPU. Such systems are known to have PMTIMER incarnations which run at double speed. This results in a miscalibration of the TSC by factor 0.5. So the resulting calibrated CPU/TSC speed is half of the real CPU speed, which means that the TSC-based delay loop will run half the time it should run. That might explain why the b43legacy driver went berserk. On the other hand, we know about systems where the PIT-based calibration results in random crap due to heavy SMI/SMM disturbance. On those systems the PMTIMER/HPET-based calibration logic with SMI detection shows better results. According to Alok, virtualized systems also suffer from the PIT calibration method. The solution is to use a more wreckage-aware approach than the current either/or decision. 1) Reimplement the retry loop which was dropped from the 32bit code during the merge. It repeats the calibration and selects the lowest frequency value, as this is probably the closest estimate to the real frequency. 2) Monitor the delta of the TSC values in the delay loop which waits for the PIT counter to reach zero. If the maximum value is significantly different from the minimum, then we have a pretty safe indicator that the loop was disturbed by an SMI. 3) Keep the pmtimer/hpet reference as a backup solution for systems where the SMI disturbance is a permanent point of failure for PIT-based calibration. 4) Do the loop iteration for both methods, record the lowest value and decide after all iterations have finished. 5) Set a clear preference for PIT-based calibration when the result makes sense. 
The implementation does the reference calibration based on HPET/PMTIMER around the delay, which is necessary for the PIT anyway, but keeps separate TSC values to ensure the "independency" of the resulting calibration values. Tested on various 32bit/64bit machines including Geode 266Mhz, AMD K6 (affected machine with a double speed pmtimer which I grabbed out of the dump), Pentium class machines and AMD/Intel 64 bit boxen. Bisected-by: Larry Finger <Larry.Finger@lwfinger.net> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Larry Finger <Larry.Finger@lwfinger.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86/kernel/tsc.c')
-rw-r--r--arch/x86/kernel/tsc.c235
1 files changed, 181 insertions, 54 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8e786b0d665a..ac79bd143da8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -127,75 +127,202 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet)
127 */ 127 */
128unsigned long native_calibrate_tsc(void) 128unsigned long native_calibrate_tsc(void)
129{ 129{
130 unsigned long flags; 130 u64 tsc1, tsc2, tr1, tr2, tsc, delta, pm1, pm2, hpet1, hpet2;
131 u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; 131 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
132 int hpet = is_hpet_enabled(); 132 unsigned long flags, tscmin, tscmax;
133 unsigned int tsc_khz_val = 0; 133 int hpet = is_hpet_enabled(), pitcnt, i;
134 134
135 local_irq_save(flags); 135 /*
136 136 * Run 5 calibration loops to get the lowest frequency value
137 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); 137 * (the best estimate). We use two different calibration modes
138 138 * here:
139 outb((inb(0x61) & ~0x02) | 0x01, 0x61); 139 *
140 140 * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
141 outb(0xb0, 0x43); 141 * load a timeout of 50ms. We read the time right after we
142 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); 142 * started the timer and wait until the PIT count down reaches
143 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); 143 * zero. In each wait loop iteration we read the TSC and check
144 tr1 = get_cycles(); 144 * the delta to the previous read. We keep track of the min
145 while ((inb(0x61) & 0x20) == 0); 145 * and max values of that delta. The delta is mostly defined
146 tr2 = get_cycles(); 146 * by the IO time of the PIT access, so we can detect when a
147 147 * SMI/SMM disturbance happend between the two reads. If the
148 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); 148 * maximum time is significantly larger than the minimum time,
149 149 * then we discard the result and have another try.
150 local_irq_restore(flags); 150 *
151 * 2) Reference counter. If available we use the HPET or the
152 * PMTIMER as a reference to check the sanity of that value.
153 * We use separate TSC readouts and check inside of the
154 * reference read for a SMI/SMM disturbance. We dicard
155 * disturbed values here as well. We do that around the PIT
156 * calibration delay loop as we have to wait for a certain
157 * amount of time anyway.
158 */
159 for (i = 0; i < 5; i++) {
160
161 tscmin = ULONG_MAX;
162 tscmax = 0;
163 pitcnt = 0;
164
165 local_irq_save(flags);
166
167 /*
168 * Read the start value and the reference count of
169 * hpet/pmtimer when available:
170 */
171 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
172
173 /* Set the Gate high, disable speaker */
174 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
175
176 /*
177 * Setup CTC channel 2* for mode 0, (interrupt on terminal
178 * count mode), binary count. Set the latch register to 50ms
179 * (LSB then MSB) to begin countdown.
180 *
181 * Some devices need a delay here.
182 */
183 outb(0xb0, 0x43);
184 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
185 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
186
187 tsc = tr1 = tr2 = get_cycles();
188
189 while ((inb(0x61) & 0x20) == 0) {
190 tr2 = get_cycles();
191 delta = tr2 - tsc;
192 tsc = tr2;
193 if ((unsigned int) delta < tscmin)
194 tscmin = (unsigned int) delta;
195 if ((unsigned int) delta > tscmax)
196 tscmax = (unsigned int) delta;
197 pitcnt++;
198 }
199
200 /*
201 * We waited at least 50ms above. Now read
202 * pmtimer/hpet reference again
203 */
204 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
205
206 local_irq_restore(flags);
207
208 /*
209 * Sanity checks:
210 *
211 * If we were not able to read the PIT more than 5000
212 * times, then we have been hit by a massive SMI
213 *
214 * If the maximum is 10 times larger than the minimum,
215 * then we got hit by an SMI as well.
216 */
217 if (pitcnt > 5000 && tscmax < 10 * tscmin) {
218
219 /* Calculate the PIT value */
220 delta = tr2 - tr1;
221 do_div(delta, 50);
222
223 /* We take the smallest value into account */
224 tsc_pit_min = min(tsc_pit_min, (unsigned long) delta);
225 }
226
227 /* hpet or pmtimer available ? */
228 if (!hpet && !pm1 && !pm2)
229 continue;
230
231 /* Check, whether the sampling was disturbed by an SMI */
232 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
233 continue;
234
235 tsc2 = (tsc2 - tsc1) * 1000000LL;
236
237 if (hpet) {
238 if (hpet2 < hpet1)
239 hpet2 += 0x100000000ULL;
240 hpet2 -= hpet1;
241 tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
242 do_div(tsc1, 1000000);
243 } else {
244 if (pm2 < pm1)
245 pm2 += (u64)ACPI_PM_OVRRUN;
246 pm2 -= pm1;
247 tsc1 = pm2 * 1000000000LL;
248 do_div(tsc1, PMTMR_TICKS_PER_SEC);
249 }
250
251 do_div(tsc2, tsc1);
252 tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
253 }
151 254
152 /* 255 /*
153 * Preset the result with the raw and inaccurate PIT 256 * Now check the results.
154 * calibration value
155 */ 257 */
156 delta = (tr2 - tr1); 258 if (tsc_pit_min == ULONG_MAX) {
157 do_div(delta, 50); 259 /* PIT gave no useful value */
158 tsc_khz_val = delta; 260 printk(KERN_WARNING "TSC: PIT calibration failed due to "
261 "SMI disturbance.\n");
262
263 /* We don't have an alternative source, disable TSC */
264 if (!hpet && !pm1 && !pm2) {
265 printk("TSC: No reference (HPET/PMTIMER) available\n");
266 return 0;
267 }
268
269 /* The alternative source failed as well, disable TSC */
270 if (tsc_ref_min == ULONG_MAX) {
271 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
272 "failed due to SMI disturbance.\n");
273 return 0;
274 }
275
276 /* Use the alternative source */
277 printk(KERN_INFO "TSC: using %s reference calibration\n",
278 hpet ? "HPET" : "PMTIMER");
279
280 return tsc_ref_min;
281 }
159 282
160 /* hpet or pmtimer available ? */ 283 /* We don't have an alternative source, use the PIT calibration value */
161 if (!hpet && !pm1 && !pm2) { 284 if (!hpet && !pm1 && !pm2) {
162 printk(KERN_INFO "TSC calibrated against PIT\n"); 285 printk(KERN_INFO "TSC: Using PIT calibration value\n");
163 goto out; 286 return tsc_pit_min;
164 } 287 }
165 288
166 /* Check, whether the sampling was disturbed by an SMI */ 289 /* The alternative source failed, use the PIT calibration value */
167 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { 290 if (tsc_ref_min == ULONG_MAX) {
168 printk(KERN_WARNING "TSC calibration disturbed by SMI, " 291 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due "
169 "using PIT calibration result\n"); 292 "to SMI disturbance. Using PIT calibration\n");
170 goto out; 293 return tsc_pit_min;
171 } 294 }
172 295
173 tsc2 = (tsc2 - tsc1) * 1000000LL; 296 /* Check the reference deviation */
174 297 delta = ((u64) tsc_pit_min) * 100;
175 if (hpet) { 298 do_div(delta, tsc_ref_min);
176 printk(KERN_INFO "TSC calibrated against HPET\n"); 299
177 if (hpet2 < hpet1) 300 /*
178 hpet2 += 0x100000000ULL; 301 * If both calibration results are inside a 5% window, the we
179 hpet2 -= hpet1; 302 * use the lower frequency of those as it is probably the
180 tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); 303 * closest estimate.
181 do_div(tsc1, 1000000); 304 */
182 } else { 305 if (delta >= 95 && delta <= 105) {
183 printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); 306 printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n",
184 if (pm2 < pm1) 307 hpet ? "HPET" : "PMTIMER");
185 pm2 += (u64)ACPI_PM_OVRRUN; 308 printk(KERN_INFO "TSC: using %s calibration value\n",
186 pm2 -= pm1; 309 tsc_pit_min <= tsc_ref_min ? "PIT" :
187 tsc1 = pm2 * 1000000000LL; 310 hpet ? "HPET" : "PMTIMER");
188 do_div(tsc1, PMTMR_TICKS_PER_SEC); 311 return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min;
189 } 312 }
190 313
191 do_div(tsc2, tsc1); 314 printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
192 tsc_khz_val = tsc2; 315 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
193 316
194out: 317 /*
195 return tsc_khz_val; 318 * The calibration values differ too much. In doubt, we use
319 * the PIT value as we know that there are PMTIMERs around
320 * running at double speed.
321 */
322 printk(KERN_INFO "TSC: Using PIT calibration value\n");
323 return tsc_pit_min;
196} 324}
197 325
198
199#ifdef CONFIG_X86_32 326#ifdef CONFIG_X86_32
200/* Only called from the Powernow K7 cpu freq driver */ 327/* Only called from the Powernow K7 cpu freq driver */
201int recalibrate_cpu_khz(void) 328int recalibrate_cpu_khz(void)