aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2007-02-16 04:28:06 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-02-16 11:13:59 -0500
commitd36b49b91065dbfa305c5a66010b3497c741eee0 (patch)
tree2c1873a72da1b18900c5bf9984de1a33ee2ca38c /arch/i386/kernel
parente9e2cdb412412326c4827fc78ba27f410d837e6e (diff)
[PATCH] i386 rework local apic timer calibration
The local apic timer calibration has two problem cases: 1. The calibration is based on readout of the PIT/HPET timer to detect the wrap of the periodic tick. It happens that a box gets stuck in the calibration loop due to a PIT with a broken readout function. 2. CoreDuo boxen show a sporadic "PIT runs too slow" defect, which results in a wrong lapic calibration. The PIT goes back to normal operation once the lapic timer is switched to periodic mode. Both are existing and unfixed problems in the current upstream kernel and prevent certain laptops and other systems from booting Linux. Rework the code to address both problems: - Make the calibration interrupt driven. This removes the wait_timer_tick magic hackery from lapic.c and time_hpet.c. The clockevents framework allows easy substitution of the global tick event handler for the calibration. This is more accurate than monitoring jiffies. At this point of the boot process, nothing disturbs the interrupt delivery, so the results are very accurate. - Verify the calibration against the PM timer, when available, by using the early access function. When the measured calibration period is outside of a one percent window, then the lapic timer calibration is adjusted to the pm timer result. - Verify the calibration by running the lapic timer with the calibration handler. Disable lapic timer in case of deviation. This also removes the "synchronization" of the local apic timer to the global tick. This synchronization never worked, as there is no way to synchronize PIT(HPET) and local APIC timer. The synchronization by waiting for the tick just aligns the local APIC timer for the first events, but later the events drift away due to the different clocks. Removing the "sync" is just randomizing the asynchronous behaviour at setup time. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu> Cc: Zachary Amsden <zach@vmware.com> Cc: "Eric W. 
Biederman" <ebiederm@xmission.com> Cc: Rohit Seth <rohitseth@google.com> Cc: Andi Kleen <ak@suse.de> Cc: john stultz <johnstul@us.ibm.com> Cc: Roman Zippel <zippel@linux-m68k.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/apic.c365
1 files changed, 215 insertions, 150 deletions
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index e98b5c750bdf..9655c233e6f1 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
26#include <linux/sysdev.h> 26#include <linux/sysdev.h>
27#include <linux/cpu.h> 27#include <linux/cpu.h>
28#include <linux/clockchips.h> 28#include <linux/clockchips.h>
29#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 30#include <linux/module.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
@@ -59,8 +60,8 @@
59 */ 60 */
60static int enable_local_apic __initdata = 0; 61static int enable_local_apic __initdata = 0;
61 62
62/* Enable local APIC timer for highres/dyntick on UP */ 63/* Local APIC timer verification ok */
63static int enable_local_apic_timer __initdata = 0; 64static int local_apic_timer_verify_ok;
64 65
65/* 66/*
66 * Debug level, exported for io_apic.c 67 * Debug level, exported for io_apic.c
@@ -82,7 +83,7 @@ static void apic_pm_activate(void);
82static struct clock_event_device lapic_clockevent = { 83static struct clock_event_device lapic_clockevent = {
83 .name = "lapic", 84 .name = "lapic",
84 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT 85 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
85 | CLOCK_EVT_FEAT_C3STOP, 86 | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
86 .shift = 32, 87 .shift = 32,
87 .set_mode = lapic_timer_setup, 88 .set_mode = lapic_timer_setup,
88 .set_next_event = lapic_next_event, 89 .set_next_event = lapic_next_event,
@@ -159,64 +160,8 @@ int lapic_get_maxlvt(void)
159 * Local APIC timer 160 * Local APIC timer
160 */ 161 */
161 162
162/* 163/* Clock divisor is set to 16 */
163 * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts 164#define APIC_DIVISOR 16
164 * per second. We assume that the caller has already set up the local
165 * APIC.
166 *
167 * The APIC timer is not exactly sync with the external timer chip, it
168 * closely follows bus clocks.
169 */
170
171/*
172 * FIXME: Move this to i8253.h. There is no need to keep the access to
173 * the PIT scattered all around the place -tglx
174 */
175
176/*
177 * The timer chip is already set up at HZ interrupts per second here,
178 * but we do not accept timer interrupts yet. We only allow the BP
179 * to calibrate.
180 */
181static unsigned int __devinit get_8254_timer_count(void)
182{
183 unsigned long flags;
184
185 unsigned int count;
186
187 spin_lock_irqsave(&i8253_lock, flags);
188
189 outb_p(0x00, PIT_MODE);
190 count = inb_p(PIT_CH0);
191 count |= inb_p(PIT_CH0) << 8;
192
193 spin_unlock_irqrestore(&i8253_lock, flags);
194
195 return count;
196}
197
198/* next tick in 8254 can be caught by catching timer wraparound */
199static void __devinit wait_8254_wraparound(void)
200{
201 unsigned int curr_count, prev_count;
202
203 curr_count = get_8254_timer_count();
204 do {
205 prev_count = curr_count;
206 curr_count = get_8254_timer_count();
207
208 /* workaround for broken Mercury/Neptune */
209 if (prev_count >= curr_count + 0x100)
210 curr_count = get_8254_timer_count();
211
212 } while (prev_count >= curr_count);
213}
214
215/*
216 * Default initialization for 8254 timers. If we use other timers like HPET,
217 * we override this later
218 */
219void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
220 165
221/* 166/*
222 * This function sets up the local APIC timer, with a timeout of 167 * This function sets up the local APIC timer, with a timeout of
@@ -228,9 +173,6 @@ void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
228 * We do reads before writes even if unnecessary, to get around the 173 * We do reads before writes even if unnecessary, to get around the
229 * P5 APIC double write bug. 174 * P5 APIC double write bug.
230 */ 175 */
231
232#define APIC_DIVISOR 16
233
234static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 176static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
235{ 177{
236 unsigned int lvtt_value, tmp_value; 178 unsigned int lvtt_value, tmp_value;
@@ -277,6 +219,10 @@ static void lapic_timer_setup(enum clock_event_mode mode,
277 unsigned long flags; 219 unsigned long flags;
278 unsigned int v; 220 unsigned int v;
279 221
222 /* Lapic used for broadcast ? */
223 if (!local_apic_timer_verify_ok)
224 return;
225
280 local_irq_save(flags); 226 local_irq_save(flags);
281 227
282 switch (mode) { 228 switch (mode) {
@@ -321,111 +267,238 @@ static void __devinit setup_APIC_timer(void)
321} 267}
322 268
323/* 269/*
324 * In this function we calibrate APIC bus clocks to the external 270 * In this function we calibrate APIC bus clocks to the external timer.
325 * timer. Unfortunately we cannot use jiffies and the timer irq 271 *
326 * to calibrate, since some later bootup code depends on getting 272 * We want to do the calibration only once since we want to have local timer
327 * the first irq? Ugh. 273 * irqs syncron. CPUs connected by the same APIC bus have the very same bus
274 * frequency.
328 * 275 *
329 * TODO: Fix this rather than saying "Ugh" -tglx 276 * This was previously done by reading the PIT/HPET and waiting for a wrap
277 * around to find out, that a tick has elapsed. I have a box, where the PIT
278 * readout is broken, so it never gets out of the wait loop again. This was
279 * also reported by others.
330 * 280 *
331 * We want to do the calibration only once since we 281 * Monitoring the jiffies value is inaccurate and the clockevents
332 * want to have local timer irqs syncron. CPUs connected 282 * infrastructure allows us to do a simple substitution of the interrupt
333 * by the same APIC bus have the very same bus frequency. 283 * handler.
334 * And we want to have irqs off anyways, no accidental 284 *
335 * APIC irq that way. 285 * The calibration routine also uses the pm_timer when possible, as the PIT
286 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
287 * back to normal later in the boot process).
336 */ 288 */
337 289
338static int __init calibrate_APIC_clock(void) 290#define LAPIC_CAL_LOOPS (HZ/10)
291
292static __initdata volatile int lapic_cal_loops = -1;
293static __initdata long lapic_cal_t1, lapic_cal_t2;
294static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
295static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
296static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
297
298/*
299 * Temporary interrupt handler.
300 */
301static void __init lapic_cal_handler(struct clock_event_device *dev)
302{
303 unsigned long long tsc = 0;
304 long tapic = apic_read(APIC_TMCCT);
305 unsigned long pm = acpi_pm_read_early();
306
307 if (cpu_has_tsc)
308 rdtscll(tsc);
309
310 switch (lapic_cal_loops++) {
311 case 0:
312 lapic_cal_t1 = tapic;
313 lapic_cal_tsc1 = tsc;
314 lapic_cal_pm1 = pm;
315 lapic_cal_j1 = jiffies;
316 break;
317
318 case LAPIC_CAL_LOOPS:
319 lapic_cal_t2 = tapic;
320 lapic_cal_tsc2 = tsc;
321 if (pm < lapic_cal_pm1)
322 pm += ACPI_PM_OVRRUN;
323 lapic_cal_pm2 = pm;
324 lapic_cal_j2 = jiffies;
325 break;
326 }
327}
328
329/*
330 * Setup the boot APIC
331 *
332 * Calibrate and verify the result.
333 */
334void __init setup_boot_APIC_clock(void)
339{ 335{
340 unsigned long long t1 = 0, t2 = 0; 336 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
341 long tt1, tt2; 337 const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
342 long result; 338 const long pm_thresh = pm_100ms/100;
343 int i; 339 void (*real_handler)(struct clock_event_device *dev);
344 const int LOOPS = HZ/10; 340 unsigned long deltaj;
341 long delta, deltapm;
342
343 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
344 "calibrating APIC timer ...\n");
345 345
346 apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n"); 346 local_irq_disable();
347
348 /* Replace the global interrupt handler */
349 real_handler = global_clock_event->event_handler;
350 global_clock_event->event_handler = lapic_cal_handler;
347 351
348 /* 352 /*
349 * Put whatever arbitrary (but long enough) timeout 353 * Setup the APIC counter to 1e9. There is no way the lapic
350 * value into the APIC clock, we just want to get the 354 * can underflow in the 100ms detection time frame
351 * counter running for calibration.
352 */ 355 */
353 __setup_APIC_LVTT(1000000000, 0, 0); 356 __setup_APIC_LVTT(1000000000, 0, 0);
354 357
355 /* 358 /* Let the interrupts run */
356 * The timer chip counts down to zero. Let's wait 359 local_irq_enable();
357 * for a wraparound to start exact measurement:
358 * (the current tick might have been already half done)
359 */
360 360
361 wait_timer_tick(); 361 while(lapic_cal_loops <= LAPIC_CAL_LOOPS);
362 362
363 /* 363 local_irq_disable();
364 * We wrapped around just now. Let's start:
365 */
366 if (cpu_has_tsc)
367 rdtscll(t1);
368 tt1 = apic_read(APIC_TMCCT);
369 364
370 /* 365 /* Restore the real event handler */
371 * Let's wait LOOPS wraprounds: 366 global_clock_event->event_handler = real_handler;
372 */
373 for (i = 0; i < LOOPS; i++)
374 wait_timer_tick();
375 367
376 tt2 = apic_read(APIC_TMCCT); 368 /* Build delta t1-t2 as apic timer counts down */
377 if (cpu_has_tsc) 369 delta = lapic_cal_t1 - lapic_cal_t2;
378 rdtscll(t2); 370 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
379 371
380 /* 372 /* Check, if the PM timer is available */
381 * The APIC bus clock counter is 32 bits only, it 373 deltapm = lapic_cal_pm2 - lapic_cal_pm1;
382 * might have overflown, but note that we use signed 374 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
383 * longs, thus no extra care needed.
384 *
385 * underflown to be exact, as the timer counts down ;)
386 */
387 375
388 result = (tt1-tt2)*APIC_DIVISOR/LOOPS; 376 if (deltapm) {
377 unsigned long mult;
378 u64 res;
379
380 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
381
382 if (deltapm > (pm_100ms - pm_thresh) &&
383 deltapm < (pm_100ms + pm_thresh)) {
384 apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
385 } else {
386 res = (((u64) deltapm) * mult) >> 22;
387 do_div(res, 1000000);
388 printk(KERN_WARNING "APIC calibration not consistent "
389 "with PM Timer: %ldms instead of 100ms\n",
390 (long)res);
391 /* Correct the lapic counter value */
392 res = (((u64) delta ) * pm_100ms);
393 do_div(res, deltapm);
394 printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
395 "%lu (%ld)\n", (unsigned long) res, delta);
396 delta = (long) res;
397 }
398 }
389 399
390 /* Calculate the scaled math multiplication factor */ 400 /* Calculate the scaled math multiplication factor */
391 lapic_clockevent.mult = div_sc(tt1-tt2, TICK_NSEC * LOOPS, 32); 401 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32);
392 lapic_clockevent.max_delta_ns = 402 lapic_clockevent.max_delta_ns =
393 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); 403 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
394 lapic_clockevent.min_delta_ns = 404 lapic_clockevent.min_delta_ns =
395 clockevent_delta2ns(0xF, &lapic_clockevent); 405 clockevent_delta2ns(0xF, &lapic_clockevent);
396 406
397 apic_printk(APIC_VERBOSE, "..... tt1-tt2 %ld\n", tt1 - tt2); 407 calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
408
409 apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
398 apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); 410 apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
399 apic_printk(APIC_VERBOSE, "..... calibration result: %ld\n", result); 411 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
412 calibration_result);
400 413
401 if (cpu_has_tsc) 414 if (cpu_has_tsc) {
415 delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
402 apic_printk(APIC_VERBOSE, "..... CPU clock speed is " 416 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
403 "%ld.%04ld MHz.\n", 417 "%ld.%04ld MHz.\n",
404 ((long)(t2-t1)/LOOPS)/(1000000/HZ), 418 (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
405 ((long)(t2-t1)/LOOPS)%(1000000/HZ)); 419 (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
420 }
406 421
407 apic_printk(APIC_VERBOSE, "..... host bus clock speed is " 422 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
408 "%ld.%04ld MHz.\n", 423 "%u.%04u MHz.\n",
409 result/(1000000/HZ), 424 calibration_result / (1000000 / HZ),
410 result%(1000000/HZ)); 425 calibration_result % (1000000 / HZ));
411
412 return result;
413}
414 426
415void __init setup_boot_APIC_clock(void)
416{
417 unsigned long flags;
418 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
419 427
420 local_irq_save(flags); 428 apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
421 429
422 calibration_result = calibrate_APIC_clock();
423 /* 430 /*
424 * Now set up the timer for real. 431 * Setup the apic timer manually
425 */ 432 */
426 setup_APIC_timer(); 433 local_apic_timer_verify_ok = 1;
434 levt->event_handler = lapic_cal_handler;
435 lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
436 lapic_cal_loops = -1;
437
438 /* Let the interrupts run */
439 local_irq_enable();
440
441 while(lapic_cal_loops <= LAPIC_CAL_LOOPS);
442
443 local_irq_disable();
444
445 /* Stop the lapic timer */
446 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
447
448 local_irq_enable();
449
450 /* Jiffies delta */
451 deltaj = lapic_cal_j2 - lapic_cal_j1;
452 apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
453
454 /* Check, if the PM timer is available */
455 deltapm = lapic_cal_pm2 - lapic_cal_pm1;
456 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
457
458 local_apic_timer_verify_ok = 0;
459
460 if (deltapm) {
461 if (deltapm > (pm_100ms - pm_thresh) &&
462 deltapm < (pm_100ms + pm_thresh)) {
463 apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
464 /* Check, if the jiffies result is consistent */
465 if (deltaj < LAPIC_CAL_LOOPS-2 ||
466 deltaj > LAPIC_CAL_LOOPS+2) {
467 /*
468 * Not sure, what we can do about this one.
469 * When high resolution timers are active
470 * and the lapic timer does not stop in C3
471 * we are fine. Otherwise more trouble might
472 * be waiting. -- tglx
473 */
474 printk(KERN_WARNING "Global event device %s "
475 "has wrong frequency "
476 "(%lu ticks instead of %d)\n",
477 global_clock_event->name, deltaj,
478 LAPIC_CAL_LOOPS);
479 }
480 local_apic_timer_verify_ok = 1;
481 }
482 } else {
483 /* Check, if the jiffies result is consistent */
484 if (deltaj >= LAPIC_CAL_LOOPS-2 &&
485 deltaj <= LAPIC_CAL_LOOPS+2) {
486 apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
487 local_apic_timer_verify_ok = 1;
488 }
489 }
427 490
428 local_irq_restore(flags); 491 if (!local_apic_timer_verify_ok) {
492 printk(KERN_WARNING
493 "APIC timer disabled due to verification failure.\n");
494 /* No broadcast on UP ! */
495 if (num_possible_cpus() == 1)
496 return;
497 } else
498 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
499
500 /* Setup the lapic or request the broadcast */
501 setup_APIC_timer();
429} 502}
430 503
431void __devinit setup_secondary_APIC_clock(void) 504void __devinit setup_secondary_APIC_clock(void)
@@ -442,16 +515,15 @@ static void local_apic_timer_interrupt(void)
442 struct clock_event_device *evt = &per_cpu(lapic_events, cpu); 515 struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
443 516
444 /* 517 /*
445 * Normally we should not be here till LAPIC has been 518 * Normally we should not be here till LAPIC has been initialized but
446 * initialized but in some cases like kdump, its possible that 519 * in some cases like kdump, its possible that there is a pending LAPIC
447 * there is a pending LAPIC timer interrupt from previous 520 * timer interrupt from previous kernel's context and is delivered in
448 * kernel's context and is delivered in new kernel the moment 521 * new kernel the moment interrupts are enabled.
449 * interrupts are enabled.
450 * 522 *
451 * Interrupts are enabled early and LAPIC is setup much later, 523 * Interrupts are enabled early and LAPIC is setup much later, hence
452 * hence its possible that when we get here evt->event_handler 524 * its possible that when we get here evt->event_handler is NULL.
453 * is NULL. Check for event_handler being NULL and discard 525 * Check for event_handler being NULL and discard the interrupt as
454 * the interrupt as spurious. 526 * spurious.
455 */ 527 */
456 if (!evt->event_handler) { 528 if (!evt->event_handler) {
457 printk(KERN_WARNING 529 printk(KERN_WARNING
@@ -1126,13 +1198,6 @@ static int __init parse_nolapic(char *arg)
1126} 1198}
1127early_param("nolapic", parse_nolapic); 1199early_param("nolapic", parse_nolapic);
1128 1200
1129static int __init apic_enable_lapic_timer(char *str)
1130{
1131 enable_local_apic_timer = 1;
1132 return 0;
1133}
1134early_param("lapictimer", apic_enable_lapic_timer);
1135
1136static int __init apic_set_verbosity(char *str) 1201static int __init apic_set_verbosity(char *str)
1137{ 1202{
1138 if (strcmp("debug", str) == 0) 1203 if (strcmp("debug", str) == 0)