diff options
Diffstat (limited to 'arch/x86/kernel/tsc.c')
-rw-r--r-- | arch/x86/kernel/tsc.c | 424 |
1 files changed, 369 insertions, 55 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c0553909..161bb850fc47 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); | |||
104 | /* | 104 | /* |
105 | * Read TSC and the reference counters. Take care of SMI disturbance | 105 | * Read TSC and the reference counters. Take care of SMI disturbance |
106 | */ | 106 | */ |
107 | static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | 107 | static u64 tsc_read_refs(u64 *p, int hpet) |
108 | { | 108 | { |
109 | u64 t1, t2; | 109 | u64 t1, t2; |
110 | int i; | 110 | int i; |
@@ -112,9 +112,9 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | |||
112 | for (i = 0; i < MAX_RETRIES; i++) { | 112 | for (i = 0; i < MAX_RETRIES; i++) { |
113 | t1 = get_cycles(); | 113 | t1 = get_cycles(); |
114 | if (hpet) | 114 | if (hpet) |
115 | *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; | 115 | *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; |
116 | else | 116 | else |
117 | *pm = acpi_pm_read_early(); | 117 | *p = acpi_pm_read_early(); |
118 | t2 = get_cycles(); | 118 | t2 = get_cycles(); |
119 | if ((t2 - t1) < SMI_TRESHOLD) | 119 | if ((t2 - t1) < SMI_TRESHOLD) |
120 | return t2; | 120 | return t2; |
@@ -122,80 +122,390 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | |||
122 | return ULLONG_MAX; | 122 | return ULLONG_MAX; |
123 | } | 123 | } |
124 | 124 | ||
125 | /** | 125 | /* |
126 | * native_calibrate_tsc - calibrate the tsc on boot | 126 | * Calculate the TSC frequency from HPET reference |
127 | */ | 127 | */ |
128 | unsigned long native_calibrate_tsc(void) | 128 | static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) |
129 | { | 129 | { |
130 | unsigned long flags; | 130 | u64 tmp; |
131 | u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; | ||
132 | int hpet = is_hpet_enabled(); | ||
133 | unsigned int tsc_khz_val = 0; | ||
134 | 131 | ||
135 | local_irq_save(flags); | 132 | if (hpet2 < hpet1) |
133 | hpet2 += 0x100000000ULL; | ||
134 | hpet2 -= hpet1; | ||
135 | tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
136 | do_div(tmp, 1000000); | ||
137 | do_div(deltatsc, tmp); | ||
138 | |||
139 | return (unsigned long) deltatsc; | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Calculate the TSC frequency from PMTimer reference | ||
144 | */ | ||
145 | static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) | ||
146 | { | ||
147 | u64 tmp; | ||
136 | 148 | ||
137 | tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); | 149 | if (!pm1 && !pm2) |
150 | return ULONG_MAX; | ||
151 | |||
152 | if (pm2 < pm1) | ||
153 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
154 | pm2 -= pm1; | ||
155 | tmp = pm2 * 1000000000LL; | ||
156 | do_div(tmp, PMTMR_TICKS_PER_SEC); | ||
157 | do_div(deltatsc, tmp); | ||
158 | |||
159 | return (unsigned long) deltatsc; | ||
160 | } | ||
161 | |||
162 | #define CAL_MS 10 | ||
163 | #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) | ||
164 | #define CAL_PIT_LOOPS 1000 | ||
165 | |||
166 | #define CAL2_MS 50 | ||
167 | #define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) | ||
168 | #define CAL2_PIT_LOOPS 5000 | ||
169 | |||
170 | |||
171 | /* | ||
172 | * Try to calibrate the TSC against the Programmable | ||
173 | * Interrupt Timer and return the frequency of the TSC | ||
174 | * in kHz. | ||
175 | * | ||
176 | * Return ULONG_MAX on failure to calibrate. | ||
177 | */ | ||
178 | static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) | ||
179 | { | ||
180 | u64 tsc, t1, t2, delta; | ||
181 | unsigned long tscmin, tscmax; | ||
182 | int pitcnt; | ||
138 | 183 | ||
184 | /* Set the Gate high, disable speaker */ | ||
139 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | 185 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); |
140 | 186 | ||
187 | /* | ||
188 | * Setup CTC channel 2* for mode 0, (interrupt on terminal | ||
189 | * count mode), binary count. Set the latch register to 50ms | ||
190 | * (LSB then MSB) to begin countdown. | ||
191 | */ | ||
141 | outb(0xb0, 0x43); | 192 | outb(0xb0, 0x43); |
142 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | 193 | outb(latch & 0xff, 0x42); |
143 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); | 194 | outb(latch >> 8, 0x42); |
144 | tr1 = get_cycles(); | 195 | |
145 | while ((inb(0x61) & 0x20) == 0); | 196 | tsc = t1 = t2 = get_cycles(); |
146 | tr2 = get_cycles(); | 197 | |
198 | pitcnt = 0; | ||
199 | tscmax = 0; | ||
200 | tscmin = ULONG_MAX; | ||
201 | while ((inb(0x61) & 0x20) == 0) { | ||
202 | t2 = get_cycles(); | ||
203 | delta = t2 - tsc; | ||
204 | tsc = t2; | ||
205 | if ((unsigned long) delta < tscmin) | ||
206 | tscmin = (unsigned int) delta; | ||
207 | if ((unsigned long) delta > tscmax) | ||
208 | tscmax = (unsigned int) delta; | ||
209 | pitcnt++; | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * Sanity checks: | ||
214 | * | ||
215 | * If we were not able to read the PIT more than loopmin | ||
216 | * times, then we have been hit by a massive SMI | ||
217 | * | ||
218 | * If the maximum is 10 times larger than the minimum, | ||
219 | * then we got hit by an SMI as well. | ||
220 | */ | ||
221 | if (pitcnt < loopmin || tscmax > 10 * tscmin) | ||
222 | return ULONG_MAX; | ||
223 | |||
224 | /* Calculate the PIT value */ | ||
225 | delta = t2 - t1; | ||
226 | do_div(delta, ms); | ||
227 | return delta; | ||
228 | } | ||
147 | 229 | ||
148 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | 230 | /* |
231 | * This reads the current MSB of the PIT counter, and | ||
232 | * checks if we are running on sufficiently fast and | ||
233 | * non-virtualized hardware. | ||
234 | * | ||
235 | * Our expectations are: | ||
236 | * | ||
237 | * - the PIT is running at roughly 1.19MHz | ||
238 | * | ||
239 | * - each IO is going to take about 1us on real hardware, | ||
240 | * but we allow it to be much faster (by a factor of 10) or | ||
241 | * _slightly_ slower (ie we allow up to a 2us read+counter | ||
242 | * update) - anything else implies an unacceptably slow CPU | ||
243 | * or PIT for the fast calibration to work. | ||
244 | * | ||
245 | * - with 256 PIT ticks to read the value, we have 214us to | ||
246 | * see the same MSB (and overhead like doing a single TSC | ||
247 | * read per MSB value etc). | ||
248 | * | ||
249 | * - We're doing 2 reads per loop (LSB, MSB), and we expect | ||
250 | * them each to take about a microsecond on real hardware. | ||
251 | * So we expect a count value of around 100. But we'll be | ||
252 | * generous, and accept anything over 50. | ||
253 | * | ||
254 | * - if the PIT is stuck, and we see *many* more reads, we | ||
255 | * return early (and the next caller of pit_expect_msb() | ||
256 | * then considers it a failure when they don't see the | ||
257 | * next expected value). | ||
258 | * | ||
259 | * These expectations mean that we know that we have seen the | ||
260 | * transition from one expected value to another with a fairly | ||
261 | * high accuracy, and we didn't miss any events. We can thus | ||
262 | * use the TSC value at the transitions to calculate a pretty | ||
263 | * good value for the TSC frequency. | ||
264 | */ | ||
265 | static inline int pit_expect_msb(unsigned char val) | ||
266 | { | ||
267 | int count = 0; | ||
149 | 268 | ||
269 | for (count = 0; count < 50000; count++) { | ||
270 | /* Ignore LSB */ | ||
271 | inb(0x42); | ||
272 | if (inb(0x42) != val) | ||
273 | break; | ||
274 | } | ||
275 | return count > 50; | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * How many MSB values do we want to see? We aim for a | ||
280 | * 15ms calibration, which assuming a 2us counter read | ||
281 | * error should give us roughly 150 ppm precision for | ||
282 | * the calibration. | ||
283 | */ | ||
284 | #define QUICK_PIT_MS 15 | ||
285 | #define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) | ||
286 | |||
287 | static unsigned long quick_pit_calibrate(void) | ||
288 | { | ||
289 | /* Set the Gate high, disable speaker */ | ||
290 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | ||
291 | |||
292 | /* | ||
293 | * Counter 2, mode 0 (one-shot), binary count | ||
294 | * | ||
295 | * NOTE! Mode 2 decrements by two (and then the | ||
296 | * output is flipped each time, giving the same | ||
297 | * final output frequency as a decrement-by-one), | ||
298 | * so mode 0 is much better when looking at the | ||
299 | * individual counts. | ||
300 | */ | ||
301 | outb(0xb0, 0x43); | ||
302 | |||
303 | /* Start at 0xffff */ | ||
304 | outb(0xff, 0x42); | ||
305 | outb(0xff, 0x42); | ||
306 | |||
307 | if (pit_expect_msb(0xff)) { | ||
308 | int i; | ||
309 | u64 t1, t2, delta; | ||
310 | unsigned char expect = 0xfe; | ||
311 | |||
312 | t1 = get_cycles(); | ||
313 | for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { | ||
314 | if (!pit_expect_msb(expect)) | ||
315 | goto failed; | ||
316 | } | ||
317 | t2 = get_cycles(); | ||
318 | |||
319 | /* | ||
320 | * Make sure we can rely on the second TSC timestamp: | ||
321 | */ | ||
322 | if (!pit_expect_msb(expect)) | ||
323 | goto failed; | ||
324 | |||
325 | /* | ||
326 | * Ok, if we get here, then we've seen the | ||
327 | * MSB of the PIT decrement QUICK_PIT_ITERATIONS | ||
328 | * times, and each MSB had many hits, so we never | ||
329 | * had any sudden jumps. | ||
330 | * | ||
331 | * As a result, we can depend on there not being | ||
332 | * any odd delays anywhere, and the TSC reads are | ||
333 | * reliable. | ||
334 | * | ||
335 | * kHz = ticks / time-in-seconds / 1000; | ||
336 | * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 | ||
337 | * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) | ||
338 | */ | ||
339 | delta = (t2 - t1)*PIT_TICK_RATE; | ||
340 | do_div(delta, QUICK_PIT_ITERATIONS*256*1000); | ||
341 | printk("Fast TSC calibration using PIT\n"); | ||
342 | return delta; | ||
343 | } | ||
344 | failed: | ||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | /** | ||
349 | * native_calibrate_tsc - calibrate the tsc on boot | ||
350 | */ | ||
351 | unsigned long native_calibrate_tsc(void) | ||
352 | { | ||
353 | u64 tsc1, tsc2, delta, ref1, ref2; | ||
354 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | ||
355 | unsigned long flags, latch, ms, fast_calibrate; | ||
356 | int hpet = is_hpet_enabled(), i, loopmin; | ||
357 | |||
358 | local_irq_save(flags); | ||
359 | fast_calibrate = quick_pit_calibrate(); | ||
150 | local_irq_restore(flags); | 360 | local_irq_restore(flags); |
361 | if (fast_calibrate) | ||
362 | return fast_calibrate; | ||
151 | 363 | ||
152 | /* | 364 | /* |
153 | * Preset the result with the raw and inaccurate PIT | 365 | * Run up to 3 calibration loops to get the lowest frequency value |
154 | * calibration value | 366 | * (the best estimate). We use two different calibration modes |
367 | * here: | ||
368 | * | ||
369 | * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and | ||
370 | * load a timeout of 50ms. We read the time right after we | ||
371 | * started the timer and wait until the PIT count down reaches | ||
372 | * zero. In each wait loop iteration we read the TSC and check | ||
373 | * the delta to the previous read. We keep track of the min | ||
374 | * and max values of that delta. The delta is mostly defined | ||
375 | * by the IO time of the PIT access, so we can detect when a | ||
376 | * SMI/SMM disturbance happened between the two reads. If the | ||
377 | * maximum time is significantly larger than the minimum time, | ||
378 | * then we discard the result and have another try. | ||
379 | * | ||
380 | * 2) Reference counter. If available we use the HPET or the | ||
381 | * PMTIMER as a reference to check the sanity of that value. | ||
382 | * We use separate TSC readouts and check inside of the | ||
383 | * reference read for a SMI/SMM disturbance. We discard | ||
384 | * disturbed values here as well. We do that around the PIT | ||
385 | * calibration delay loop as we have to wait for a certain | ||
386 | * amount of time anyway. | ||
155 | */ | 387 | */ |
156 | delta = (tr2 - tr1); | 388 | |
157 | do_div(delta, 50); | 389 | /* Preset PIT loop values */ |
158 | tsc_khz_val = delta; | 390 | latch = CAL_LATCH; |
159 | 391 | ms = CAL_MS; | |
160 | /* hpet or pmtimer available ? */ | 392 | loopmin = CAL_PIT_LOOPS; |
161 | if (!hpet && !pm1 && !pm2) { | 393 | |
162 | printk(KERN_INFO "TSC calibrated against PIT\n"); | 394 | for (i = 0; i < 3; i++) { |
163 | goto out; | 395 | unsigned long tsc_pit_khz; |
396 | |||
397 | /* | ||
398 | * Read the start value and the reference count of | ||
399 | * hpet/pmtimer when available. Then do the PIT | ||
400 | * calibration, which will take at least 50ms, and | ||
401 | * read the end value. | ||
402 | */ | ||
403 | local_irq_save(flags); | ||
404 | tsc1 = tsc_read_refs(&ref1, hpet); | ||
405 | tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); | ||
406 | tsc2 = tsc_read_refs(&ref2, hpet); | ||
407 | local_irq_restore(flags); | ||
408 | |||
409 | /* Pick the lowest PIT TSC calibration so far */ | ||
410 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); | ||
411 | |||
412 | /* hpet or pmtimer available ? */ | ||
413 | if (!hpet && !ref1 && !ref2) | ||
414 | continue; | ||
415 | |||
416 | /* Check, whether the sampling was disturbed by an SMI */ | ||
417 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) | ||
418 | continue; | ||
419 | |||
420 | tsc2 = (tsc2 - tsc1) * 1000000LL; | ||
421 | if (hpet) | ||
422 | tsc2 = calc_hpet_ref(tsc2, ref1, ref2); | ||
423 | else | ||
424 | tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); | ||
425 | |||
426 | tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); | ||
427 | |||
428 | /* Check the reference deviation */ | ||
429 | delta = ((u64) tsc_pit_min) * 100; | ||
430 | do_div(delta, tsc_ref_min); | ||
431 | |||
432 | /* | ||
433 | * If both calibration results are inside a 10% window | ||
434 | * then we can be sure, that the calibration | ||
435 | * succeeded. We break out of the loop right away. We | ||
436 | * use the reference value, as it is more precise. | ||
437 | */ | ||
438 | if (delta >= 90 && delta <= 110) { | ||
439 | printk(KERN_INFO | ||
440 | "TSC: PIT calibration matches %s. %d loops\n", | ||
441 | hpet ? "HPET" : "PMTIMER", i + 1); | ||
442 | return tsc_ref_min; | ||
443 | } | ||
444 | |||
445 | /* | ||
446 | * Check whether PIT failed more than once. This | ||
447 | * happens in virtualized environments. We need to | ||
448 | * give the virtual PC a slightly longer timeframe for | ||
449 | * the HPET/PMTIMER to make the result precise. | ||
450 | */ | ||
451 | if (i == 1 && tsc_pit_min == ULONG_MAX) { | ||
452 | latch = CAL2_LATCH; | ||
453 | ms = CAL2_MS; | ||
454 | loopmin = CAL2_PIT_LOOPS; | ||
455 | } | ||
164 | } | 456 | } |
165 | 457 | ||
166 | /* Check, whether the sampling was disturbed by an SMI */ | 458 | /* |
167 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { | 459 | * Now check the results. |
168 | printk(KERN_WARNING "TSC calibration disturbed by SMI, " | 460 | */ |
169 | "using PIT calibration result\n"); | 461 | if (tsc_pit_min == ULONG_MAX) { |
170 | goto out; | 462 | /* PIT gave no useful value */ |
463 | printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); | ||
464 | |||
465 | /* We don't have an alternative source, disable TSC */ | ||
466 | if (!hpet && !ref1 && !ref2) { | ||
467 | printk("TSC: No reference (HPET/PMTIMER) available\n"); | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | /* The alternative source failed as well, disable TSC */ | ||
472 | if (tsc_ref_min == ULONG_MAX) { | ||
473 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " | ||
474 | "failed.\n"); | ||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | /* Use the alternative source */ | ||
479 | printk(KERN_INFO "TSC: using %s reference calibration\n", | ||
480 | hpet ? "HPET" : "PMTIMER"); | ||
481 | |||
482 | return tsc_ref_min; | ||
171 | } | 483 | } |
172 | 484 | ||
173 | tsc2 = (tsc2 - tsc1) * 1000000LL; | 485 | /* We don't have an alternative source, use the PIT calibration value */ |
174 | 486 | if (!hpet && !ref1 && !ref2) { | |
175 | if (hpet) { | 487 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); |
176 | printk(KERN_INFO "TSC calibrated against HPET\n"); | 488 | return tsc_pit_min; |
177 | if (hpet2 < hpet1) | ||
178 | hpet2 += 0x100000000ULL; | ||
179 | hpet2 -= hpet1; | ||
180 | tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
181 | do_div(tsc1, 1000000); | ||
182 | } else { | ||
183 | printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); | ||
184 | if (pm2 < pm1) | ||
185 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
186 | pm2 -= pm1; | ||
187 | tsc1 = pm2 * 1000000000LL; | ||
188 | do_div(tsc1, PMTMR_TICKS_PER_SEC); | ||
189 | } | 489 | } |
190 | 490 | ||
191 | do_div(tsc2, tsc1); | 491 | /* The alternative source failed, use the PIT calibration value */ |
192 | tsc_khz_val = tsc2; | 492 | if (tsc_ref_min == ULONG_MAX) { |
493 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " | ||
494 | "Using PIT calibration\n"); | ||
495 | return tsc_pit_min; | ||
496 | } | ||
193 | 497 | ||
194 | out: | 498 | /* |
195 | return tsc_khz_val; | 499 | * The calibration values differ too much. In doubt, we use |
500 | * the PIT value as we know that there are PMTIMERs around | ||
501 | * running at double speed. At least we let the user know: | ||
502 | */ | ||
503 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", | ||
504 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); | ||
505 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | ||
506 | return tsc_pit_min; | ||
196 | } | 507 | } |
197 | 508 | ||
198 | |||
199 | #ifdef CONFIG_X86_32 | 509 | #ifdef CONFIG_X86_32 |
200 | /* Only called from the Powernow K7 cpu freq driver */ | 510 | /* Only called from the Powernow K7 cpu freq driver */ |
201 | int recalibrate_cpu_khz(void) | 511 | int recalibrate_cpu_khz(void) |
@@ -314,7 +624,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
314 | mark_tsc_unstable("cpufreq changes"); | 624 | mark_tsc_unstable("cpufreq changes"); |
315 | } | 625 | } |
316 | 626 | ||
317 | set_cyc2ns_scale(tsc_khz_ref, freq->cpu); | 627 | set_cyc2ns_scale(tsc_khz, freq->cpu); |
318 | 628 | ||
319 | return 0; | 629 | return 0; |
320 | } | 630 | } |
@@ -325,6 +635,10 @@ static struct notifier_block time_cpufreq_notifier_block = { | |||
325 | 635 | ||
326 | static int __init cpufreq_tsc(void) | 636 | static int __init cpufreq_tsc(void) |
327 | { | 637 | { |
638 | if (!cpu_has_tsc) | ||
639 | return 0; | ||
640 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
641 | return 0; | ||
328 | cpufreq_register_notifier(&time_cpufreq_notifier_block, | 642 | cpufreq_register_notifier(&time_cpufreq_notifier_block, |
329 | CPUFREQ_TRANSITION_NOTIFIER); | 643 | CPUFREQ_TRANSITION_NOTIFIER); |
330 | return 0; | 644 | return 0; |