aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/tsc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-09-04 13:41:22 -0400
committerIngo Molnar <mingo@elte.hu>2008-09-04 16:54:50 -0400
commit6ac40ed0413ef4096720f966e11c7cdf259eee3f (patch)
tree48d1a1ce1145b19529c42c6168b4e6804eb0933a /arch/x86/kernel/tsc.c
parenta977c400957451f3bd92b9ed6022f5fe8a6cbbf5 (diff)
x86: quick TSC calibration
Introduce a fast TSC-calibration method on sane hardware. It only uses 17920 PIT timer ticks to calibrate the TSC, plus 256 ticks on each side to make sure the TSC values were very close to the tick, so the whole calibration takes 15ms. Yet, despite only taking 15ms, we can actually give pretty stringent guarantees of accuracy: - the code requires that we hit each 256-counter block at least 50 times, so the TSC error is basically at *MOST* just a few PIT cycles off in any direction. In practice, it's going to be about one microsecond off (which is how long it takes to read the counter) - so over 17920 PIT cycles, we can pretty much guarantee that the calibration error is less than one half of a percent. My testing bears this out: on my machine, the quick-calibration reports 2934.085MHz, while the slow one reports 2933.415. Yes, the slower calibration is still more precise. For me, the slow calibration is stable to within about one hundredth of a percent, so it's (at a guess) roughly an order-and-a-half of magnitude more precise. The longer you wait, the more precise you can be. However, the nice thing about the fast TSC PIT synchronization is that it's pretty much _guaranteed_ to give that 0.5% precision, and fail gracefully (and very quickly) if it doesn't get it. And it really is fairly simple (even if there's a lot of _details_ there, and I didn't get all of those right on the first try or even the second ;) The patch says "110 insertions", but 63 of those new lines are actually comments. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/kernel/tsc.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 110 insertions(+), 1 deletions(-)
Diffstat (limited to 'arch/x86/kernel/tsc.c')
-rw-r--r--arch/x86/kernel/tsc.c119
1 files changed, 118 insertions, 1 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index da033b5b3e19..839070ba8465 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -227,6 +227,117 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
227 return delta; 227 return delta;
228} 228}
229 229
230/*
231 * This reads the current MSB of the PIT counter, and
232 * checks if we are running on sufficiently fast and
233 * non-virtualized hardware.
234 *
235 * Our expectations are:
236 *
237 * - the PIT is running at roughly 1.19MHz
238 *
239 * - each IO is going to take about 1us on real hardware,
240 * but we allow it to be much faster (by a factor of 10) or
241 * _slightly_ slower (ie we allow up to a 2us read+counter
242 * update) - anything else implies an unacceptably slow CPU
243 * or PIT for the fast calibration to work.
244 *
245 * - with 256 PIT ticks to read the value, we have 214us to
246 * see the same MSB (and overhead like doing a single TSC
247 * read per MSB value etc).
248 *
249 * - We're doing 2 reads per loop (LSB, MSB), and we expect
250 * them each to take about a microsecond on real hardware.
251 * So we expect a count value of around 100. But we'll be
252 * generous, and accept anything over 50.
253 *
254 * - if the PIT is stuck, and we see *many* more reads, we
255 * return early (and the next caller of pit_expect_msb()
256 * will then consider it a failure when they don't see the
257 * next expected value).
258 *
259 * These expectations mean that we know that we have seen the
260 * transition from one expected value to another with a fairly
261 * high accuracy, and we didn't miss any events. We can thus
262 * use the TSC value at the transitions to calculate a pretty
263 * good value for the TSC frequency.
264 */
265static inline int pit_expect_msb(unsigned char val)
266{
267 int count = 0;
268
269 for (count = 0; count < 50000; count++) {
270 /* Ignore LSB */
271 inb(0x42);
272 if (inb(0x42) != val)
273 break;
274 }
275 return count > 50;
276}
277
278/*
279 * How many MSB values do we want to see? We aim for a
280 * 15ms calibration, which assuming a 2us counter read
281 * error should give us roughly 150 ppm precision for
282 * the calibration.
283 */
284#define QUICK_PIT_MS 15
285#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
286
287static unsigned long quick_pit_calibrate(void)
288{
289 /* Set the Gate high, disable speaker */
290 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
291
292 /*
293 * Counter 2, mode 0 (one-shot), binary count
294 *
295 * NOTE! Mode 2 decrements by two (and then the
296 * output is flipped each time, giving the same
297 * final output frequency as a decrement-by-one),
298 * so mode 0 is much better when looking at the
299 * individual counts.
300 */
301 outb(0xb0, 0x43);
302
303 /* Start at 0xffff */
304 outb(0xff, 0x42);
305 outb(0xff, 0x42);
306
307 if (pit_expect_msb(0xff)) {
308 int i;
309 u64 t1, t2, delta;
310 unsigned char expect = 0xfe;
311
312 t1 = get_cycles();
313 for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) {
314 if (!pit_expect_msb(expect))
315 goto failed;
316 }
317 t2 = get_cycles();
318
319 /*
320 * Ok, if we get here, then we've seen the
321 * MSB of the PIT decrement QUICK_PIT_ITERATIONS
322 * times, and each MSB had many hits, so we never
323 * had any sudden jumps.
324 *
325 * As a result, we can depend on there not being
326 * any odd delays anywhere, and the TSC reads are
327 * reliable.
328 *
329 * kHz = ticks / time-in-seconds / 1000;
330 * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
331 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
332 */
333 delta = (t2 - t1)*PIT_TICK_RATE;
334 do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
335 printk("Fast TSC calibration using PIT\n");
336 return delta;
337 }
338failed:
339 return 0;
340}
230 341
231/** 342/**
232 * native_calibrate_tsc - calibrate the tsc on boot 343 * native_calibrate_tsc - calibrate the tsc on boot
@@ -235,9 +346,15 @@ unsigned long native_calibrate_tsc(void)
235{ 346{
236 u64 tsc1, tsc2, delta, ref1, ref2; 347 u64 tsc1, tsc2, delta, ref1, ref2;
237 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; 348 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
238 unsigned long flags, latch, ms; 349 unsigned long flags, latch, ms, fast_calibrate;
239 int hpet = is_hpet_enabled(), i, loopmin; 350 int hpet = is_hpet_enabled(), i, loopmin;
240 351
352 local_irq_save(flags);
353 fast_calibrate = quick_pit_calibrate();
354 local_irq_restore(flags);
355 if (fast_calibrate)
356 return fast_calibrate;
357
241 /* 358 /*
242 * Run 5 calibration loops to get the lowest frequency value 359 * Run 5 calibration loops to get the lowest frequency value
243 * (the best estimate). We use two different calibration modes 360 * (the best estimate). We use two different calibration modes