aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel/timers/timer_tsc.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386/kernel/timers/timer_tsc.c
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/i386/kernel/timers/timer_tsc.c')
-rw-r--r-- arch/i386/kernel/timers/timer_tsc.c 560
1 files changed, 560 insertions, 0 deletions
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
new file mode 100644
index 00000000000..a685994e5c8
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -0,0 +1,560 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 *
5 * 2004-06-25 Jesper Juhl
6 * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7 * failing to inline.
8 */
9
10#include <linux/spinlock.h>
11#include <linux/init.h>
12#include <linux/timex.h>
13#include <linux/errno.h>
14#include <linux/cpufreq.h>
15#include <linux/string.h>
16#include <linux/jiffies.h>
17
18#include <asm/timer.h>
19#include <asm/io.h>
20/* processor.h for the tsc_disable flag */
21#include <asm/processor.h>
22
23#include "io_ports.h"
24#include "mach_timer.h"
25
26#include <asm/hpet.h>
27
28#ifdef CONFIG_HPET_TIMER
29static unsigned long hpet_usec_quotient;
30static unsigned long hpet_last;
31static struct timer_opts timer_tsc;
32#endif
33
34static inline void cpufreq_delayed_get(void);
35
36int tsc_disable __initdata = 0;
37
38extern spinlock_t i8253_lock;
39
40static int use_tsc;
41/* Number of usecs that the last interrupt was delayed */
42static int delay_at_last_interrupt;
43
44static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
45static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
46static unsigned long long monotonic_base;
47static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
48
49/* convert from cycles(64bits) => nanoseconds (64bits)
50 * basic equation:
51 * ns = cycles / (freq / ns_per_sec)
52 * ns = cycles * (ns_per_sec / freq)
53 * ns = cycles * (10^9 / (cpu_mhz * 10^6))
54 * ns = cycles * (10^3 / cpu_mhz)
55 *
56 * Then we use scaling math (suggested by george@mvista.com) to get:
57 * ns = cycles * (10^3 * SC / cpu_mhz) / SC
58 * ns = cycles * cyc2ns_scale / SC
59 *
60 * And since SC is a constant power of two, we can convert the div
61 * into a shift.
62 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
63 */
/*
 * Fixed-point multiplier used by cycles_2_ns():
 *	cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz
 * It is 0 until set_cyc2ns_scale() has been called with a non-zero
 * frequency, in which case cycles_2_ns() returns 0.
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * set_cyc2ns_scale - recompute the cycles -> nanoseconds multiplier
 * @cpu_mhz: CPU clock in MHz (callers pass cpu_khz / 1000)
 *
 * A frequency of 0 is ignored and the previous multiplier is kept.
 * Callers derive @cpu_mhz by integer division, so any cpu_khz below 1000
 * arrives here as 0; dividing by it would trap.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
{
	if (!cpu_mhz)
		return;

	cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz;
}

/*
 * cycles_2_ns - convert a TSC cycle count to nanoseconds
 * @cyc: number of cycles
 *
 * Returns (cyc * cyc2ns_scale) / 2^CYC2NS_SCALE_FACTOR, with the division
 * done as a right shift.  The multiplication is performed in 64 bits and
 * is not checked for overflow.
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}
76
77static int count2; /* counter for mark_offset_tsc() */
78
79/* Cached *multiplier* to convert TSC counts to microseconds.
80 * (see the equation below).
81 * Equal to 2^32 * (1 / (clocks per usec) ).
82 * Initialized in time_init.
83 */
84static unsigned long fast_gettimeoffset_quotient;
85
86static unsigned long get_offset_tsc(void)
87{
88 register unsigned long eax, edx;
89
90 /* Read the Time Stamp Counter */
91
92 rdtsc(eax,edx);
93
94 /* .. relative to previous jiffy (32 bits is enough) */
95 eax -= last_tsc_low; /* tsc_low delta */
96
97 /*
98 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
99 * = (tsc_low delta) * (usecs_per_clock)
100 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
101 *
102 * Using a mull instead of a divl saves up to 31 clock cycles
103 * in the critical path.
104 */
105
106 __asm__("mull %2"
107 :"=a" (eax), "=d" (edx)
108 :"rm" (fast_gettimeoffset_quotient),
109 "0" (eax));
110
111 /* our adjusted time offset in microseconds */
112 return delay_at_last_interrupt + edx;
113}
114
115static unsigned long long monotonic_clock_tsc(void)
116{
117 unsigned long long last_offset, this_offset, base;
118 unsigned seq;
119
120 /* atomically read monotonic base & last_offset */
121 do {
122 seq = read_seqbegin(&monotonic_lock);
123 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
124 base = monotonic_base;
125 } while (read_seqretry(&monotonic_lock, seq));
126
127 /* Read the Time Stamp Counter */
128 rdtscll(this_offset);
129
130 /* return the value in ns */
131 return base + cycles_2_ns(this_offset - last_offset);
132}
133
134/*
135 * Scheduler clock - returns current time in nanosec units.
136 */
137unsigned long long sched_clock(void)
138{
139 unsigned long long this_offset;
140
141 /*
142 * In the NUMA case we dont use the TSC as they are not
143 * synchronized across all CPUs.
144 */
145#ifndef CONFIG_NUMA
146 if (!use_tsc)
147#endif
148 /* no locking but a rare wrong value is not a big deal */
149 return jiffies_64 * (1000000000 / HZ);
150
151 /* Read the Time Stamp Counter */
152 rdtscll(this_offset);
153
154 /* return the value in ns */
155 return cycles_2_ns(this_offset);
156}
157
/*
 * delay_tsc - busy-wait until at least @loops TSC cycles have passed
 * @loops: number of cycles (low 32 bits of the TSC) to wait for
 *
 * The TSC is polled at least once, with rep_nop() before every poll.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long start, cur;

	rdtscl(start);
	for (;;) {
		rep_nop();
		rdtscl(cur);
		if ((cur - start) >= loops)
			break;
	}
}
169
170#ifdef CONFIG_HPET_TIMER
171static void mark_offset_tsc_hpet(void)
172{
173 unsigned long long this_offset, last_offset;
174 unsigned long offset, temp, hpet_current;
175
176 write_seqlock(&monotonic_lock);
177 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
178 /*
179 * It is important that these two operations happen almost at
180 * the same time. We do the RDTSC stuff first, since it's
181 * faster. To avoid any inconsistencies, we need interrupts
182 * disabled locally.
183 */
184 /*
185 * Interrupts are just disabled locally since the timer irq
186 * has the SA_INTERRUPT flag set. -arca
187 */
188 /* read Pentium cycle counter */
189
190 hpet_current = hpet_readl(HPET_COUNTER);
191 rdtsc(last_tsc_low, last_tsc_high);
192
193 /* lost tick compensation */
194 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
195 if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
196 int lost_ticks = (offset - hpet_last) / hpet_tick;
197 jiffies_64 += lost_ticks;
198 }
199 hpet_last = hpet_current;
200
201 /* update the monotonic base value */
202 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
203 monotonic_base += cycles_2_ns(this_offset - last_offset);
204 write_sequnlock(&monotonic_lock);
205
206 /* calculate delay_at_last_interrupt */
207 /*
208 * Time offset = (hpet delta) * ( usecs per HPET clock )
209 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
210 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
211 * Where,
212 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
213 */
214 delay_at_last_interrupt = hpet_current - offset;
215 ASM_MUL64_REG(temp, delay_at_last_interrupt,
216 hpet_usec_quotient, delay_at_last_interrupt);
217}
218#endif
219
220
221#ifdef CONFIG_CPU_FREQ
222#include <linux/workqueue.h>
223
224static unsigned int cpufreq_delayed_issched = 0;
225static unsigned int cpufreq_init = 0;
226static struct work_struct cpufreq_delayed_get_work;
227
228static void handle_cpufreq_delayed_get(void *v)
229{
230 unsigned int cpu;
231 for_each_online_cpu(cpu) {
232 cpufreq_get(cpu);
233 }
234 cpufreq_delayed_issched = 0;
235}
236
237/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
238 * to verify the CPU frequency the timing core thinks the CPU is running
239 * at is still correct.
240 */
241static inline void cpufreq_delayed_get(void)
242{
243 if (cpufreq_init && !cpufreq_delayed_issched) {
244 cpufreq_delayed_issched = 1;
245 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
246 schedule_work(&cpufreq_delayed_get_work);
247 }
248}
249
250/* If the CPU frequency is scaled, TSC-based delays will need a different
251 * loops_per_jiffy value to function properly.
252 */
253
254static unsigned int ref_freq = 0;
255static unsigned long loops_per_jiffy_ref = 0;
256
257#ifndef CONFIG_SMP
258static unsigned long fast_gettimeoffset_ref = 0;
259static unsigned long cpu_khz_ref = 0;
260#endif
261
262static int
263time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
264 void *data)
265{
266 struct cpufreq_freqs *freq = data;
267
268 if (val != CPUFREQ_RESUMECHANGE)
269 write_seqlock_irq(&xtime_lock);
270 if (!ref_freq) {
271 ref_freq = freq->old;
272 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
273#ifndef CONFIG_SMP
274 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
275 cpu_khz_ref = cpu_khz;
276#endif
277 }
278
279 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
280 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
281 (val == CPUFREQ_RESUMECHANGE)) {
282 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
283 cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
284#ifndef CONFIG_SMP
285 if (cpu_khz)
286 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
287 if (use_tsc) {
288 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
289 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
290 set_cyc2ns_scale(cpu_khz/1000);
291 }
292 }
293#endif
294 }
295
296 if (val != CPUFREQ_RESUMECHANGE)
297 write_sequnlock_irq(&xtime_lock);
298
299 return 0;
300}
301
302static struct notifier_block time_cpufreq_notifier_block = {
303 .notifier_call = time_cpufreq_notifier
304};
305
306
307static int __init cpufreq_tsc(void)
308{
309 int ret;
310 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
311 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
312 CPUFREQ_TRANSITION_NOTIFIER);
313 if (!ret)
314 cpufreq_init = 1;
315 return ret;
316}
317core_initcall(cpufreq_tsc);
318
319#else /* CONFIG_CPU_FREQ */
/* Without CONFIG_CPU_FREQ there is no frequency re-check to schedule */
static inline void cpufreq_delayed_get(void)
{
}
321#endif
322
323static void mark_offset_tsc(void)
324{
325 unsigned long lost,delay;
326 unsigned long delta = last_tsc_low;
327 int count;
328 int countmp;
329 static int count1 = 0;
330 unsigned long long this_offset, last_offset;
331 static int lost_count = 0;
332
333 write_seqlock(&monotonic_lock);
334 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
335 /*
336 * It is important that these two operations happen almost at
337 * the same time. We do the RDTSC stuff first, since it's
338 * faster. To avoid any inconsistencies, we need interrupts
339 * disabled locally.
340 */
341
342 /*
343 * Interrupts are just disabled locally since the timer irq
344 * has the SA_INTERRUPT flag set. -arca
345 */
346
347 /* read Pentium cycle counter */
348
349 rdtsc(last_tsc_low, last_tsc_high);
350
351 spin_lock(&i8253_lock);
352 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
353
354 count = inb_p(PIT_CH0); /* read the latched count */
355 count |= inb(PIT_CH0) << 8;
356
357 /*
358 * VIA686a test code... reset the latch if count > max + 1
359 * from timer_pit.c - cjb
360 */
361 if (count > LATCH) {
362 outb_p(0x34, PIT_MODE);
363 outb_p(LATCH & 0xff, PIT_CH0);
364 outb(LATCH >> 8, PIT_CH0);
365 count = LATCH - 1;
366 }
367
368 spin_unlock(&i8253_lock);
369
370 if (pit_latch_buggy) {
371 /* get center value of last 3 time lutch */
372 if ((count2 >= count && count >= count1)
373 || (count1 >= count && count >= count2)) {
374 count2 = count1; count1 = count;
375 } else if ((count1 >= count2 && count2 >= count)
376 || (count >= count2 && count2 >= count1)) {
377 countmp = count;count = count2;
378 count2 = count1;count1 = countmp;
379 } else {
380 count2 = count1; count1 = count; count = count1;
381 }
382 }
383
384 /* lost tick compensation */
385 delta = last_tsc_low - delta;
386 {
387 register unsigned long eax, edx;
388 eax = delta;
389 __asm__("mull %2"
390 :"=a" (eax), "=d" (edx)
391 :"rm" (fast_gettimeoffset_quotient),
392 "0" (eax));
393 delta = edx;
394 }
395 delta += delay_at_last_interrupt;
396 lost = delta/(1000000/HZ);
397 delay = delta%(1000000/HZ);
398 if (lost >= 2) {
399 jiffies_64 += lost-1;
400
401 /* sanity check to ensure we're not always losing ticks */
402 if (lost_count++ > 100) {
403 printk(KERN_WARNING "Losing too many ticks!\n");
404 printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
405 printk(KERN_WARNING "Possible reasons for this are:\n");
406 printk(KERN_WARNING " You're running with Speedstep,\n");
407 printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
408 printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
409 printk(KERN_WARNING "Falling back to a sane timesource now.\n");
410
411 clock_fallback();
412 }
413 /* ... but give the TSC a fair chance */
414 if (lost_count > 25)
415 cpufreq_delayed_get();
416 } else
417 lost_count = 0;
418 /* update the monotonic base value */
419 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
420 monotonic_base += cycles_2_ns(this_offset - last_offset);
421 write_sequnlock(&monotonic_lock);
422
423 /* calculate delay_at_last_interrupt */
424 count = ((LATCH-1) - count) * TICK_SIZE;
425 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
426
427 /* catch corner case where tick rollover occured
428 * between tsc and pit reads (as noted when
429 * usec delta is > 90% # of usecs/tick)
430 */
431 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
432 jiffies_64++;
433}
434
435static int __init init_tsc(char* override)
436{
437
438 /* check clock override */
439 if (override[0] && strncmp(override,"tsc",3)) {
440#ifdef CONFIG_HPET_TIMER
441 if (is_hpet_enabled()) {
442 printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
443 } else
444#endif
445 {
446 return -ENODEV;
447 }
448 }
449
450 /*
451 * If we have APM enabled or the CPU clock speed is variable
452 * (CPU stops clock on HLT or slows clock to save power)
453 * then the TSC timestamps may diverge by up to 1 jiffy from
454 * 'real time' but nothing will break.
455 * The most frequent case is that the CPU is "woken" from a halt
456 * state by the timer interrupt itself, so we get 0 error. In the
457 * rare cases where a driver would "wake" the CPU and request a
458 * timestamp, the maximum error is < 1 jiffy. But timestamps are
459 * still perfectly ordered.
460 * Note that the TSC counter will be reset if APM suspends
461 * to disk; this won't break the kernel, though, 'cuz we're
462 * smart. See arch/i386/kernel/apm.c.
463 */
464 /*
465 * Firstly we have to do a CPU check for chips with
466 * a potentially buggy TSC. At this point we haven't run
467 * the ident/bugs checks so we must run this hook as it
468 * may turn off the TSC flag.
469 *
470 * NOTE: this doesn't yet handle SMP 486 machines where only
471 * some CPU's have a TSC. Thats never worked and nobody has
472 * moaned if you have the only one in the world - you fix it!
473 */
474
475 count2 = LATCH; /* initialize counter for mark_offset_tsc() */
476
477 if (cpu_has_tsc) {
478 unsigned long tsc_quotient;
479#ifdef CONFIG_HPET_TIMER
480 if (is_hpet_enabled()){
481 unsigned long result, remain;
482 printk("Using TSC for gettimeofday\n");
483 tsc_quotient = calibrate_tsc_hpet(NULL);
484 timer_tsc.mark_offset = &mark_offset_tsc_hpet;
485 /*
486 * Math to calculate hpet to usec multiplier
487 * Look for the comments at get_offset_tsc_hpet()
488 */
489 ASM_DIV64_REG(result, remain, hpet_tick,
490 0, KERNEL_TICK_USEC);
491 if (remain > (hpet_tick >> 1))
492 result++; /* rounding the result */
493
494 hpet_usec_quotient = result;
495 } else
496#endif
497 {
498 tsc_quotient = calibrate_tsc();
499 }
500
501 if (tsc_quotient) {
502 fast_gettimeoffset_quotient = tsc_quotient;
503 use_tsc = 1;
504 /*
505 * We could be more selective here I suspect
506 * and just enable this for the next intel chips ?
507 */
508 /* report CPU clock rate in Hz.
509 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
510 * clock/second. Our precision is about 100 ppm.
511 */
512 { unsigned long eax=0, edx=1000;
513 __asm__("divl %2"
514 :"=a" (cpu_khz), "=d" (edx)
515 :"r" (tsc_quotient),
516 "0" (eax), "1" (edx));
517 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
518 }
519 set_cyc2ns_scale(cpu_khz/1000);
520 return 0;
521 }
522 }
523 return -ENODEV;
524}
525
526#ifndef CONFIG_X86_TSC
527/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
528 * in cpu/common.c */
529static int __init tsc_setup(char *str)
530{
531 tsc_disable = 1;
532 return 1;
533}
534#else
535static int __init tsc_setup(char *str)
536{
537 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
538 "cannot disable TSC.\n");
539 return 1;
540}
541#endif
542__setup("notsc", tsc_setup);
543
544
545
546/************************************************************/
547
548/* tsc timer_opts struct */
549static struct timer_opts timer_tsc = {
550 .name = "tsc",
551 .mark_offset = mark_offset_tsc,
552 .get_offset = get_offset_tsc,
553 .monotonic_clock = monotonic_clock_tsc,
554 .delay = delay_tsc,
555};
556
557struct init_timer_opts __initdata timer_tsc_init = {
558 .init = init_tsc,
559 .opts = &timer_tsc,
560};