diff options
author | Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> | 2005-10-30 17:59:25 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 20:37:11 -0500 |
commit | dacb16b1a034fa7a0b868ee30758119fbfd90bc1 (patch) | |
tree | daaa631c9c6fa2ad011647fb3acd219784faf2e2 /arch/i386/kernel | |
parent | bfd51626cbf61cb23f787d8ff972ef0d5ddacc0b (diff) |
[PATCH] i386 and x86_64 TSC set_cyc2ns_scale imprecision
I just found out that some precision is unnecessarily lost in the
arch/i386/kernel/timers/timer_tsc.c:set_cyc2ns_scale function. It uses a
cpu_mhz parameter when it could use cpu_khz. In the specific case of an
Intel P4 running at 3001.171 MHz, the truncation to 3001 MHz leads to an
imprecision of 19 microseconds per second: this is very sad for a timer with
nearly nanosecond accuracy.
Fix the x86_64 architecture too.
Cc: george anzinger <george@mvista.com>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r-- | arch/i386/kernel/timers/timer_hpet.c | 17 | ||||
-rw-r--r-- | arch/i386/kernel/timers/timer_tsc.c | 21 |
2 files changed, 24 insertions, 14 deletions
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d973a8b681fd..be242723c339 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c | |||
@@ -30,23 +30,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | |||
30 | * basic equation: | 30 | * basic equation: |
31 | * ns = cycles / (freq / ns_per_sec) | 31 | * ns = cycles / (freq / ns_per_sec) |
32 | * ns = cycles * (ns_per_sec / freq) | 32 | * ns = cycles * (ns_per_sec / freq) |
33 | * ns = cycles * (10^9 / (cpu_mhz * 10^6)) | 33 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) |
34 | * ns = cycles * (10^3 / cpu_mhz) | 34 | * ns = cycles * (10^6 / cpu_khz) |
35 | * | 35 | * |
36 | * Then we use scaling math (suggested by george@mvista.com) to get: | 36 | * Then we use scaling math (suggested by george@mvista.com) to get: |
37 | * ns = cycles * (10^3 * SC / cpu_mhz) / SC | 37 | * ns = cycles * (10^6 * SC / cpu_khz) / SC |
38 | * ns = cycles * cyc2ns_scale / SC | 38 | * ns = cycles * cyc2ns_scale / SC |
39 | * | 39 | * |
40 | * And since SC is a constant power of two, we can convert the div | 40 | * And since SC is a constant power of two, we can convert the div |
41 | * into a shift. | 41 | * into a shift. |
42 | * | ||
43 | * We can use khz divisor instead of mhz to keep a better percision, since | ||
44 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
45 | * (mathieu.desnoyers@polymtl.ca) | ||
46 | * | ||
42 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | 47 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" |
43 | */ | 48 | */ |
44 | static unsigned long cyc2ns_scale; | 49 | static unsigned long cyc2ns_scale; |
45 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
46 | 51 | ||
47 | static inline void set_cyc2ns_scale(unsigned long cpu_mhz) | 52 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
48 | { | 53 | { |
49 | cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; | 54 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; |
50 | } | 55 | } |
51 | 56 | ||
52 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | 57 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) |
@@ -163,7 +168,7 @@ static int __init init_hpet(char* override) | |||
163 | printk("Detected %u.%03u MHz processor.\n", | 168 | printk("Detected %u.%03u MHz processor.\n", |
164 | cpu_khz / 1000, cpu_khz % 1000); | 169 | cpu_khz / 1000, cpu_khz % 1000); |
165 | } | 170 | } |
166 | set_cyc2ns_scale(cpu_khz/1000); | 171 | set_cyc2ns_scale(cpu_khz); |
167 | } | 172 | } |
168 | /* set this only when cpu_has_tsc */ | 173 | /* set this only when cpu_has_tsc */ |
169 | timer_hpet.read_timer = read_timer_tsc; | 174 | timer_hpet.read_timer = read_timer_tsc; |
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 6dd470cc9f72..d395e3b42485 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c | |||
@@ -49,23 +49,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | |||
49 | * basic equation: | 49 | * basic equation: |
50 | * ns = cycles / (freq / ns_per_sec) | 50 | * ns = cycles / (freq / ns_per_sec) |
51 | * ns = cycles * (ns_per_sec / freq) | 51 | * ns = cycles * (ns_per_sec / freq) |
52 | * ns = cycles * (10^9 / (cpu_mhz * 10^6)) | 52 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) |
53 | * ns = cycles * (10^3 / cpu_mhz) | 53 | * ns = cycles * (10^6 / cpu_khz) |
54 | * | 54 | * |
55 | * Then we use scaling math (suggested by george@mvista.com) to get: | 55 | * Then we use scaling math (suggested by george@mvista.com) to get: |
56 | * ns = cycles * (10^3 * SC / cpu_mhz) / SC | 56 | * ns = cycles * (10^6 * SC / cpu_khz) / SC |
57 | * ns = cycles * cyc2ns_scale / SC | 57 | * ns = cycles * cyc2ns_scale / SC |
58 | * | 58 | * |
59 | * And since SC is a constant power of two, we can convert the div | 59 | * And since SC is a constant power of two, we can convert the div |
60 | * into a shift. | 60 | * into a shift. |
61 | * | ||
62 | * We can use khz divisor instead of mhz to keep a better percision, since | ||
63 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
64 | * (mathieu.desnoyers@polymtl.ca) | ||
65 | * | ||
61 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | 66 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" |
62 | */ | 67 | */ |
63 | static unsigned long cyc2ns_scale; | 68 | static unsigned long cyc2ns_scale; |
64 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 69 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
65 | 70 | ||
66 | static inline void set_cyc2ns_scale(unsigned long cpu_mhz) | 71 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
67 | { | 72 | { |
68 | cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; | 73 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; |
69 | } | 74 | } |
70 | 75 | ||
71 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | 76 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) |
@@ -286,7 +291,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
286 | if (use_tsc) { | 291 | if (use_tsc) { |
287 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | 292 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { |
288 | fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); | 293 | fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); |
289 | set_cyc2ns_scale(cpu_khz/1000); | 294 | set_cyc2ns_scale(cpu_khz); |
290 | } | 295 | } |
291 | } | 296 | } |
292 | #endif | 297 | #endif |
@@ -536,7 +541,7 @@ static int __init init_tsc(char* override) | |||
536 | printk("Detected %u.%03u MHz processor.\n", | 541 | printk("Detected %u.%03u MHz processor.\n", |
537 | cpu_khz / 1000, cpu_khz % 1000); | 542 | cpu_khz / 1000, cpu_khz % 1000); |
538 | } | 543 | } |
539 | set_cyc2ns_scale(cpu_khz/1000); | 544 | set_cyc2ns_scale(cpu_khz); |
540 | return 0; | 545 | return 0; |
541 | } | 546 | } |
542 | } | 547 | } |