| author | Ralf Baechle <ralf@linux-mips.org> | 2009-02-28 04:44:28 -0500 |
|---|---|---|
| committer | Ralf Baechle <ralf@linux-mips.org> | 2009-06-08 11:57:51 -0400 |
| commit | 5636919b5c909fee54a6ef5226475ecae012ad02 (patch) | |
| tree | c77fa89c56ee2d493fb82117ab5dbc5b28a8deeb /arch/mips/include | |
| parent | 3a553147eaad5d4de90ab1f695aa13ddbea684ec (diff) | |
MIPS: Outline udelay and fix a few issues.
Outlining fixes the issue where, on certain CPUs such as the R10000 family, the delay loop would need an extra cycle if it overlaps a cacheline boundary.
The rewrite also fixes build errors with GCC 4.4, which was changed in a way incompatible with the kernel's inline assembly.
Relying on pure C for computation of the delay value removes the need for explicit inline assembly. The price we pay is a slight slowdown of the computation - to be fixed on another day.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
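The diffstat below is limited to arch/mips/include, so the new out-of-line implementation itself (added elsewhere under arch/mips) is not shown. As a minimal sketch of the approach the commit message describes - an outlined delay loop plus a pure-C delay-value computation - something along the following lines would do the job. The function bodies, the use of current_cpu_data.udelay_val as a loops-per-jiffy value, and the scaling arithmetic are illustrative assumptions, not the literal code added by this commit.

	/*
	 * Illustrative sketch only -- not the actual out-of-line code added
	 * by this commit.  Assumes udelay_val holds loops-per-jiffy and that
	 * HZ comes from <linux/param.h>.
	 */
	#include <linux/param.h>	/* HZ */
	#include <asm/cpu-info.h>	/* current_cpu_data */

	void __delay(unsigned int loops)
	{
		/*
		 * Out of line, so the placement of the two-instruction loop is
		 * under control and the R10000-family cacheline-straddle case
		 * mentioned above can be avoided.
		 */
		__asm__ __volatile__ (
		"	.set	noreorder	\n"
		"	.align	3		\n"
		"1:	bnez	%0, 1b		\n"
		"	subu	%0, 1		\n"
		"	.set	reorder		\n"
		: "=r" (loops)
		: "0" (loops));
	}

	void __udelay(unsigned int us)
	{
		unsigned int lpj = current_cpu_data.udelay_val;

		/*
		 * Pure C scaling: loops = us * lpj * HZ / 1000000, done as a
		 * 64-bit multiply so the compiler generates the widening
		 * multiply itself.  This replaces the "=h"/"=l" multu/dmultu
		 * asm that newer GCC versions no longer accept, at the cost of
		 * the slight slowdown noted in the commit message.
		 */
		__delay((unsigned long long)us * lpj * HZ / 1000000);
	}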
Diffstat (limited to 'arch/mips/include')
| -rw-r--r-- | arch/mips/include/asm/cpu-info.h | 4 |
| -rw-r--r-- | arch/mips/include/asm/delay.h | 92 |

2 files changed, 7 insertions, 89 deletions
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index 744cd8fb107f..126044308dec 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -39,8 +39,8 @@ struct cache_desc {
 #define MIPS_CACHE_PINDEX	0x00000020	/* Physically indexed cache */
 
 struct cpuinfo_mips {
-	unsigned long		udelay_val;
-	unsigned long		asid_cache;
+	unsigned int		udelay_val;
+	unsigned int		asid_cache;
 
 	/*
 	 * Capability and feature descriptor structure for MIPS CPU
diff --git a/arch/mips/include/asm/delay.h b/arch/mips/include/asm/delay.h
index b0bccd2c4ed5..a07e51b2be13 100644
--- a/arch/mips/include/asm/delay.h
+++ b/arch/mips/include/asm/delay.h
@@ -11,94 +11,12 @@
 #ifndef _ASM_DELAY_H
 #define _ASM_DELAY_H
 
-#include <linux/param.h>
-#include <linux/smp.h>
+extern void __delay(unsigned int loops);
+extern void __ndelay(unsigned int ns);
+extern void __udelay(unsigned int us);
 
-#include <asm/compiler.h>
-#include <asm/war.h>
-
-static inline void __delay(unsigned long loops)
-{
-	if (sizeof(long) == 4)
-		__asm__ __volatile__ (
-		"	.set	noreorder	\n"
-		"	.align	3		\n"
-		"1:	bnez	%0, 1b		\n"
-		"	subu	%0, 1		\n"
-		"	.set	reorder		\n"
-		: "=r" (loops)
-		: "0" (loops));
-	else if (sizeof(long) == 8 && !DADDI_WAR)
-		__asm__ __volatile__ (
-		"	.set	noreorder	\n"
-		"	.align	3		\n"
-		"1:	bnez	%0, 1b		\n"
-		"	dsubu	%0, 1		\n"
-		"	.set	reorder		\n"
-		: "=r" (loops)
-		: "0" (loops));
-	else if (sizeof(long) == 8 && DADDI_WAR)
-		__asm__ __volatile__ (
-		"	.set	noreorder	\n"
-		"	.align	3		\n"
-		"1:	bnez	%0, 1b		\n"
-		"	dsubu	%0, %2		\n"
-		"	.set	reorder		\n"
-		: "=r" (loops)
-		: "0" (loops), "r" (1));
-}
-
-
-/*
- * Division by multiplication: you don't have to worry about
- * loss of precision.
- *
- * Use only for very small delays ( < 1 msec).  Should probably use a
- * lookup table, really, as the multiplications take much too long with
- * short delays.  This is a "reasonable" implementation, though (and the
- * first constant multiplications gets optimized away if the delay is
- * a constant)
- */
-
-static inline void __udelay(unsigned long usecs, unsigned long lpj)
-{
-	unsigned long hi, lo;
-
-	/*
-	 * The rates of 128 is rounded wrongly by the catchall case
-	 * for 64-bit.  Excessive precission?  Probably ...
-	 */
-#if defined(CONFIG_64BIT) && (HZ == 128)
-	usecs *= 0x0008637bd05af6c7UL;		/* 2**64 / (1000000 / HZ) */
-#elif defined(CONFIG_64BIT)
-	usecs *= (0x8000000000000000UL / (500000 / HZ));
-#else /* 32-bit junk follows here */
-	usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) +
-	                           0x80000000ULL) >> 32);
-#endif
-
-	if (sizeof(long) == 4)
-		__asm__("multu\t%2, %3"
-		: "=h" (usecs), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-	else if (sizeof(long) == 8 && !R4000_WAR)
-		__asm__("dmultu\t%2, %3"
-		: "=h" (usecs), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-	else if (sizeof(long) == 8 && R4000_WAR)
-		__asm__("dmultu\t%3, %4\n\tmfhi\t%0"
-		: "=r" (usecs), "=h" (hi), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-
-	__delay(usecs);
-}
-
-#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val
-
-#define udelay(usecs) __udelay((usecs), __udelay_val)
+#define ndelay(ns) __udelay(ns)
+#define udelay(us) __udelay(us)
 
 /* make sure "usecs *= ..." in udelay do not overflow. */
 #if HZ >= 1000