diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2009-02-28 04:44:28 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2009-06-08 11:57:51 -0400 |
commit | 5636919b5c909fee54a6ef5226475ecae012ad02 (patch) | |
tree | c77fa89c56ee2d493fb82117ab5dbc5b28a8deeb | |
parent | 3a553147eaad5d4de90ab1f695aa13ddbea684ec (diff) |
MIPS: Outline udelay and fix a few issues.
Outlining fixes the issue were on certain CPUs such as the R10000 family
the delay loop would need an extra cycle if it overlaps a cacheline
boundary.
The rewrite also fixes build errors with GCC 4.4 which was changed in
way incompatible with the kernel's inline assembly.
Relying on pure C for computation of the delay value removes the need for
explicit. The price we pay is a slight slowdown of the computation - to
be fixed on another day.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r-- | arch/mips/include/asm/cpu-info.h | 4 | ||||
-rw-r--r-- | arch/mips/include/asm/delay.h | 92 | ||||
-rw-r--r-- | arch/mips/kernel/proc.c | 2 | ||||
-rw-r--r-- | arch/mips/lib/Makefile | 4 | ||||
-rw-r--r-- | arch/mips/lib/delay.c | 56 |
5 files changed, 66 insertions, 92 deletions
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index 744cd8fb107f..126044308dec 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h | |||
@@ -39,8 +39,8 @@ struct cache_desc { | |||
39 | #define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */ | 39 | #define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */ |
40 | 40 | ||
41 | struct cpuinfo_mips { | 41 | struct cpuinfo_mips { |
42 | unsigned long udelay_val; | 42 | unsigned int udelay_val; |
43 | unsigned long asid_cache; | 43 | unsigned int asid_cache; |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Capability and feature descriptor structure for MIPS CPU | 46 | * Capability and feature descriptor structure for MIPS CPU |
diff --git a/arch/mips/include/asm/delay.h b/arch/mips/include/asm/delay.h index b0bccd2c4ed5..a07e51b2be13 100644 --- a/arch/mips/include/asm/delay.h +++ b/arch/mips/include/asm/delay.h | |||
@@ -11,94 +11,12 @@ | |||
11 | #ifndef _ASM_DELAY_H | 11 | #ifndef _ASM_DELAY_H |
12 | #define _ASM_DELAY_H | 12 | #define _ASM_DELAY_H |
13 | 13 | ||
14 | #include <linux/param.h> | 14 | extern void __delay(unsigned int loops); |
15 | #include <linux/smp.h> | 15 | extern void __ndelay(unsigned int ns); |
16 | extern void __udelay(unsigned int us); | ||
16 | 17 | ||
17 | #include <asm/compiler.h> | 18 | #define ndelay(ns) __udelay(ns) |
18 | #include <asm/war.h> | 19 | #define udelay(us) __udelay(us) |
19 | |||
20 | static inline void __delay(unsigned long loops) | ||
21 | { | ||
22 | if (sizeof(long) == 4) | ||
23 | __asm__ __volatile__ ( | ||
24 | " .set noreorder \n" | ||
25 | " .align 3 \n" | ||
26 | "1: bnez %0, 1b \n" | ||
27 | " subu %0, 1 \n" | ||
28 | " .set reorder \n" | ||
29 | : "=r" (loops) | ||
30 | : "0" (loops)); | ||
31 | else if (sizeof(long) == 8 && !DADDI_WAR) | ||
32 | __asm__ __volatile__ ( | ||
33 | " .set noreorder \n" | ||
34 | " .align 3 \n" | ||
35 | "1: bnez %0, 1b \n" | ||
36 | " dsubu %0, 1 \n" | ||
37 | " .set reorder \n" | ||
38 | : "=r" (loops) | ||
39 | : "0" (loops)); | ||
40 | else if (sizeof(long) == 8 && DADDI_WAR) | ||
41 | __asm__ __volatile__ ( | ||
42 | " .set noreorder \n" | ||
43 | " .align 3 \n" | ||
44 | "1: bnez %0, 1b \n" | ||
45 | " dsubu %0, %2 \n" | ||
46 | " .set reorder \n" | ||
47 | : "=r" (loops) | ||
48 | : "0" (loops), "r" (1)); | ||
49 | } | ||
50 | |||
51 | |||
52 | /* | ||
53 | * Division by multiplication: you don't have to worry about | ||
54 | * loss of precision. | ||
55 | * | ||
56 | * Use only for very small delays ( < 1 msec). Should probably use a | ||
57 | * lookup table, really, as the multiplications take much too long with | ||
58 | * short delays. This is a "reasonable" implementation, though (and the | ||
59 | * first constant multiplications gets optimized away if the delay is | ||
60 | * a constant) | ||
61 | */ | ||
62 | |||
63 | static inline void __udelay(unsigned long usecs, unsigned long lpj) | ||
64 | { | ||
65 | unsigned long hi, lo; | ||
66 | |||
67 | /* | ||
68 | * The rates of 128 is rounded wrongly by the catchall case | ||
69 | * for 64-bit. Excessive precission? Probably ... | ||
70 | */ | ||
71 | #if defined(CONFIG_64BIT) && (HZ == 128) | ||
72 | usecs *= 0x0008637bd05af6c7UL; /* 2**64 / (1000000 / HZ) */ | ||
73 | #elif defined(CONFIG_64BIT) | ||
74 | usecs *= (0x8000000000000000UL / (500000 / HZ)); | ||
75 | #else /* 32-bit junk follows here */ | ||
76 | usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) + | ||
77 | 0x80000000ULL) >> 32); | ||
78 | #endif | ||
79 | |||
80 | if (sizeof(long) == 4) | ||
81 | __asm__("multu\t%2, %3" | ||
82 | : "=h" (usecs), "=l" (lo) | ||
83 | : "r" (usecs), "r" (lpj) | ||
84 | : GCC_REG_ACCUM); | ||
85 | else if (sizeof(long) == 8 && !R4000_WAR) | ||
86 | __asm__("dmultu\t%2, %3" | ||
87 | : "=h" (usecs), "=l" (lo) | ||
88 | : "r" (usecs), "r" (lpj) | ||
89 | : GCC_REG_ACCUM); | ||
90 | else if (sizeof(long) == 8 && R4000_WAR) | ||
91 | __asm__("dmultu\t%3, %4\n\tmfhi\t%0" | ||
92 | : "=r" (usecs), "=h" (hi), "=l" (lo) | ||
93 | : "r" (usecs), "r" (lpj) | ||
94 | : GCC_REG_ACCUM); | ||
95 | |||
96 | __delay(usecs); | ||
97 | } | ||
98 | |||
99 | #define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val | ||
100 | |||
101 | #define udelay(usecs) __udelay((usecs), __udelay_val) | ||
102 | 20 | ||
103 | /* make sure "usecs *= ..." in udelay do not overflow. */ | 21 | /* make sure "usecs *= ..." in udelay do not overflow. */ |
104 | #if HZ >= 1000 | 22 | #if HZ >= 1000 |
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 26760cad8b69..e0a4ac18fa07 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c | |||
@@ -42,7 +42,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
42 | seq_printf(m, fmt, __cpu_name[n], | 42 | seq_printf(m, fmt, __cpu_name[n], |
43 | (version >> 4) & 0x0f, version & 0x0f, | 43 | (version >> 4) & 0x0f, version & 0x0f, |
44 | (fp_vers >> 4) & 0x0f, fp_vers & 0x0f); | 44 | (fp_vers >> 4) & 0x0f, fp_vers & 0x0f); |
45 | seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n", | 45 | seq_printf(m, "BogoMIPS\t\t: %u.%02u\n", |
46 | cpu_data[n].udelay_val / (500000/HZ), | 46 | cpu_data[n].udelay_val / (500000/HZ), |
47 | (cpu_data[n].udelay_val / (5000/HZ)) % 100); | 47 | (cpu_data[n].udelay_val / (5000/HZ)) % 100); |
48 | seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no"); | 48 | seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no"); |
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index c13c7ad2cdae..2adead5a8a37 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile | |||
@@ -2,8 +2,8 @@ | |||
2 | # Makefile for MIPS-specific library files.. | 2 | # Makefile for MIPS-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y += csum_partial.o memcpy.o memcpy-inatomic.o memset.o strlen_user.o \ | 5 | lib-y += csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \ |
6 | strncpy_user.o strnlen_user.o uncached.o | 6 | strlen_user.o strncpy_user.o strnlen_user.o uncached.o |
7 | 7 | ||
8 | obj-y += iomap.o | 8 | obj-y += iomap.o |
9 | obj-$(CONFIG_PCI) += iomap-pci.o | 9 | obj-$(CONFIG_PCI) += iomap-pci.o |
diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c new file mode 100644 index 000000000000..f69c6b569eb3 --- /dev/null +++ b/arch/mips/lib/delay.c | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Copyright (C) 1994 by Waldorf Electronics | ||
7 | * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle | ||
8 | * Copyright (C) 1999, 2000 Silicon Graphics, Inc. | ||
9 | * Copyright (C) 2007 Maciej W. Rozycki | ||
10 | */ | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/param.h> | ||
13 | #include <linux/smp.h> | ||
14 | |||
15 | #include <asm/compiler.h> | ||
16 | #include <asm/war.h> | ||
17 | |||
18 | inline void __delay(unsigned int loops) | ||
19 | { | ||
20 | __asm__ __volatile__ ( | ||
21 | " .set noreorder \n" | ||
22 | " .align 3 \n" | ||
23 | "1: bnez %0, 1b \n" | ||
24 | " subu %0, 1 \n" | ||
25 | " .set reorder \n" | ||
26 | : "=r" (loops) | ||
27 | : "0" (loops)); | ||
28 | } | ||
29 | EXPORT_SYMBOL(__delay); | ||
30 | |||
31 | /* | ||
32 | * Division by multiplication: you don't have to worry about | ||
33 | * loss of precision. | ||
34 | * | ||
35 | * Use only for very small delays ( < 1 msec). Should probably use a | ||
36 | * lookup table, really, as the multiplications take much too long with | ||
37 | * short delays. This is a "reasonable" implementation, though (and the | ||
38 | * first constant multiplications gets optimized away if the delay is | ||
39 | * a constant) | ||
40 | */ | ||
41 | |||
42 | void __udelay(unsigned long us) | ||
43 | { | ||
44 | unsigned int lpj = current_cpu_data.udelay_val; | ||
45 | |||
46 | __delay((us * 0x000010c7 * HZ * lpj) >> 32); | ||
47 | } | ||
48 | EXPORT_SYMBOL(__udelay); | ||
49 | |||
50 | void __ndelay(unsigned long ns) | ||
51 | { | ||
52 | unsigned int lpj = current_cpu_data.udelay_val; | ||
53 | |||
54 | __delay((us * 0x00000005 * HZ * lpj) >> 32); | ||
55 | } | ||
56 | EXPORT_SYMBOL(__ndelay); | ||