author		Paul Mundt <lethal@linux-sh.org>	2006-09-26 22:29:55 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2006-09-26 22:29:55 -0400
commit		a252710fc5b63b24934905ca47ecf661702d7f00 (patch)
tree		7fbaddfb977095879f42c68a2a2f115d0e2314ee /arch/sh/mm/cache-sh4.c
parent		e4e3b5ccd77226c9c4dbb0737106b868dfc182d9 (diff)
sh: flush_cache_range() cleanup and optimizations.
flush_cache_range() wasn't page-aligning the end of the range. We
can't assume that it will always be page-aligned, and we ended up
getting unaligned faults in some rare call paths.
Additionally, we add a small optimization to simply purge the dcache
entirely if the range is large enough that walking the page tables
would take longer than the purge itself. We use an arbitrary cutoff
of 64 pages for the large range size, as per sh64.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/mm/cache-sh4.c')
-rw-r--r--	arch/sh/mm/cache-sh4.c	72
1 file changed, 46 insertions, 26 deletions
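
As background for the two changes described above, here is a minimal
standalone sketch (not part of the patch) of how PAGE_ALIGN() rounds an
arbitrary end address up to the next page boundary, and of the 64-page
cutoff test the patch introduces. A 4 KiB page size (PAGE_SHIFT == 12)
is assumed, as on SH-4; the macro definitions mirror the usual kernel
ones, and the addresses are hypothetical.

/*
 * Sketch, not kernel code: the alignment fix and the large-range
 * cutoff from this commit, on hypothetical addresses.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
        unsigned long start = 0x10000000UL; /* page-aligned start */
        unsigned long end   = 0x10040abcUL; /* NOT page-aligned   */

        /* The fix: round 'end' up so the walk covers whole pages only. */
        printf("PAGE_ALIGN(end) = %#lx\n", PAGE_ALIGN(end)); /* 0x10041000 */

        /* The optimization: 64 pages or more -> skip the PTE walk. */
        if (((end - start) >> PAGE_SHIFT) >= 64)
                printf("large range: flush_dcache_all()\n");

        return 0;
}

The cutoff matters because each page in the range otherwise costs a
page-table walk plus an alias check; beyond roughly 64 pages, one full
dcache purge is the cheaper operation.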
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 524cea5b47f9..94c05d09c3f7 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,7 +2,7 @@
  * arch/sh/mm/cache-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002 Niibe Yutaka
- * Copyright (C) 2001, 2002, 2003, 2004 Paul Mundt
+ * Copyright (C) 2001, 2002, 2003, 2004, 2005 Paul Mundt
  * Copyright (C) 2003 Richard Curnow
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -25,6 +25,8 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
+extern void __flush_cache_4096(unsigned long addr, unsigned long phys,
+                               unsigned long exec_offset);
 extern void __flush_cache_4096_all(unsigned long start);
 static void __flush_cache_4096_all_ex(unsigned long start);
 extern void __flush_dcache_all(void);
@@ -112,9 +114,14 @@ static void __flush_dcache_all_ex(void)
 {
        unsigned long addr, end_addr, entry_offset;
 
-       end_addr = CACHE_OC_ADDRESS_ARRAY + (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) * cpu_data->dcache.ways;
+       end_addr = CACHE_OC_ADDRESS_ARRAY +
+               (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) *
+               cpu_data->dcache.ways;
+
        entry_offset = 1 << cpu_data->dcache.entry_shift;
-       for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; addr += entry_offset) {
+       for (addr = CACHE_OC_ADDRESS_ARRAY;
+            addr < end_addr;
+            addr += entry_offset) {
                ctrl_outl(0, addr);
        }
 }
@@ -125,7 +132,8 @@ static void __flush_cache_4096_all_ex(unsigned long start)
        int i;
 
        entry_offset = 1 << cpu_data->dcache.entry_shift;
-       for (i = 0; i < cpu_data->dcache.ways; i++, start += cpu_data->dcache.way_incr) {
+       for (i = 0; i < cpu_data->dcache.ways;
+            i++, start += cpu_data->dcache.way_incr) {
                for (addr = CACHE_OC_ADDRESS_ARRAY + start;
                     addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start;
                     addr += entry_offset) {
@@ -153,14 +161,14 @@ void flush_icache_range(unsigned long start, unsigned long end)
 }
 
 /*
- * Write back the D-cache and purge the I-cache for signal trampoline. 
+ * Write back the D-cache and purge the I-cache for signal trampoline.
  * .. which happens to be the same behavior as flush_icache_range().
  * So, we simply flush out a line.
  */
 void flush_cache_sigtramp(unsigned long addr)
 {
        unsigned long v, index;
-       unsigned long flags; 
+       unsigned long flags;
        int i;
 
        v = addr & ~(L1_CACHE_BYTES-1);
@@ -172,7 +180,8 @@ void flush_cache_sigtramp(unsigned long addr)
 
        local_irq_save(flags);
        jump_to_P2();
-       for(i = 0; i < cpu_data->icache.ways; i++, index += cpu_data->icache.way_incr)
+       for (i = 0; i < cpu_data->icache.ways;
+            i++, index += cpu_data->icache.way_incr)
                ctrl_outl(0, index);    /* Clear out Valid-bit */
        back_to_P1();
        local_irq_restore(flags);
@@ -181,8 +190,7 @@ void flush_cache_sigtramp(unsigned long addr)
 static inline void flush_cache_4096(unsigned long start,
                                    unsigned long phys)
 {
-       unsigned long flags; 
-       extern void __flush_cache_4096(unsigned long addr, unsigned long phys, unsigned long exec_offset);
+       unsigned long flags;
 
        /*
         * SH7751, SH7751R, and ST40 have no restriction to handle cache.
@@ -191,10 +199,12 @@ static inline void flush_cache_4096(unsigned long start,
        if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG)
            || start < CACHE_OC_ADDRESS_ARRAY) {
                local_irq_save(flags);
-               __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0x20000000);
+               __flush_cache_4096(start | SH_CACHE_ASSOC,
+                                  P1SEGADDR(phys), 0x20000000);
                local_irq_restore(flags);
        } else {
-               __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0);
+               __flush_cache_4096(start | SH_CACHE_ASSOC,
+                                  P1SEGADDR(phys), 0);
        }
 }
 
@@ -231,29 +241,22 @@ static inline void flush_icache_all(void)
        local_irq_restore(flags);
 }
 
-void flush_cache_all(void)
+void flush_dcache_all(void)
 {
        if (cpu_data->dcache.ways == 1)
                __flush_dcache_all();
        else
                __flush_dcache_all_ex();
+}
+
+void flush_cache_all(void)
+{
+       flush_dcache_all();
        flush_icache_all();
 }
 
 void flush_cache_mm(struct mm_struct *mm)
 {
-       /* Is there any good way? */
-       /* XXX: possibly call flush_cache_range for each vm area */
-       /*
-        * FIXME: Really, the optimal solution here would be able to flush out
-        * individual lines created by the specified context, but this isn't
-        * feasible for a number of architectures (such as MIPS, and some
-        * SPARC) .. is this possible for SuperH?
-        *
-        * In the meantime, we'll just flush all of the caches.. this
-        * seems to be the simplest way to avoid at least a few wasted
-        * cache flushes. -Lethal
-        */
        flush_cache_all();
 }
 
@@ -301,13 +304,30 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
        unsigned long p = start & PAGE_MASK;
        pgd_t *dir;
        pmd_t *pmd;
+       pud_t *pud;
        pte_t *pte;
        pte_t entry;
        unsigned long phys;
        unsigned long d = 0;
 
+       /*
+        * Don't bother with the lookup and alias check if we have a
+        * wide range to cover, just blow away the dcache in its
+        * entirety instead. -- PFM.
+        */
+       if (((end - start) >> PAGE_SHIFT) >= 64) {
+               flush_dcache_all();
+
+               if (vma->vm_flags & VM_EXEC)
+                       flush_icache_all();
+
+               return;
+       }
+
        dir = pgd_offset(vma->vm_mm, p);
-       pmd = pmd_offset(dir, p);
+       pud = pud_offset(dir, p);
+       pmd = pmd_offset(pud, p);
+       end = PAGE_ALIGN(end);
 
        do {
                if (pmd_none(*pmd) || pmd_bad(*pmd)) {
@@ -322,7 +342,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
                if ((pte_val(entry) & _PAGE_PRESENT)) {
                        phys = pte_val(entry)&PTE_PHYS_MASK;
                        if ((p^phys) & CACHE_ALIAS) {
-                               d |= 1 << ((p & CACHE_ALIAS)>>12); 
+                               d |= 1 << ((p & CACHE_ALIAS)>>12);
                                d |= 1 << ((phys & CACHE_ALIAS)>>12);
                                if (d == 0x0f)
                                        goto loop_exit;
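
A note on the alias bookkeeping in this last hunk: the shift by 12 and
the terminal value 0x0f imply that CACHE_ALIAS covers address bits
12-13, i.e. four possible dcache alias colors. d collects every color
seen on either side of a mismatched virtual/physical pair, and once all
four colors have been observed (d == 0x0f) no further page can add
information, so the walk can stop early. A minimal sketch under that
assumption, with hypothetical page addresses:

/*
 * Sketch, not kernel code: accumulating dcache alias colors the way
 * flush_cache_range() does. Assumes CACHE_ALIAS == 0x3000 (bits 12-13,
 * four colors), which is inferred from the hunk, not taken from it.
 */
#include <stdio.h>

#define CACHE_ALIAS 0x3000UL /* assumed: bits 12-13 */

int main(void)
{
        /* hypothetical virtual/physical page pairs that alias */
        unsigned long pairs[][2] = {
                { 0x00001000UL, 0x00002000UL }, /* colors 1 and 2 */
                { 0x00003000UL, 0x00000000UL }, /* colors 3 and 0 */
        };
        unsigned long d = 0;

        for (int i = 0; i < 2; i++) {
                unsigned long p = pairs[i][0], phys = pairs[i][1];

                if ((p ^ phys) & CACHE_ALIAS) {
                        d |= 1UL << ((p & CACHE_ALIAS) >> 12);
                        d |= 1UL << ((phys & CACHE_ALIAS) >> 12);
                }
        }

        printf("d = %#lx\n", d); /* 0xf: all colors seen, stop early */
        return 0;
}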