author		Paul Mundt <lethal@linux-sh.org>	2006-09-26 22:29:55 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2006-09-26 22:29:55 -0400
commit		a252710fc5b63b24934905ca47ecf661702d7f00 (patch)
tree		7fbaddfb977095879f42c68a2a2f115d0e2314ee /arch
parent		e4e3b5ccd77226c9c4dbb0737106b868dfc182d9 (diff)
sh: flush_cache_range() cleanup and optimizations.
flush_cache_range() wasn't page aligning the end of the range; we can't assume that it will always be page aligned, and we ended up getting unaligned faults in some rare call paths.

Additionally, we add a small optimization to just purge the dcache entirely if the range is large enough that the page table walking will take longer. We use an arbitrary value of 64 pages for the large range size, as per sh64.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
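For reference, a condensed sketch of the fast path this patch adds to flush_cache_range() (abridged from the hunks below; only the new logic is shown, everything else in the function is unchanged):

	/* Sketch only -- see the full diff below for the actual change. */
	if (((end - start) >> PAGE_SHIFT) >= 64) {
		/*
		 * Purging the whole dcache is cheaper than walking the
		 * page tables for a range this large (>= 64 pages).
		 */
		flush_dcache_all();

		if (vma->vm_flags & VM_EXEC)
			flush_icache_all();

		return;
	}

	/*
	 * Otherwise page-align the (possibly unaligned) end of the range
	 * and fall through to the per-page walk as before.
	 */
	end = PAGE_ALIGN(end);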
Diffstat (limited to 'arch')
-rw-r--r--	arch/sh/mm/cache-sh4.c | 72
1 file changed, 46 insertions, 26 deletions
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 524cea5b47f9..94c05d09c3f7 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,7 +2,7 @@
  * arch/sh/mm/cache-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002 Niibe Yutaka
- * Copyright (C) 2001, 2002, 2003, 2004 Paul Mundt
+ * Copyright (C) 2001, 2002, 2003, 2004, 2005 Paul Mundt
  * Copyright (C) 2003 Richard Curnow
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -25,6 +25,8 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
+extern void __flush_cache_4096(unsigned long addr, unsigned long phys,
+			       unsigned long exec_offset);
 extern void __flush_cache_4096_all(unsigned long start);
 static void __flush_cache_4096_all_ex(unsigned long start);
 extern void __flush_dcache_all(void);
@@ -112,9 +114,14 @@ static void __flush_dcache_all_ex(void)
 {
 	unsigned long addr, end_addr, entry_offset;
 
-	end_addr = CACHE_OC_ADDRESS_ARRAY + (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) * cpu_data->dcache.ways;
+	end_addr = CACHE_OC_ADDRESS_ARRAY +
+		(cpu_data->dcache.sets << cpu_data->dcache.entry_shift) *
+		cpu_data->dcache.ways;
+
 	entry_offset = 1 << cpu_data->dcache.entry_shift;
-	for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; addr += entry_offset) {
+	for (addr = CACHE_OC_ADDRESS_ARRAY;
+	     addr < end_addr;
+	     addr += entry_offset) {
 		ctrl_outl(0, addr);
 	}
 }
@@ -125,7 +132,8 @@ static void __flush_cache_4096_all_ex(unsigned long start)
 	int i;
 
 	entry_offset = 1 << cpu_data->dcache.entry_shift;
-	for (i = 0; i < cpu_data->dcache.ways; i++, start += cpu_data->dcache.way_incr) {
+	for (i = 0; i < cpu_data->dcache.ways;
+	     i++, start += cpu_data->dcache.way_incr) {
 		for (addr = CACHE_OC_ADDRESS_ARRAY + start;
 		     addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start;
 		     addr += entry_offset) {
@@ -153,14 +161,14 @@ void flush_icache_range(unsigned long start, unsigned long end)
 }
 
 /*
  * Write back the D-cache and purge the I-cache for signal trampoline.
  * .. which happens to be the same behavior as flush_icache_range().
  * So, we simply flush out a line.
  */
 void flush_cache_sigtramp(unsigned long addr)
 {
 	unsigned long v, index;
 	unsigned long flags;
 	int i;
 
 	v = addr & ~(L1_CACHE_BYTES-1);
@@ -172,7 +180,8 @@ void flush_cache_sigtramp(unsigned long addr)
 
 	local_irq_save(flags);
 	jump_to_P2();
-	for(i = 0; i < cpu_data->icache.ways; i++, index += cpu_data->icache.way_incr)
+	for (i = 0; i < cpu_data->icache.ways;
+	     i++, index += cpu_data->icache.way_incr)
 		ctrl_outl(0, index);	/* Clear out Valid-bit */
 	back_to_P1();
 	local_irq_restore(flags);
@@ -181,8 +190,7 @@ void flush_cache_sigtramp(unsigned long addr)
 static inline void flush_cache_4096(unsigned long start,
 				    unsigned long phys)
 {
 	unsigned long flags;
-	extern void __flush_cache_4096(unsigned long addr, unsigned long phys, unsigned long exec_offset);
 
 	/*
 	 * SH7751, SH7751R, and ST40 have no restriction to handle cache.
@@ -191,10 +199,12 @@ static inline void flush_cache_4096(unsigned long start,
 	if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG)
 	    || start < CACHE_OC_ADDRESS_ARRAY) {
 		local_irq_save(flags);
-		__flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0x20000000);
+		__flush_cache_4096(start | SH_CACHE_ASSOC,
+				   P1SEGADDR(phys), 0x20000000);
 		local_irq_restore(flags);
 	} else {
-		__flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0);
+		__flush_cache_4096(start | SH_CACHE_ASSOC,
+				   P1SEGADDR(phys), 0);
 	}
 }
 
@@ -231,29 +241,22 @@ static inline void flush_icache_all(void)
 	local_irq_restore(flags);
 }
 
-void flush_cache_all(void)
+void flush_dcache_all(void)
 {
 	if (cpu_data->dcache.ways == 1)
 		__flush_dcache_all();
 	else
 		__flush_dcache_all_ex();
+}
+
+void flush_cache_all(void)
+{
+	flush_dcache_all();
 	flush_icache_all();
 }
 
 void flush_cache_mm(struct mm_struct *mm)
 {
-	/* Is there any good way? */
-	/* XXX: possibly call flush_cache_range for each vm area */
-	/*
-	 * FIXME: Really, the optimal solution here would be able to flush out
-	 * individual lines created by the specified context, but this isn't
-	 * feasible for a number of architectures (such as MIPS, and some
-	 * SPARC) .. is this possible for SuperH?
-	 *
-	 * In the meantime, we'll just flush all of the caches.. this
-	 * seems to be the simplest way to avoid at least a few wasted
-	 * cache flushes. -Lethal
-	 */
 	flush_cache_all();
 }
 
@@ -301,13 +304,30 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 	unsigned long p = start & PAGE_MASK;
 	pgd_t *dir;
 	pmd_t *pmd;
+	pud_t *pud;
 	pte_t *pte;
 	pte_t entry;
 	unsigned long phys;
 	unsigned long d = 0;
 
+	/*
+	 * Don't bother with the lookup and alias check if we have a
+	 * wide range to cover, just blow away the dcache in its
+	 * entirety instead. -- PFM.
+	 */
+	if (((end - start) >> PAGE_SHIFT) >= 64) {
+		flush_dcache_all();
+
+		if (vma->vm_flags & VM_EXEC)
+			flush_icache_all();
+
+		return;
+	}
+
 	dir = pgd_offset(vma->vm_mm, p);
-	pmd = pmd_offset(dir, p);
+	pud = pud_offset(dir, p);
+	pmd = pmd_offset(pud, p);
+	end = PAGE_ALIGN(end);
 
 	do {
 		if (pmd_none(*pmd) || pmd_bad(*pmd)) {
@@ -322,7 +342,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		if ((pte_val(entry) & _PAGE_PRESENT)) {
 			phys = pte_val(entry)&PTE_PHYS_MASK;
 			if ((p^phys) & CACHE_ALIAS) {
 				d |= 1 << ((p & CACHE_ALIAS)>>12);
 				d |= 1 << ((phys & CACHE_ALIAS)>>12);
 				if (d == 0x0f)
 					goto loop_exit;