Diffstat (limited to 'arch/sh/mm/cache-sh4.c')
-rw-r--r-- | arch/sh/mm/cache-sh4.c | 685 |
1 file changed, 548 insertions, 137 deletions
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 524cea5b47..e48cc22724 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,49 +2,120 @@ | |||
2 | * arch/sh/mm/cache-sh4.c | 2 | * arch/sh/mm/cache-sh4.c |
3 | * | 3 | * |
4 | * Copyright (C) 1999, 2000, 2002 Niibe Yutaka | 4 | * Copyright (C) 1999, 2000, 2002 Niibe Yutaka |
5 | * Copyright (C) 2001, 2002, 2003, 2004 Paul Mundt | 5 | * Copyright (C) 2001 - 2006 Paul Mundt |
6 | * Copyright (C) 2003 Richard Curnow | 6 | * Copyright (C) 2003 Richard Curnow |
7 | * | 7 | * |
8 | * This file is subject to the terms and conditions of the GNU General Public | 8 | * This file is subject to the terms and conditions of the GNU General Public |
9 | * License. See the file "COPYING" in the main directory of this archive | 9 | * License. See the file "COPYING" in the main directory of this archive |
10 | * for more details. | 10 | * for more details. |
11 | */ | 11 | */ |
12 | |||
13 | #include <linux/init.h> | 12 | #include <linux/init.h> |
14 | #include <linux/mman.h> | ||
15 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
16 | #include <linux/threads.h> | ||
17 | #include <asm/addrspace.h> | 14 | #include <asm/addrspace.h> |
18 | #include <asm/page.h> | ||
19 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
20 | #include <asm/processor.h> | 16 | #include <asm/processor.h> |
21 | #include <asm/cache.h> | 17 | #include <asm/cache.h> |
22 | #include <asm/io.h> | 18 | #include <asm/io.h> |
23 | #include <asm/uaccess.h> | ||
24 | #include <asm/pgalloc.h> | 19 | #include <asm/pgalloc.h> |
25 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
26 | #include <asm/cacheflush.h> | 21 | #include <asm/cacheflush.h> |
27 | 22 | ||
28 | extern void __flush_cache_4096_all(unsigned long start); | 23 | /* |
29 | static void __flush_cache_4096_all_ex(unsigned long start); | 24 | * The maximum number of pages we support up to when doing ranged dcache |
30 | extern void __flush_dcache_all(void); | 25 | * flushing. Anything exceeding this will simply flush the dcache in its |
31 | static void __flush_dcache_all_ex(void); | 26 | * entirety. |
27 | */ | ||
28 | #define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */ | ||
29 | |||
30 | static void __flush_dcache_segment_1way(unsigned long start, | ||
31 | unsigned long extent); | ||
32 | static void __flush_dcache_segment_2way(unsigned long start, | ||
33 | unsigned long extent); | ||
34 | static void __flush_dcache_segment_4way(unsigned long start, | ||
35 | unsigned long extent); | ||
36 | |||
37 | static void __flush_cache_4096(unsigned long addr, unsigned long phys, | ||
38 | unsigned long exec_offset); | ||
39 | |||
40 | /* | ||
41 | * This is initialised here to ensure that it is not placed in the BSS. If | ||
42 | * that were to happen, note that cache_init gets called before the BSS is | ||
43 | * cleared, so this would get nulled out which would be hopeless. | ||
44 | */ | ||
45 | static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) = | ||
46 | (void (*)(unsigned long, unsigned long))0xdeadbeef; | ||
47 | |||
48 | static void compute_alias(struct cache_info *c) | ||
49 | { | ||
50 | c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1); | ||
51 | c->n_aliases = (c->alias_mask >> PAGE_SHIFT) + 1; | ||
52 | } | ||
53 | |||
54 | static void __init emit_cache_params(void) | ||
55 | { | ||
56 | printk("PVR=%08x CVR=%08x PRR=%08x\n", | ||
57 | ctrl_inl(CCN_PVR), | ||
58 | ctrl_inl(CCN_CVR), | ||
59 | ctrl_inl(CCN_PRR)); | ||
60 | printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n", | ||
61 | cpu_data->icache.ways, | ||
62 | cpu_data->icache.sets, | ||
63 | cpu_data->icache.way_incr); | ||
64 | printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", | ||
65 | cpu_data->icache.entry_mask, | ||
66 | cpu_data->icache.alias_mask, | ||
67 | cpu_data->icache.n_aliases); | ||
68 | printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n", | ||
69 | cpu_data->dcache.ways, | ||
70 | cpu_data->dcache.sets, | ||
71 | cpu_data->dcache.way_incr); | ||
72 | printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", | ||
73 | cpu_data->dcache.entry_mask, | ||
74 | cpu_data->dcache.alias_mask, | ||
75 | cpu_data->dcache.n_aliases); | ||
76 | |||
77 | if (!__flush_dcache_segment_fn) | ||
78 | panic("unknown number of cache ways\n"); | ||
79 | } | ||
32 | 80 | ||
33 | /* | 81 | /* |
34 | * SH-4 has virtually indexed and physically tagged cache. | 82 | * SH-4 has virtually indexed and physically tagged cache. |
35 | */ | 83 | */ |
36 | 84 | ||
37 | struct semaphore p3map_sem[4]; | 85 | /* Worst case assumed to be 64k cache, direct-mapped i.e. 4 synonym bits. */ |
86 | #define MAX_P3_SEMAPHORES 16 | ||
87 | |||
88 | struct semaphore p3map_sem[MAX_P3_SEMAPHORES]; | ||
38 | 89 | ||
39 | void __init p3_cache_init(void) | 90 | void __init p3_cache_init(void) |
40 | { | 91 | { |
41 | if (remap_area_pages(P3SEG, 0, PAGE_SIZE*4, _PAGE_CACHABLE)) | 92 | int i; |
93 | |||
94 | compute_alias(&cpu_data->icache); | ||
95 | compute_alias(&cpu_data->dcache); | ||
96 | |||
97 | switch (cpu_data->dcache.ways) { | ||
98 | case 1: | ||
99 | __flush_dcache_segment_fn = __flush_dcache_segment_1way; | ||
100 | break; | ||
101 | case 2: | ||
102 | __flush_dcache_segment_fn = __flush_dcache_segment_2way; | ||
103 | break; | ||
104 | case 4: | ||
105 | __flush_dcache_segment_fn = __flush_dcache_segment_4way; | ||
106 | break; | ||
107 | default: | ||
108 | __flush_dcache_segment_fn = NULL; | ||
109 | break; | ||
110 | } | ||
111 | |||
112 | emit_cache_params(); | ||
113 | |||
114 | if (remap_area_pages(P3SEG, 0, PAGE_SIZE * 4, _PAGE_CACHABLE)) | ||
42 | panic("%s failed.", __FUNCTION__); | 115 | panic("%s failed.", __FUNCTION__); |
43 | 116 | ||
44 | sema_init (&p3map_sem[0], 1); | 117 | for (i = 0; i < cpu_data->dcache.n_aliases; i++) |
45 | sema_init (&p3map_sem[1], 1); | 118 | sema_init(&p3map_sem[i], 1); |
46 | sema_init (&p3map_sem[2], 1); | ||
47 | sema_init (&p3map_sem[3], 1); | ||
48 | } | 119 | } |
49 | 120 | ||
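For reference, a standalone sketch (not part of this patch) of what compute_alias() yields for an SH7750-style 16 KiB, direct-mapped D-cache with 32-byte lines and 4 KiB pages -- the four-colour case the hard-coded handling removed elsewhere in this diff assumed:

/* Hypothetical values; sets, entry_shift and the page size mirror a
 * 16 KiB, 1-way, 32-byte-line D-cache with 4 KiB pages. */
#include <stdio.h>

#define EX_PAGE_SHIFT	12
#define EX_PAGE_SIZE	(1UL << EX_PAGE_SHIFT)

int main(void)
{
	unsigned long sets = 512, entry_shift = 5;	/* 16 KiB / 32 B lines */
	unsigned long alias_mask, n_aliases;

	alias_mask = ((sets - 1) << entry_shift) & ~(EX_PAGE_SIZE - 1);
	n_aliases  = (alias_mask >> EX_PAGE_SHIFT) + 1;

	/* Prints alias_mask=0x3000 n_aliases=4: one way spans four pages,
	 * so a physical page may be cached at any of four colours. */
	printf("alias_mask=%#lx n_aliases=%lu\n", alias_mask, n_aliases);
	return 0;
}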
50 | /* | 121 | /* |
@@ -89,7 +160,6 @@ void __flush_purge_region(void *start, int size) | |||
89 | } | 160 | } |
90 | } | 161 | } |
91 | 162 | ||
92 | |||
93 | /* | 163 | /* |
94 | * No write back please | 164 | * No write back please |
95 | */ | 165 | */ |
@@ -108,40 +178,6 @@ void __flush_invalidate_region(void *start, int size) | |||
108 | } | 178 | } |
109 | } | 179 | } |
110 | 180 | ||
111 | static void __flush_dcache_all_ex(void) | ||
112 | { | ||
113 | unsigned long addr, end_addr, entry_offset; | ||
114 | |||
115 | end_addr = CACHE_OC_ADDRESS_ARRAY + (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) * cpu_data->dcache.ways; | ||
116 | entry_offset = 1 << cpu_data->dcache.entry_shift; | ||
117 | for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; addr += entry_offset) { | ||
118 | ctrl_outl(0, addr); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static void __flush_cache_4096_all_ex(unsigned long start) | ||
123 | { | ||
124 | unsigned long addr, entry_offset; | ||
125 | int i; | ||
126 | |||
127 | entry_offset = 1 << cpu_data->dcache.entry_shift; | ||
128 | for (i = 0; i < cpu_data->dcache.ways; i++, start += cpu_data->dcache.way_incr) { | ||
129 | for (addr = CACHE_OC_ADDRESS_ARRAY + start; | ||
130 | addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start; | ||
131 | addr += entry_offset) { | ||
132 | ctrl_outl(0, addr); | ||
133 | } | ||
134 | } | ||
135 | } | ||
136 | |||
137 | void flush_cache_4096_all(unsigned long start) | ||
138 | { | ||
139 | if (cpu_data->dcache.ways == 1) | ||
140 | __flush_cache_4096_all(start); | ||
141 | else | ||
142 | __flush_cache_4096_all_ex(start); | ||
143 | } | ||
144 | |||
145 | /* | 181 | /* |
146 | * Write back the range of D-cache, and purge the I-cache. | 182 | * Write back the range of D-cache, and purge the I-cache. |
147 | * | 183 | * |
@@ -153,14 +189,14 @@ void flush_icache_range(unsigned long start, unsigned long end) | |||
153 | } | 189 | } |
154 | 190 | ||
155 | /* | 191 | /* |
156 | * Write back the D-cache and purge the I-cache for signal trampoline. | 192 | * Write back the D-cache and purge the I-cache for signal trampoline. |
157 | * .. which happens to be the same behavior as flush_icache_range(). | 193 | * .. which happens to be the same behavior as flush_icache_range(). |
158 | * So, we simply flush out a line. | 194 | * So, we simply flush out a line. |
159 | */ | 195 | */ |
160 | void flush_cache_sigtramp(unsigned long addr) | 196 | void flush_cache_sigtramp(unsigned long addr) |
161 | { | 197 | { |
162 | unsigned long v, index; | 198 | unsigned long v, index; |
163 | unsigned long flags; | 199 | unsigned long flags; |
164 | int i; | 200 | int i; |
165 | 201 | ||
166 | v = addr & ~(L1_CACHE_BYTES-1); | 202 | v = addr & ~(L1_CACHE_BYTES-1); |
@@ -172,30 +208,33 @@ void flush_cache_sigtramp(unsigned long addr) | |||
172 | 208 | ||
173 | local_irq_save(flags); | 209 | local_irq_save(flags); |
174 | jump_to_P2(); | 210 | jump_to_P2(); |
175 | for(i = 0; i < cpu_data->icache.ways; i++, index += cpu_data->icache.way_incr) | 211 | |
212 | for (i = 0; i < cpu_data->icache.ways; | ||
213 | i++, index += cpu_data->icache.way_incr) | ||
176 | ctrl_outl(0, index); /* Clear out Valid-bit */ | 214 | ctrl_outl(0, index); /* Clear out Valid-bit */ |
215 | |||
177 | back_to_P1(); | 216 | back_to_P1(); |
217 | wmb(); | ||
178 | local_irq_restore(flags); | 218 | local_irq_restore(flags); |
179 | } | 219 | } |
180 | 220 | ||
181 | static inline void flush_cache_4096(unsigned long start, | 221 | static inline void flush_cache_4096(unsigned long start, |
182 | unsigned long phys) | 222 | unsigned long phys) |
183 | { | 223 | { |
184 | unsigned long flags; | 224 | unsigned long flags, exec_offset = 0; |
185 | extern void __flush_cache_4096(unsigned long addr, unsigned long phys, unsigned long exec_offset); | ||
186 | 225 | ||
187 | /* | 226 | /* |
188 | * SH7751, SH7751R, and ST40 have no restriction to handle cache. | 227 | * All types of SH-4 require PC to be in P2 to operate on the I-cache. |
189 | * (While SH7750 must do that at P2 area.) | 228 | * Some types of SH-4 require PC to be in P2 to operate on the D-cache. |
190 | */ | 229 | */ |
191 | if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) | 230 | if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) || |
192 | || start < CACHE_OC_ADDRESS_ARRAY) { | 231 | (start < CACHE_OC_ADDRESS_ARRAY)) |
193 | local_irq_save(flags); | 232 | exec_offset = 0x20000000; |
194 | __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0x20000000); | 233 | |
195 | local_irq_restore(flags); | 234 | local_irq_save(flags); |
196 | } else { | 235 | __flush_cache_4096(start | SH_CACHE_ASSOC, |
197 | __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0); | 236 | P1SEGADDR(phys), exec_offset); |
198 | } | 237 | local_irq_restore(flags); |
199 | } | 238 | } |
200 | 239 | ||
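The exec_offset passed down by the reworked flush_cache_4096() is simply the P1-to-P2 distance in the SH-4 address map, so the flush loop runs from the uncached P2 alias of the same code when the CPU requires it. A minimal illustration of that arithmetic (my own names, not from the patch):

/* SH-4 fixed mappings: P1 (0x80000000-0x9fffffff, cached) and
 * P2 (0xa0000000-0xbfffffff, uncached) alias the same physical memory,
 * so adding 0x20000000 to a P1 address gives its uncached P2 view. */
#define EX_P1SEG	0x80000000UL
#define EX_P2SEG	0xa0000000UL
#define EX_P1_TO_P2	(EX_P2SEG - EX_P1SEG)		/* 0x20000000 */

static inline unsigned long ex_p1_to_p2(unsigned long p1addr)
{
	return p1addr + EX_P1_TO_P2;
}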
201 | /* | 240 | /* |
@@ -206,15 +245,19 @@ void flush_dcache_page(struct page *page) | |||
206 | { | 245 | { |
207 | if (test_bit(PG_mapped, &page->flags)) { | 246 | if (test_bit(PG_mapped, &page->flags)) { |
208 | unsigned long phys = PHYSADDR(page_address(page)); | 247 | unsigned long phys = PHYSADDR(page_address(page)); |
248 | unsigned long addr = CACHE_OC_ADDRESS_ARRAY; | ||
249 | int i, n; | ||
209 | 250 | ||
210 | /* Loop all the D-cache */ | 251 | /* Loop all the D-cache */ |
211 | flush_cache_4096(CACHE_OC_ADDRESS_ARRAY, phys); | 252 | n = cpu_data->dcache.n_aliases; |
212 | flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys); | 253 | for (i = 0; i < n; i++, addr += PAGE_SIZE) |
213 | flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys); | 254 | flush_cache_4096(addr, phys); |
214 | flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys); | ||
215 | } | 255 | } |
256 | |||
257 | wmb(); | ||
216 | } | 258 | } |
217 | 259 | ||
260 | /* TODO: Selective icache invalidation through IC address array.. */ | ||
218 | static inline void flush_icache_all(void) | 261 | static inline void flush_icache_all(void) |
219 | { | 262 | { |
220 | unsigned long flags, ccr; | 263 | unsigned long flags, ccr; |
@@ -227,34 +270,142 @@ static inline void flush_icache_all(void) | |||
227 | ccr |= CCR_CACHE_ICI; | 270 | ccr |= CCR_CACHE_ICI; |
228 | ctrl_outl(ccr, CCR); | 271 | ctrl_outl(ccr, CCR); |
229 | 272 | ||
273 | /* | ||
274 | * back_to_P1() will take care of the barrier for us, don't add | ||
275 | * another one! | ||
276 | */ | ||
277 | |||
230 | back_to_P1(); | 278 | back_to_P1(); |
231 | local_irq_restore(flags); | 279 | local_irq_restore(flags); |
232 | } | 280 | } |
233 | 281 | ||
282 | void flush_dcache_all(void) | ||
283 | { | ||
284 | (*__flush_dcache_segment_fn)(0UL, cpu_data->dcache.way_size); | ||
285 | wmb(); | ||
286 | } | ||
287 | |||
234 | void flush_cache_all(void) | 288 | void flush_cache_all(void) |
235 | { | 289 | { |
236 | if (cpu_data->dcache.ways == 1) | 290 | flush_dcache_all(); |
237 | __flush_dcache_all(); | ||
238 | else | ||
239 | __flush_dcache_all_ex(); | ||
240 | flush_icache_all(); | 291 | flush_icache_all(); |
241 | } | 292 | } |
242 | 293 | ||
294 | static void __flush_cache_mm(struct mm_struct *mm, unsigned long start, | ||
295 | unsigned long end) | ||
296 | { | ||
297 | unsigned long d = 0, p = start & PAGE_MASK; | ||
298 | unsigned long alias_mask = cpu_data->dcache.alias_mask; | ||
299 | unsigned long n_aliases = cpu_data->dcache.n_aliases; | ||
300 | unsigned long select_bit; | ||
301 | unsigned long all_aliases_mask; | ||
302 | unsigned long addr_offset; | ||
303 | pgd_t *dir; | ||
304 | pmd_t *pmd; | ||
305 | pud_t *pud; | ||
306 | pte_t *pte; | ||
307 | int i; | ||
308 | |||
309 | dir = pgd_offset(mm, p); | ||
310 | pud = pud_offset(dir, p); | ||
311 | pmd = pmd_offset(pud, p); | ||
312 | end = PAGE_ALIGN(end); | ||
313 | |||
314 | all_aliases_mask = (1 << n_aliases) - 1; | ||
315 | |||
316 | do { | ||
317 | if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) { | ||
318 | p &= PMD_MASK; | ||
319 | p += PMD_SIZE; | ||
320 | pmd++; | ||
321 | |||
322 | continue; | ||
323 | } | ||
324 | |||
325 | pte = pte_offset_kernel(pmd, p); | ||
326 | |||
327 | do { | ||
328 | unsigned long phys; | ||
329 | pte_t entry = *pte; | ||
330 | |||
331 | if (!(pte_val(entry) & _PAGE_PRESENT)) { | ||
332 | pte++; | ||
333 | p += PAGE_SIZE; | ||
334 | continue; | ||
335 | } | ||
336 | |||
337 | phys = pte_val(entry) & PTE_PHYS_MASK; | ||
338 | |||
339 | if ((p ^ phys) & alias_mask) { | ||
340 | d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); | ||
341 | d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); | ||
342 | |||
343 | if (d == all_aliases_mask) | ||
344 | goto loop_exit; | ||
345 | } | ||
346 | |||
347 | pte++; | ||
348 | p += PAGE_SIZE; | ||
349 | } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); | ||
350 | pmd++; | ||
351 | } while (p < end); | ||
352 | |||
353 | loop_exit: | ||
354 | addr_offset = 0; | ||
355 | select_bit = 1; | ||
356 | |||
357 | for (i = 0; i < n_aliases; i++) { | ||
358 | if (d & select_bit) { | ||
359 | (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); | ||
360 | wmb(); | ||
361 | } | ||
362 | |||
363 | select_bit <<= 1; | ||
364 | addr_offset += PAGE_SIZE; | ||
365 | } | ||
366 | } | ||
367 | |||
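The bitmap 'd' built above records which cache colours are reachable through mismatched virtual/physical mappings. A sketch of the same test factored out in isolation (hypothetical helper, alias_mask of 0x3000 as in the four-colour example earlier):

/* Not from the patch: the per-page alias test on its own. */
static unsigned long ex_alias_bits(unsigned long virt, unsigned long phys,
				   unsigned long alias_mask)
{
	unsigned long d = 0;

	if ((virt ^ phys) & alias_mask) {
		/* Colours differ, so both the virtual and the physical
		 * colour of this page may hold stale lines. */
		d |= 1UL << ((virt & alias_mask) >> 12);	/* PAGE_SHIFT */
		d |= 1UL << ((phys & alias_mask) >> 12);
	}
	return d;
}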
368 | /* | ||
369 | * Note : (RPC) since the caches are physically tagged, the only point | ||
370 | * of flush_cache_mm for SH-4 is to get rid of aliases from the | ||
371 | * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that | ||
372 | * lines can stay resident so long as the virtual address they were | ||
373 | * accessed with (hence cache set) is in accord with the physical | ||
374 | * address (i.e. tag). It's no different here. So I reckon we don't | ||
375 | * need to flush the I-cache, since aliases don't matter for that. We | ||
376 | * should try that. | ||
377 | * | ||
378 | * Caller takes mm->mmap_sem. | ||
379 | */ | ||
243 | void flush_cache_mm(struct mm_struct *mm) | 380 | void flush_cache_mm(struct mm_struct *mm) |
244 | { | 381 | { |
245 | /* Is there any good way? */ | 382 | /* |
246 | /* XXX: possibly call flush_cache_range for each vm area */ | 383 | * If cache is only 4k-per-way, there are never any 'aliases'. Since |
247 | /* | 384 | * the cache is physically tagged, the data can just be left in there. |
248 | * FIXME: Really, the optimal solution here would be able to flush out | ||
249 | * individual lines created by the specified context, but this isn't | ||
250 | * feasible for a number of architectures (such as MIPS, and some | ||
251 | * SPARC) .. is this possible for SuperH? | ||
252 | * | ||
253 | * In the meantime, we'll just flush all of the caches.. this | ||
254 | * seems to be the simplest way to avoid at least a few wasted | ||
255 | * cache flushes. -Lethal | ||
256 | */ | 385 | */ |
257 | flush_cache_all(); | 386 | if (cpu_data->dcache.n_aliases == 0) |
387 | return; | ||
388 | |||
389 | /* | ||
390 | * Don't bother groveling around the dcache for the VMA ranges | ||
391 | * if there are too many PTEs to make it worthwhile. | ||
392 | */ | ||
393 | if (mm->nr_ptes >= MAX_DCACHE_PAGES) | ||
394 | flush_dcache_all(); | ||
395 | else { | ||
396 | struct vm_area_struct *vma; | ||
397 | |||
398 | /* | ||
399 | * In this case there are reasonably sized ranges to flush, | ||
400 | * iterate through the VMA list and take care of any aliases. | ||
401 | */ | ||
402 | for (vma = mm->mmap; vma; vma = vma->vm_next) | ||
403 | __flush_cache_mm(mm, vma->vm_start, vma->vm_end); | ||
404 | } | ||
405 | |||
406 | /* Only touch the icache if one of the VMAs has VM_EXEC set. */ | ||
407 | if (mm->exec_vm) | ||
408 | flush_icache_all(); | ||
258 | } | 409 | } |
259 | 410 | ||
260 | /* | 411 | /* |
@@ -263,27 +414,40 @@ void flush_cache_mm(struct mm_struct *mm) | |||
263 | * ADDR: Virtual Address (U0 address) | 414 | * ADDR: Virtual Address (U0 address) |
264 | * PFN: Physical page number | 415 | * PFN: Physical page number |
265 | */ | 416 | */ |
266 | void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) | 417 | void flush_cache_page(struct vm_area_struct *vma, unsigned long address, |
418 | unsigned long pfn) | ||
267 | { | 419 | { |
268 | unsigned long phys = pfn << PAGE_SHIFT; | 420 | unsigned long phys = pfn << PAGE_SHIFT; |
421 | unsigned int alias_mask; | ||
422 | |||
423 | alias_mask = cpu_data->dcache.alias_mask; | ||
269 | 424 | ||
270 | /* We only need to flush D-cache when we have alias */ | 425 | /* We only need to flush D-cache when we have alias */ |
271 | if ((address^phys) & CACHE_ALIAS) { | 426 | if ((address^phys) & alias_mask) { |
272 | /* Loop 4K of the D-cache */ | 427 | /* Loop 4K of the D-cache */ |
273 | flush_cache_4096( | 428 | flush_cache_4096( |
274 | CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS), | 429 | CACHE_OC_ADDRESS_ARRAY | (address & alias_mask), |
275 | phys); | 430 | phys); |
276 | /* Loop another 4K of the D-cache */ | 431 | /* Loop another 4K of the D-cache */ |
277 | flush_cache_4096( | 432 | flush_cache_4096( |
278 | CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS), | 433 | CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask), |
279 | phys); | 434 | phys); |
280 | } | 435 | } |
281 | 436 | ||
282 | if (vma->vm_flags & VM_EXEC) | 437 | alias_mask = cpu_data->icache.alias_mask; |
283 | /* Loop 4K (half) of the I-cache */ | 438 | if (vma->vm_flags & VM_EXEC) { |
439 | /* | ||
440 | * Evict entries from the portion of the cache from which code | ||
441 | * may have been executed at this address (virtual). There's | ||
442 | * no need to evict from the portion corresponding to the | ||
443 | * physical address as for the D-cache, because we know the | ||
444 | * kernel has never executed the code through its identity | ||
445 | * translation. | ||
446 | */ | ||
284 | flush_cache_4096( | 447 | flush_cache_4096( |
285 | CACHE_IC_ADDRESS_ARRAY | (address & 0x1000), | 448 | CACHE_IC_ADDRESS_ARRAY | (address & alias_mask), |
286 | phys); | 449 | phys); |
450 | } | ||
287 | } | 451 | } |
288 | 452 | ||
289 | /* | 453 | /* |
@@ -298,52 +462,31 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigne | |||
298 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, | 462 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, |
299 | unsigned long end) | 463 | unsigned long end) |
300 | { | 464 | { |
301 | unsigned long p = start & PAGE_MASK; | 465 | /* |
302 | pgd_t *dir; | 466 | * If cache is only 4k-per-way, there are never any 'aliases'. Since |
303 | pmd_t *pmd; | 467 | * the cache is physically tagged, the data can just be left in there. |
304 | pte_t *pte; | 468 | */ |
305 | pte_t entry; | 469 | if (cpu_data->dcache.n_aliases == 0) |
306 | unsigned long phys; | 470 | return; |
307 | unsigned long d = 0; | ||
308 | |||
309 | dir = pgd_offset(vma->vm_mm, p); | ||
310 | pmd = pmd_offset(dir, p); | ||
311 | 471 | ||
312 | do { | 472 | /* |
313 | if (pmd_none(*pmd) || pmd_bad(*pmd)) { | 473 | * Don't bother with the lookup and alias check if we have a |
314 | p &= ~((1 << PMD_SHIFT) -1); | 474 | * wide range to cover, just blow away the dcache in its |
315 | p += (1 << PMD_SHIFT); | 475 | * entirety instead. -- PFM. |
316 | pmd++; | 476 | */ |
317 | continue; | 477 | if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES) |
318 | } | 478 | flush_dcache_all(); |
319 | pte = pte_offset_kernel(pmd, p); | 479 | else |
320 | do { | 480 | __flush_cache_mm(vma->vm_mm, start, end); |
321 | entry = *pte; | 481 | |
322 | if ((pte_val(entry) & _PAGE_PRESENT)) { | 482 | if (vma->vm_flags & VM_EXEC) { |
323 | phys = pte_val(entry)&PTE_PHYS_MASK; | 483 | /* |
324 | if ((p^phys) & CACHE_ALIAS) { | 484 | * TODO: Is this required??? Need to look at how I-cache |
325 | d |= 1 << ((p & CACHE_ALIAS)>>12); | 485 | * coherency is assured when new programs are loaded to see if |
326 | d |= 1 << ((phys & CACHE_ALIAS)>>12); | 486 | * this matters. |
327 | if (d == 0x0f) | 487 | */ |
328 | goto loop_exit; | ||
329 | } | ||
330 | } | ||
331 | pte++; | ||
332 | p += PAGE_SIZE; | ||
333 | } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); | ||
334 | pmd++; | ||
335 | } while (p < end); | ||
336 | loop_exit: | ||
337 | if (d & 1) | ||
338 | flush_cache_4096_all(0); | ||
339 | if (d & 2) | ||
340 | flush_cache_4096_all(0x1000); | ||
341 | if (d & 4) | ||
342 | flush_cache_4096_all(0x2000); | ||
343 | if (d & 8) | ||
344 | flush_cache_4096_all(0x3000); | ||
345 | if (vma->vm_flags & VM_EXEC) | ||
346 | flush_icache_all(); | 488 | flush_icache_all(); |
489 | } | ||
347 | } | 490 | } |
348 | 491 | ||
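With 4 KiB pages and MAX_DCACHE_PAGES set to 64, the threshold above means any range of 256 KiB or more skips the page-table walk entirely. A trivial sketch of the check (illustrative names only):

#define EX_PAGE_SHIFT		12
#define EX_MAX_DCACHE_PAGES	64	/* as defined at the top of the file */

static inline int ex_use_full_flush(unsigned long start, unsigned long end)
{
	/* 64 pages * 4 KiB = 256 KiB and up: flush the whole dcache. */
	return ((end - start) >> EX_PAGE_SHIFT) >= EX_MAX_DCACHE_PAGES;
}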
349 | /* | 492 | /* |
@@ -357,5 +500,273 @@ void flush_icache_user_range(struct vm_area_struct *vma, | |||
357 | struct page *page, unsigned long addr, int len) | 500 | struct page *page, unsigned long addr, int len) |
358 | { | 501 | { |
359 | flush_cache_page(vma, addr, page_to_pfn(page)); | 502 | flush_cache_page(vma, addr, page_to_pfn(page)); |
503 | mb(); | ||
504 | } | ||
505 | |||
506 | /** | ||
507 | * __flush_cache_4096 | ||
508 | * | ||
509 | * @addr: address in memory mapped cache array | ||
510 | * @phys: P1 address to flush (has to match tags if addr has 'A' bit | ||
511 | * set i.e. associative write) | ||
512 | * @exec_offset: set to 0x20000000 if flush has to be executed from P2 | ||
513 | * region else 0x0 | ||
514 | * | ||
515 | * The offset into the cache array implied by 'addr' selects the | ||
516 | * 'colour' of the virtual address range that will be flushed. The | ||
517 | * operation (purge/write-back) is selected by the lower 2 bits of | ||
518 | * 'phys'. | ||
519 | */ | ||
520 | static void __flush_cache_4096(unsigned long addr, unsigned long phys, | ||
521 | unsigned long exec_offset) | ||
522 | { | ||
523 | int way_count; | ||
524 | unsigned long base_addr = addr; | ||
525 | struct cache_info *dcache; | ||
526 | unsigned long way_incr; | ||
527 | unsigned long a, ea, p; | ||
528 | unsigned long temp_pc; | ||
529 | |||
530 | dcache = &cpu_data->dcache; | ||
531 | /* Write this way for better assembly. */ | ||
532 | way_count = dcache->ways; | ||
533 | way_incr = dcache->way_incr; | ||
534 | |||
535 | /* | ||
536 | * Apply exec_offset (i.e. branch to P2 if required.). | ||
537 | * | ||
538 | * FIXME: | ||
539 | * | ||
540 | * If I write "=r" for the (temp_pc), it puts this in r6 hence | ||
541 | * trashing exec_offset before it's been added on - why? Hence | ||
542 | * "=&r" as a 'workaround' | ||
543 | */ | ||
544 | asm volatile("mov.l 1f, %0\n\t" | ||
545 | "add %1, %0\n\t" | ||
546 | "jmp @%0\n\t" | ||
547 | "nop\n\t" | ||
548 | ".balign 4\n\t" | ||
549 | "1: .long 2f\n\t" | ||
550 | "2:\n" : "=&r" (temp_pc) : "r" (exec_offset)); | ||
551 | |||
552 | /* | ||
553 | * We know there will be >=1 iteration, so write as do-while to avoid | ||
554 | * pointless head-of-loop check for 0 iterations. | ||
555 | */ | ||
556 | do { | ||
557 | ea = base_addr + PAGE_SIZE; | ||
558 | a = base_addr; | ||
559 | p = phys; | ||
560 | |||
561 | do { | ||
562 | *(volatile unsigned long *)a = p; | ||
563 | /* | ||
564 | * Next line: intentionally not p+32, saves an add, p | ||
565 | * will do since only the cache tag bits need to | ||
566 | * match. | ||
567 | */ | ||
568 | *(volatile unsigned long *)(a+32) = p; | ||
569 | a += 64; | ||
570 | p += 64; | ||
571 | } while (a < ea); | ||
572 | |||
573 | base_addr += way_incr; | ||
574 | } while (--way_count != 0); | ||
360 | } | 575 | } |
361 | 576 | ||
577 | /* | ||
578 | * Break the 1, 2 and 4 way variants of this out into separate functions to | ||
579 | * avoid nearly all the overhead of having the conditional stuff in the function | ||
580 | * bodies (+ the 1 and 2 way cases avoid saving any registers too). | ||
581 | */ | ||
582 | static void __flush_dcache_segment_1way(unsigned long start, | ||
583 | unsigned long extent_per_way) | ||
584 | { | ||
585 | unsigned long orig_sr, sr_with_bl; | ||
586 | unsigned long base_addr; | ||
587 | unsigned long way_incr, linesz, way_size; | ||
588 | struct cache_info *dcache; | ||
589 | register unsigned long a0, a0e; | ||
590 | |||
591 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | ||
592 | sr_with_bl = orig_sr | (1<<28); | ||
593 | base_addr = ((unsigned long)&empty_zero_page[0]); | ||
594 | |||
595 | /* | ||
596 | * The previous code aligned base_addr to 16k, i.e. the way_size of all | ||
597 | * existing SH-4 D-caches. Whilst I don't see a need to have this | ||
598 | * aligned to any better than the cache line size (which it will be | ||
599 | * anyway by construction), let's align it to at least the way_size of | ||
600 | * any existing or conceivable SH-4 D-cache. -- RPC | ||
601 | */ | ||
602 | base_addr = ((base_addr >> 16) << 16); | ||
603 | base_addr |= start; | ||
604 | |||
605 | dcache = &cpu_data->dcache; | ||
606 | linesz = dcache->linesz; | ||
607 | way_incr = dcache->way_incr; | ||
608 | way_size = dcache->way_size; | ||
609 | |||
610 | a0 = base_addr; | ||
611 | a0e = base_addr + extent_per_way; | ||
612 | do { | ||
613 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | ||
614 | asm volatile("movca.l r0, @%0\n\t" | ||
615 | "ocbi @%0" : : "r" (a0)); | ||
616 | a0 += linesz; | ||
617 | asm volatile("movca.l r0, @%0\n\t" | ||
618 | "ocbi @%0" : : "r" (a0)); | ||
619 | a0 += linesz; | ||
620 | asm volatile("movca.l r0, @%0\n\t" | ||
621 | "ocbi @%0" : : "r" (a0)); | ||
622 | a0 += linesz; | ||
623 | asm volatile("movca.l r0, @%0\n\t" | ||
624 | "ocbi @%0" : : "r" (a0)); | ||
625 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | ||
626 | a0 += linesz; | ||
627 | } while (a0 < a0e); | ||
628 | } | ||
629 | |||
630 | static void __flush_dcache_segment_2way(unsigned long start, | ||
631 | unsigned long extent_per_way) | ||
632 | { | ||
633 | unsigned long orig_sr, sr_with_bl; | ||
634 | unsigned long base_addr; | ||
635 | unsigned long way_incr, linesz, way_size; | ||
636 | struct cache_info *dcache; | ||
637 | register unsigned long a0, a1, a0e; | ||
638 | |||
639 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | ||
640 | sr_with_bl = orig_sr | (1<<28); | ||
641 | base_addr = ((unsigned long)&empty_zero_page[0]); | ||
642 | |||
643 | /* See comment under 1-way above */ | ||
644 | base_addr = ((base_addr >> 16) << 16); | ||
645 | base_addr |= start; | ||
646 | |||
647 | dcache = &cpu_data->dcache; | ||
648 | linesz = dcache->linesz; | ||
649 | way_incr = dcache->way_incr; | ||
650 | way_size = dcache->way_size; | ||
651 | |||
652 | a0 = base_addr; | ||
653 | a1 = a0 + way_incr; | ||
654 | a0e = base_addr + extent_per_way; | ||
655 | do { | ||
656 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | ||
657 | asm volatile("movca.l r0, @%0\n\t" | ||
658 | "movca.l r0, @%1\n\t" | ||
659 | "ocbi @%0\n\t" | ||
660 | "ocbi @%1" : : | ||
661 | "r" (a0), "r" (a1)); | ||
662 | a0 += linesz; | ||
663 | a1 += linesz; | ||
664 | asm volatile("movca.l r0, @%0\n\t" | ||
665 | "movca.l r0, @%1\n\t" | ||
666 | "ocbi @%0\n\t" | ||
667 | "ocbi @%1" : : | ||
668 | "r" (a0), "r" (a1)); | ||
669 | a0 += linesz; | ||
670 | a1 += linesz; | ||
671 | asm volatile("movca.l r0, @%0\n\t" | ||
672 | "movca.l r0, @%1\n\t" | ||
673 | "ocbi @%0\n\t" | ||
674 | "ocbi @%1" : : | ||
675 | "r" (a0), "r" (a1)); | ||
676 | a0 += linesz; | ||
677 | a1 += linesz; | ||
678 | asm volatile("movca.l r0, @%0\n\t" | ||
679 | "movca.l r0, @%1\n\t" | ||
680 | "ocbi @%0\n\t" | ||
681 | "ocbi @%1" : : | ||
682 | "r" (a0), "r" (a1)); | ||
683 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | ||
684 | a0 += linesz; | ||
685 | a1 += linesz; | ||
686 | } while (a0 < a0e); | ||
687 | } | ||
688 | |||
689 | static void __flush_dcache_segment_4way(unsigned long start, | ||
690 | unsigned long extent_per_way) | ||
691 | { | ||
692 | unsigned long orig_sr, sr_with_bl; | ||
693 | unsigned long base_addr; | ||
694 | unsigned long way_incr, linesz, way_size; | ||
695 | struct cache_info *dcache; | ||
696 | register unsigned long a0, a1, a2, a3, a0e; | ||
697 | |||
698 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | ||
699 | sr_with_bl = orig_sr | (1<<28); | ||
700 | base_addr = ((unsigned long)&empty_zero_page[0]); | ||
701 | |||
702 | /* See comment under 1-way above */ | ||
703 | base_addr = ((base_addr >> 16) << 16); | ||
704 | base_addr |= start; | ||
705 | |||
706 | dcache = &cpu_data->dcache; | ||
707 | linesz = dcache->linesz; | ||
708 | way_incr = dcache->way_incr; | ||
709 | way_size = dcache->way_size; | ||
710 | |||
711 | a0 = base_addr; | ||
712 | a1 = a0 + way_incr; | ||
713 | a2 = a1 + way_incr; | ||
714 | a3 = a2 + way_incr; | ||
715 | a0e = base_addr + extent_per_way; | ||
716 | do { | ||
717 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | ||
718 | asm volatile("movca.l r0, @%0\n\t" | ||
719 | "movca.l r0, @%1\n\t" | ||
720 | "movca.l r0, @%2\n\t" | ||
721 | "movca.l r0, @%3\n\t" | ||
722 | "ocbi @%0\n\t" | ||
723 | "ocbi @%1\n\t" | ||
724 | "ocbi @%2\n\t" | ||
725 | "ocbi @%3\n\t" : : | ||
726 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | ||
727 | a0 += linesz; | ||
728 | a1 += linesz; | ||
729 | a2 += linesz; | ||
730 | a3 += linesz; | ||
731 | asm volatile("movca.l r0, @%0\n\t" | ||
732 | "movca.l r0, @%1\n\t" | ||
733 | "movca.l r0, @%2\n\t" | ||
734 | "movca.l r0, @%3\n\t" | ||
735 | "ocbi @%0\n\t" | ||
736 | "ocbi @%1\n\t" | ||
737 | "ocbi @%2\n\t" | ||
738 | "ocbi @%3\n\t" : : | ||
739 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | ||
740 | a0 += linesz; | ||
741 | a1 += linesz; | ||
742 | a2 += linesz; | ||
743 | a3 += linesz; | ||
744 | asm volatile("movca.l r0, @%0\n\t" | ||
745 | "movca.l r0, @%1\n\t" | ||
746 | "movca.l r0, @%2\n\t" | ||
747 | "movca.l r0, @%3\n\t" | ||
748 | "ocbi @%0\n\t" | ||
749 | "ocbi @%1\n\t" | ||
750 | "ocbi @%2\n\t" | ||
751 | "ocbi @%3\n\t" : : | ||
752 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | ||
753 | a0 += linesz; | ||
754 | a1 += linesz; | ||
755 | a2 += linesz; | ||
756 | a3 += linesz; | ||
757 | asm volatile("movca.l r0, @%0\n\t" | ||
758 | "movca.l r0, @%1\n\t" | ||
759 | "movca.l r0, @%2\n\t" | ||
760 | "movca.l r0, @%3\n\t" | ||
761 | "ocbi @%0\n\t" | ||
762 | "ocbi @%1\n\t" | ||
763 | "ocbi @%2\n\t" | ||
764 | "ocbi @%3\n\t" : : | ||
765 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | ||
766 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | ||
767 | a0 += linesz; | ||
768 | a1 += linesz; | ||
769 | a2 += linesz; | ||
770 | a3 += linesz; | ||
771 | } while (a0 < a0e); | ||
772 | } | ||
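The segment flush loops above all rely on the same movca.l/ocbi pair: movca.l allocates a cache line for the target address without fetching its contents, which writes back whatever dirty line previously occupied that slot, and ocbi then invalidates the freshly allocated line so the dummy data never reaches memory. SR.BL (bit 28) is held across each group so an interrupt cannot split the pair. A sketch of the idiom as a standalone helper (not part of the patch):

static inline void ex_flush_one_line(unsigned long addr)
{
	/* Allocate-then-invalidate: forces write-back of the old occupant
	 * of this cache slot without ever reading 'addr' from memory. */
	__asm__ __volatile__("movca.l	r0, @%0\n\t"
			     "ocbi	@%0"
			     : /* no outputs */
			     : "r" (addr)
			     : "memory");
}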