author | Richard Curnow <richard.curnow@st.com> | 2006-09-27 01:09:26 -0400
committer | Paul Mundt <lethal@linux-sh.org> | 2006-09-27 01:09:26 -0400
commit | b638d0b921dc95229af0dfd09cd24850336a2f75 (patch)
tree | 0ef34527a47b22421fb92ba2141052fecfe36482 /arch/sh/mm/cache-sh4.c
parent | fdfc74f9fcebdda14609159d5010b758a9409acf (diff)
sh: Optimized cache handling for SH-4/SH-4A caches.
This reworks some of the SH-4 cache handling code to more easily
accommodate newer-style caches (particularly for the greater-than-direct-mapped
case), as well as optimizing some of the old code.
Signed-off-by: Richard Curnow <richard.curnow@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/mm/cache-sh4.c')
-rw-r--r-- | arch/sh/mm/cache-sh4.c | 517 |
1 file changed, 430 insertions, 87 deletions
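
The central change in the patch below is that the cache geometry (how many page-sized 'colours', i.e. aliases, each way spans) is computed once from the probed cache parameters instead of being hard-coded for a 16 KiB direct-mapped cache. The standalone sketch below reproduces the compute_alias() arithmetic for two hypothetical geometries; the struct and the example values are illustrative assumptions, not the kernel's struct cache_info.

#include <stdio.h>

#define PAGE_SIZE  4096UL
#define PAGE_SHIFT 12

/* Illustrative stand-in for the struct cache_info fields used here. */
struct cache_geom {
        unsigned int sets;          /* sets per way */
        unsigned int entry_shift;   /* log2(cache line size) */
        unsigned long alias_mask;
        unsigned int n_aliases;
};

static void compute_alias_sketch(struct cache_geom *c)
{
        /* Set-index bits above the page offset select the colour; virtual
         * pages that differ only in these bits can alias one another. */
        c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
        c->n_aliases = (c->alias_mask >> PAGE_SHIFT) + 1;
}

int main(void)
{
        /* 16 KiB per way, 32-byte lines: 512 sets, 4 colours. */
        struct cache_geom small = { .sets = 512,  .entry_shift = 5 };
        /* 64 KiB direct-mapped worst case: 2048 sets, 16 colours. */
        struct cache_geom large = { .sets = 2048, .entry_shift = 5 };

        compute_alias_sketch(&small);
        compute_alias_sketch(&large);

        printf("16K way: alias_mask=0x%08lx n_aliases=%u\n",
               small.alias_mask, small.n_aliases);
        printf("64K way: alias_mask=0x%08lx n_aliases=%u\n",
               large.alias_mask, large.n_aliases);
        return 0;
}

The 64 KiB direct-mapped case yields 16 colours, which is why the patch sizes p3map_sem[] with MAX_P3_SEMAPHORES set to 16.
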
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 846b63d6f5e8..c036c2b4ac2b 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -25,28 +25,95 @@ | |||
25 | #include <asm/mmu_context.h> | 25 | #include <asm/mmu_context.h> |
26 | #include <asm/cacheflush.h> | 26 | #include <asm/cacheflush.h> |
27 | 27 | ||
28 | extern void __flush_cache_4096(unsigned long addr, unsigned long phys, | 28 | static void __flush_dcache_segment_1way(unsigned long start, |
29 | unsigned long extent); | ||
30 | static void __flush_dcache_segment_2way(unsigned long start, | ||
31 | unsigned long extent); | ||
32 | static void __flush_dcache_segment_4way(unsigned long start, | ||
33 | unsigned long extent); | ||
34 | |||
35 | static void __flush_cache_4096(unsigned long addr, unsigned long phys, | ||
29 | unsigned long exec_offset); | 36 | unsigned long exec_offset); |
30 | extern void __flush_cache_4096_all(unsigned long start); | 37 | |
31 | static void __flush_cache_4096_all_ex(unsigned long start); | 38 | /* |
32 | extern void __flush_dcache_all(void); | 39 | * This is initialised here to ensure that it is not placed in the BSS. If |
33 | static void __flush_dcache_all_ex(void); | 40 | * that were to happen, note that cache_init gets called before the BSS is |
41 | * cleared, so this would get nulled out which would be hopeless. | ||
42 | */ | ||
43 | static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) = | ||
44 | (void (*)(unsigned long, unsigned long))0xdeadbeef; | ||
45 | |||
46 | static void compute_alias(struct cache_info *c) | ||
47 | { | ||
48 | c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1); | ||
49 | c->n_aliases = (c->alias_mask >> PAGE_SHIFT) + 1; | ||
50 | } | ||
51 | |||
52 | static void __init emit_cache_params(void) | ||
53 | { | ||
54 | printk("PVR=%08x CVR=%08x PRR=%08x\n", | ||
55 | ctrl_inl(CCN_PVR), | ||
56 | ctrl_inl(CCN_CVR), | ||
57 | ctrl_inl(CCN_PRR)); | ||
58 | printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n", | ||
59 | cpu_data->icache.ways, | ||
60 | cpu_data->icache.sets, | ||
61 | cpu_data->icache.way_incr); | ||
62 | printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", | ||
63 | cpu_data->icache.entry_mask, | ||
64 | cpu_data->icache.alias_mask, | ||
65 | cpu_data->icache.n_aliases); | ||
66 | printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n", | ||
67 | cpu_data->dcache.ways, | ||
68 | cpu_data->dcache.sets, | ||
69 | cpu_data->dcache.way_incr); | ||
70 | printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", | ||
71 | cpu_data->dcache.entry_mask, | ||
72 | cpu_data->dcache.alias_mask, | ||
73 | cpu_data->dcache.n_aliases); | ||
74 | |||
75 | if (!__flush_dcache_segment_fn) | ||
76 | panic("unknown number of cache ways\n"); | ||
77 | } | ||
34 | 78 | ||
35 | /* | 79 | /* |
36 | * SH-4 has virtually indexed and physically tagged cache. | 80 | * SH-4 has virtually indexed and physically tagged cache. |
37 | */ | 81 | */ |
38 | 82 | ||
39 | struct semaphore p3map_sem[4]; | 83 | /* Worst case assumed to be 64k cache, direct-mapped i.e. 4 synonym bits. */ |
84 | #define MAX_P3_SEMAPHORES 16 | ||
85 | |||
86 | struct semaphore p3map_sem[MAX_P3_SEMAPHORES]; | ||
40 | 87 | ||
41 | void __init p3_cache_init(void) | 88 | void __init p3_cache_init(void) |
42 | { | 89 | { |
43 | if (remap_area_pages(P3SEG, 0, PAGE_SIZE*4, _PAGE_CACHABLE)) | 90 | int i; |
91 | |||
92 | compute_alias(&cpu_data->icache); | ||
93 | compute_alias(&cpu_data->dcache); | ||
94 | |||
95 | switch (cpu_data->dcache.ways) { | ||
96 | case 1: | ||
97 | __flush_dcache_segment_fn = __flush_dcache_segment_1way; | ||
98 | break; | ||
99 | case 2: | ||
100 | __flush_dcache_segment_fn = __flush_dcache_segment_2way; | ||
101 | break; | ||
102 | case 4: | ||
103 | __flush_dcache_segment_fn = __flush_dcache_segment_4way; | ||
104 | break; | ||
105 | default: | ||
106 | __flush_dcache_segment_fn = NULL; | ||
107 | break; | ||
108 | } | ||
109 | |||
110 | emit_cache_params(); | ||
111 | |||
112 | if (remap_area_pages(P3SEG, 0, PAGE_SIZE * 4, _PAGE_CACHABLE)) | ||
44 | panic("%s failed.", __FUNCTION__); | 113 | panic("%s failed.", __FUNCTION__); |
45 | 114 | ||
46 | sema_init (&p3map_sem[0], 1); | 115 | for (i = 0; i < cpu_data->dcache.n_aliases; i++) |
47 | sema_init (&p3map_sem[1], 1); | 116 | sema_init(&p3map_sem[i], 1); |
48 | sema_init (&p3map_sem[2], 1); | ||
49 | sema_init (&p3map_sem[3], 1); | ||
50 | } | 117 | } |
51 | 118 | ||
52 | /* | 119 | /* |
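
The hunk above selects, once at boot, which hand-unrolled flush routine the rest of the file will call through __flush_dcache_segment_fn, so the branching on the number of ways is paid only during init rather than on every flush. A minimal standalone sketch of that dispatch pattern follows; the stub routines and select_flush_fn() are hypothetical stand-ins, not the kernel functions.

#include <stdio.h>
#include <stdlib.h>

static void flush_1way(unsigned long start, unsigned long extent)
{
        printf("1-way flush: start=%#lx extent=%#lx\n", start, extent);
}

static void flush_2way(unsigned long start, unsigned long extent)
{
        printf("2-way flush: start=%#lx extent=%#lx\n", start, extent);
}

static void flush_4way(unsigned long start, unsigned long extent)
{
        printf("4-way flush: start=%#lx extent=%#lx\n", start, extent);
}

/* Selected once at init time, then used unconditionally by the flushers. */
static void (*flush_segment_fn)(unsigned long, unsigned long);

static void select_flush_fn(unsigned int ways)
{
        switch (ways) {
        case 1:
                flush_segment_fn = flush_1way;
                break;
        case 2:
                flush_segment_fn = flush_2way;
                break;
        case 4:
                flush_segment_fn = flush_4way;
                break;
        default:
                /* the kernel panics on an unknown number of ways */
                fprintf(stderr, "unknown number of cache ways\n");
                exit(1);
        }
}

int main(void)
{
        select_flush_fn(2);              /* e.g. a 2-way D-cache */
        flush_segment_fn(0, 0x4000);     /* whole-way flush, as in flush_dcache_all() */
        return 0;
}

In the real code the pointer is initialised to a dummy non-zero value so that it lands in initialised data rather than the BSS, since p3_cache_init() runs before the BSS is cleared.
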
@@ -91,7 +158,6 @@ void __flush_purge_region(void *start, int size) | |||
91 | } | 158 | } |
92 | } | 159 | } |
93 | 160 | ||
94 | |||
95 | /* | 161 | /* |
96 | * No write back please | 162 | * No write back please |
97 | */ | 163 | */ |
@@ -110,46 +176,6 @@ void __flush_invalidate_region(void *start, int size) | |||
110 | } | 176 | } |
111 | } | 177 | } |
112 | 178 | ||
113 | static void __flush_dcache_all_ex(void) | ||
114 | { | ||
115 | unsigned long addr, end_addr, entry_offset; | ||
116 | |||
117 | end_addr = CACHE_OC_ADDRESS_ARRAY + | ||
118 | (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) * | ||
119 | cpu_data->dcache.ways; | ||
120 | |||
121 | entry_offset = 1 << cpu_data->dcache.entry_shift; | ||
122 | for (addr = CACHE_OC_ADDRESS_ARRAY; | ||
123 | addr < end_addr; | ||
124 | addr += entry_offset) { | ||
125 | ctrl_outl(0, addr); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static void __flush_cache_4096_all_ex(unsigned long start) | ||
130 | { | ||
131 | unsigned long addr, entry_offset; | ||
132 | int i; | ||
133 | |||
134 | entry_offset = 1 << cpu_data->dcache.entry_shift; | ||
135 | for (i = 0; i < cpu_data->dcache.ways; | ||
136 | i++, start += cpu_data->dcache.way_incr) { | ||
137 | for (addr = CACHE_OC_ADDRESS_ARRAY + start; | ||
138 | addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start; | ||
139 | addr += entry_offset) { | ||
140 | ctrl_outl(0, addr); | ||
141 | } | ||
142 | } | ||
143 | } | ||
144 | |||
145 | void flush_cache_4096_all(unsigned long start) | ||
146 | { | ||
147 | if (cpu_data->dcache.ways == 1) | ||
148 | __flush_cache_4096_all(start); | ||
149 | else | ||
150 | __flush_cache_4096_all_ex(start); | ||
151 | } | ||
152 | |||
153 | /* | 179 | /* |
154 | * Write back the range of D-cache, and purge the I-cache. | 180 | * Write back the range of D-cache, and purge the I-cache. |
155 | * | 181 | * |
@@ -180,9 +206,11 @@ void flush_cache_sigtramp(unsigned long addr) | |||
180 | 206 | ||
181 | local_irq_save(flags); | 207 | local_irq_save(flags); |
182 | jump_to_P2(); | 208 | jump_to_P2(); |
209 | |||
183 | for (i = 0; i < cpu_data->icache.ways; | 210 | for (i = 0; i < cpu_data->icache.ways; |
184 | i++, index += cpu_data->icache.way_incr) | 211 | i++, index += cpu_data->icache.way_incr) |
185 | ctrl_outl(0, index); /* Clear out Valid-bit */ | 212 | ctrl_outl(0, index); /* Clear out Valid-bit */ |
213 | |||
186 | back_to_P1(); | 214 | back_to_P1(); |
187 | wmb(); | 215 | wmb(); |
188 | local_irq_restore(flags); | 216 | local_irq_restore(flags); |
@@ -194,8 +222,8 @@ static inline void flush_cache_4096(unsigned long start, | |||
194 | unsigned long flags; | 222 | unsigned long flags; |
195 | 223 | ||
196 | /* | 224 | /* |
197 | * SH7751, SH7751R, and ST40 have no restriction to handle cache. | 225 | * All types of SH-4 require PC to be in P2 to operate on the I-cache. |
198 | * (While SH7750 must do that at P2 area.) | 226 | * Some types of SH-4 require PC to be in P2 to operate on the D-cache. |
199 | */ | 227 | */ |
200 | if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) | 228 | if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) |
201 | || start < CACHE_OC_ADDRESS_ARRAY) { | 229 | || start < CACHE_OC_ADDRESS_ARRAY) { |
@@ -217,12 +245,13 @@ void flush_dcache_page(struct page *page) | |||
217 | { | 245 | { |
218 | if (test_bit(PG_mapped, &page->flags)) { | 246 | if (test_bit(PG_mapped, &page->flags)) { |
219 | unsigned long phys = PHYSADDR(page_address(page)); | 247 | unsigned long phys = PHYSADDR(page_address(page)); |
248 | unsigned long addr = CACHE_OC_ADDRESS_ARRAY; | ||
249 | int i, n; | ||
220 | 250 | ||
221 | /* Loop all the D-cache */ | 251 | /* Loop all the D-cache */ |
222 | flush_cache_4096(CACHE_OC_ADDRESS_ARRAY, phys); | 252 | n = cpu_data->dcache.n_aliases; |
223 | flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys); | 253 | for (i = 0; i < n; i++, addr += PAGE_SIZE) |
224 | flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys); | 254 | flush_cache_4096(addr, phys); |
225 | flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys); | ||
226 | } | 255 | } |
227 | 256 | ||
228 | wmb(); | 257 | wmb(); |
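
With this hunk, flush_dcache_page() no longer assumes exactly four colours: it walks n_aliases page-sized windows of the operand-cache address array. The host-side sketch below just prints the window base addresses for two geometries; the 0xf4000000 base for CACHE_OC_ADDRESS_ARRAY is quoted from the SH-4 memory-mapped cache layout and, like the geometry values, is an assumption of the sketch.

#include <stdio.h>

#define CACHE_OC_ADDRESS_ARRAY 0xf4000000UL
#define PAGE_SIZE              4096UL

static void show_windows(unsigned int n_aliases)
{
        unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
        unsigned int i;

        printf("n_aliases=%u:", n_aliases);
        for (i = 0; i < n_aliases; i++, addr += PAGE_SIZE)
                printf(" %#lx", addr);
        printf("\n");
}

int main(void)
{
        show_windows(4);   /* the old hard-coded 0x0000/0x1000/0x2000/0x3000 sequence */
        show_windows(1);   /* alias-free cache: one window is enough */
        return 0;
}
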
@@ -246,10 +275,7 @@ static inline void flush_icache_all(void) | |||
246 | 275 | ||
247 | void flush_dcache_all(void) | 276 | void flush_dcache_all(void) |
248 | { | 277 | { |
249 | if (cpu_data->dcache.ways == 1) | 278 | (*__flush_dcache_segment_fn)(0UL, cpu_data->dcache.way_size); |
250 | __flush_dcache_all(); | ||
251 | else | ||
252 | __flush_dcache_all_ex(); | ||
253 | wmb(); | 279 | wmb(); |
254 | } | 280 | } |
255 | 281 | ||
@@ -261,6 +287,16 @@ void flush_cache_all(void) | |||
261 | 287 | ||
262 | void flush_cache_mm(struct mm_struct *mm) | 288 | void flush_cache_mm(struct mm_struct *mm) |
263 | { | 289 | { |
290 | /* | ||
291 | * Note : (RPC) since the caches are physically tagged, the only point | ||
292 | * of flush_cache_mm for SH-4 is to get rid of aliases from the | ||
293 | * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that | ||
294 | * lines can stay resident so long as the virtual address they were | ||
295 | * accessed with (hence cache set) is in accord with the physical | ||
296 | * address (i.e. tag). It's no different here. So I reckon we don't | ||
297 | * need to flush the I-cache, since aliases don't matter for that. We | ||
298 | * should try that. | ||
299 | */ | ||
264 | flush_cache_all(); | 300 | flush_cache_all(); |
265 | } | 301 | } |
266 | 302 | ||
@@ -273,24 +309,36 @@ void flush_cache_mm(struct mm_struct *mm) | |||
273 | void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) | 309 | void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) |
274 | { | 310 | { |
275 | unsigned long phys = pfn << PAGE_SHIFT; | 311 | unsigned long phys = pfn << PAGE_SHIFT; |
312 | unsigned int alias_mask; | ||
313 | |||
314 | alias_mask = cpu_data->dcache.alias_mask; | ||
276 | 315 | ||
277 | /* We only need to flush D-cache when we have alias */ | 316 | /* We only need to flush D-cache when we have alias */ |
278 | if ((address^phys) & CACHE_ALIAS) { | 317 | if ((address^phys) & alias_mask) { |
279 | /* Loop 4K of the D-cache */ | 318 | /* Loop 4K of the D-cache */ |
280 | flush_cache_4096( | 319 | flush_cache_4096( |
281 | CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS), | 320 | CACHE_OC_ADDRESS_ARRAY | (address & alias_mask), |
282 | phys); | 321 | phys); |
283 | /* Loop another 4K of the D-cache */ | 322 | /* Loop another 4K of the D-cache */ |
284 | flush_cache_4096( | 323 | flush_cache_4096( |
285 | CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS), | 324 | CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask), |
286 | phys); | 325 | phys); |
287 | } | 326 | } |
288 | 327 | ||
289 | if (vma->vm_flags & VM_EXEC) | 328 | alias_mask = cpu_data->icache.alias_mask; |
290 | /* Loop 4K (half) of the I-cache */ | 329 | if (vma->vm_flags & VM_EXEC) { |
330 | /* | ||
331 | * Evict entries from the portion of the cache from which code | ||
332 | * may have been executed at this address (virtual). There's | ||
333 | * no need to evict from the portion corresponding to the | ||
334 | * physical address as for the D-cache, because we know the | ||
335 | * kernel has never executed the code through its identity | ||
336 | * translation. | ||
337 | */ | ||
291 | flush_cache_4096( | 338 | flush_cache_4096( |
292 | CACHE_IC_ADDRESS_ARRAY | (address & 0x1000), | 339 | CACHE_IC_ADDRESS_ARRAY | (address & alias_mask), |
293 | phys); | 340 | phys); |
341 | } | ||
294 | } | 342 | } |
295 | 343 | ||
296 | /* | 344 | /* |
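
flush_cache_page() above only needs to touch the D-cache when the user virtual address and the physical address select different colours, i.e. when they differ inside alias_mask. A small standalone sketch of that test, assuming a hypothetical alias_mask of 0x3000 (a 16 KiB way with 4 KiB pages):

#include <stdio.h>

#define ALIAS_MASK 0x3000UL   /* hypothetical: 16 KiB way, 4 KiB pages */

static int needs_dcache_flush(unsigned long vaddr, unsigned long paddr)
{
        /* If the colour bits differ, the same physical line can be cached
         * under two different indices, so both windows must be flushed. */
        return ((vaddr ^ paddr) & ALIAS_MASK) != 0;
}

int main(void)
{
        /* Same colour (bits 13:12 are 01 in both): nothing to flush. */
        printf("%d\n", needs_dcache_flush(0x00401000UL, 0x0c081000UL));
        /* Different colours (10 vs 01): flush both windows. */
        printf("%d\n", needs_dcache_flush(0x00402000UL, 0x0c081000UL));
        return 0;
}

The I-cache side uses its own alias_mask, and as the new comment notes, only the window for the virtual address needs evicting there.
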
@@ -305,14 +353,28 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigne | |||
305 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, | 353 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, |
306 | unsigned long end) | 354 | unsigned long end) |
307 | { | 355 | { |
308 | unsigned long p = start & PAGE_MASK; | 356 | unsigned long d = 0, p = start & PAGE_MASK; |
357 | unsigned long alias_mask = cpu_data->dcache.alias_mask; | ||
358 | unsigned long n_aliases = cpu_data->dcache.n_aliases; | ||
359 | unsigned long select_bit; | ||
360 | unsigned long all_aliases_mask; | ||
361 | unsigned long addr_offset; | ||
362 | unsigned long phys; | ||
309 | pgd_t *dir; | 363 | pgd_t *dir; |
310 | pmd_t *pmd; | 364 | pmd_t *pmd; |
311 | pud_t *pud; | 365 | pud_t *pud; |
312 | pte_t *pte; | 366 | pte_t *pte; |
313 | pte_t entry; | 367 | pte_t entry; |
314 | unsigned long phys; | 368 | int i; |
315 | unsigned long d = 0; | 369 | |
370 | /* | ||
371 | * If cache is only 4k-per-way, there are never any 'aliases'. Since | ||
372 | * the cache is physically tagged, the data can just be left in there. | ||
373 | */ | ||
374 | if (n_aliases == 0) | ||
375 | return; | ||
376 | |||
377 | all_aliases_mask = (1 << n_aliases) - 1; | ||
316 | 378 | ||
317 | /* | 379 | /* |
318 | * Don't bother with the lookup and alias check if we have a | 380 | * Don't bother with the lookup and alias check if we have a |
@@ -335,39 +397,52 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, | |||
335 | 397 | ||
336 | do { | 398 | do { |
337 | if (pmd_none(*pmd) || pmd_bad(*pmd)) { | 399 | if (pmd_none(*pmd) || pmd_bad(*pmd)) { |
338 | p &= ~((1 << PMD_SHIFT) -1); | 400 | p &= ~((1 << PMD_SHIFT) - 1); |
339 | p += (1 << PMD_SHIFT); | 401 | p += (1 << PMD_SHIFT); |
340 | pmd++; | 402 | pmd++; |
403 | |||
341 | continue; | 404 | continue; |
342 | } | 405 | } |
406 | |||
343 | pte = pte_offset_kernel(pmd, p); | 407 | pte = pte_offset_kernel(pmd, p); |
408 | |||
344 | do { | 409 | do { |
345 | entry = *pte; | 410 | entry = *pte; |
411 | |||
346 | if ((pte_val(entry) & _PAGE_PRESENT)) { | 412 | if ((pte_val(entry) & _PAGE_PRESENT)) { |
347 | phys = pte_val(entry)&PTE_PHYS_MASK; | 413 | phys = pte_val(entry) & PTE_PHYS_MASK; |
348 | if ((p^phys) & CACHE_ALIAS) { | 414 | |
349 | d |= 1 << ((p & CACHE_ALIAS)>>12); | 415 | if ((p ^ phys) & alias_mask) { |
350 | d |= 1 << ((phys & CACHE_ALIAS)>>12); | 416 | d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); |
351 | if (d == 0x0f) | 417 | d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); |
418 | |||
419 | if (d == all_aliases_mask) | ||
352 | goto loop_exit; | 420 | goto loop_exit; |
353 | } | 421 | } |
354 | } | 422 | } |
423 | |||
355 | pte++; | 424 | pte++; |
356 | p += PAGE_SIZE; | 425 | p += PAGE_SIZE; |
357 | } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); | 426 | } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); |
358 | pmd++; | 427 | pmd++; |
359 | } while (p < end); | 428 | } while (p < end); |
360 | loop_exit: | 429 | |
361 | if (d & 1) | 430 | loop_exit: |
362 | flush_cache_4096_all(0); | 431 | for (i = 0, select_bit = 0x1, addr_offset = 0x0; i < n_aliases; |
363 | if (d & 2) | 432 | i++, select_bit <<= 1, addr_offset += PAGE_SIZE) |
364 | flush_cache_4096_all(0x1000); | 433 | if (d & select_bit) { |
365 | if (d & 4) | 434 | (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); |
366 | flush_cache_4096_all(0x2000); | 435 | wmb(); |
367 | if (d & 8) | 436 | } |
368 | flush_cache_4096_all(0x3000); | 437 | |
369 | if (vma->vm_flags & VM_EXEC) | 438 | if (vma->vm_flags & VM_EXEC) { |
439 | /* | ||
440 | * TODO: Is this required??? Need to look at how I-cache | ||
441 | * coherency is assured when new programs are loaded to see if | ||
442 | * this matters. | ||
443 | */ | ||
370 | flush_icache_all(); | 444 | flush_icache_all(); |
445 | } | ||
371 | } | 446 | } |
372 | 447 | ||
373 | /* | 448 | /* |
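
flush_cache_range() below collects, in the bitmask d, every colour touched either by a virtual page in the range or by the physical page behind it, and stops walking the page tables early once all colours have been seen. A standalone sketch of that bookkeeping, assuming four colours and two hypothetical (virtual, physical) page pairs:

#include <stdio.h>

#define PAGE_SHIFT       12
#define ALIAS_MASK       0x3000UL            /* hypothetical: 4 colours */
#define ALL_ALIASES_MASK ((1UL << 4) - 1)

int main(void)
{
        /* Hypothetical (virtual, physical) page pairs from a VMA walk. */
        unsigned long pairs[][2] = {
                { 0x00400000UL, 0x0c082000UL },   /* colours 0 and 2 */
                { 0x00401000UL, 0x0c083000UL },   /* colours 1 and 3 */
        };
        unsigned long d = 0;
        unsigned int i;

        for (i = 0; i < 2; i++) {
                unsigned long p = pairs[i][0], phys = pairs[i][1];

                if ((p ^ phys) & ALIAS_MASK) {
                        d |= 1UL << ((p & ALIAS_MASK) >> PAGE_SHIFT);
                        d |= 1UL << ((phys & ALIAS_MASK) >> PAGE_SHIFT);
                }
                if (d == ALL_ALIASES_MASK)
                        break;               /* every colour already marked */
        }

        /* Each set bit selects one page-sized slice of the D-cache to flush. */
        printf("d = %#lx\n", d);             /* 0xf here */
        return 0;
}
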
@@ -384,3 +459,271 @@ void flush_icache_user_range(struct vm_area_struct *vma, | |||
384 | mb(); | 459 | mb(); |
385 | } | 460 | } |
386 | 461 | ||
462 | /** | ||
463 | * __flush_cache_4096 | ||
464 | * | ||
465 | * @addr: address in memory mapped cache array | ||
466 | * @phys: P1 address to flush (has to match tags if addr has 'A' bit | ||
467 | * set i.e. associative write) | ||
468 | * @exec_offset: set to 0x20000000 if flush has to be executed from P2 | ||
469 | * region else 0x0 | ||
470 | * | ||
471 | * The offset into the cache array implied by 'addr' selects the | ||
472 | * 'colour' of the virtual address range that will be flushed. The | ||
473 | * operation (purge/write-back) is selected by the lower 2 bits of | ||
474 | * 'phys'. | ||
475 | */ | ||
476 | static void __flush_cache_4096(unsigned long addr, unsigned long phys, | ||
477 | unsigned long exec_offset) | ||
478 | { | ||
479 | int way_count; | ||
480 | unsigned long base_addr = addr; | ||
481 | struct cache_info *dcache; | ||
482 | unsigned long way_incr; | ||
483 | unsigned long a, ea, p; | ||
484 | unsigned long temp_pc; | ||
485 | |||
486 | dcache = &cpu_data->dcache; | ||
487 | /* Write this way for better assembly. */ | ||
488 | way_count = dcache->ways; | ||
489 | way_incr = dcache->way_incr; | ||
490 | |||
491 | /* | ||
492 | * Apply exec_offset (i.e. branch to P2 if required.). | ||
493 | * | ||
494 | * FIXME: | ||
495 | * | ||
496 | * If I write "=r" for the (temp_pc), it puts this in r6 hence | ||
497 | * trashing exec_offset before it's been added on - why? Hence | ||
498 | * "=&r" as a 'workaround' | ||
499 | */ | ||
500 | asm volatile("mov.l 1f, %0\n\t" | ||
501 | "add %1, %0\n\t" | ||
502 | "jmp @%0\n\t" | ||
503 | "nop\n\t" | ||
504 | ".balign 4\n\t" | ||
505 | "1: .long 2f\n\t" | ||
506 | "2:\n" : "=&r" (temp_pc) : "r" (exec_offset)); | ||
507 | |||
508 | /* | ||
509 | * We know there will be >=1 iteration, so write as do-while to avoid | ||
510 | * pointless head-of-loop check for 0 iterations. | ||
511 | */ | ||
512 | do { | ||
513 | ea = base_addr + PAGE_SIZE; | ||
514 | a = base_addr; | ||
515 | p = phys; | ||
516 | |||
517 | do { | ||
518 | *(volatile unsigned long *)a = p; | ||
519 | /* | ||
520 | * Next line: intentionally not p+32, saves an add, p | ||
521 | * will do since only the cache tag bits need to | ||
522 | * match. | ||
523 | */ | ||
524 | *(volatile unsigned long *)(a+32) = p; | ||
525 | a += 64; | ||
526 | p += 64; | ||
527 | } while (a < ea); | ||
528 | |||
529 | base_addr += way_incr; | ||
530 | } while (--way_count != 0); | ||
531 | } | ||
532 | |||
533 | /* | ||
534 | * Break the 1, 2 and 4 way variants of this out into separate functions to | ||
535 | * avoid nearly all the overhead of having the conditional stuff in the function | ||
536 | * bodies (+ the 1 and 2 way cases avoid saving any registers too). | ||
537 | */ | ||
538 | static void __flush_dcache_segment_1way(unsigned long start, | ||
539 | unsigned long extent_per_way) | ||
540 | { | ||
541 | unsigned long orig_sr, sr_with_bl; | ||
542 | unsigned long base_addr; | ||
543 | unsigned long way_incr, linesz, way_size; | ||
544 | struct cache_info *dcache; | ||
545 | register unsigned long a0, a0e; | ||
546 | |||
547 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | ||
548 | sr_with_bl = orig_sr | (1<<28); | ||
549 | base_addr = ((unsigned long)&empty_zero_page[0]); | ||
550 | |||
551 | /* | ||
552 | * The previous code aligned base_addr to 16k, i.e. the way_size of all | ||
553 | * existing SH-4 D-caches. Whilst I don't see a need to have this | ||
554 | * aligned to any better than the cache line size (which it will be | ||
555 | * anyway by construction), let's align it to at least the way_size of | ||
556 | * any existing or conceivable SH-4 D-cache. -- RPC | ||
557 | */ | ||
558 | base_addr = ((base_addr >> 16) << 16); | ||
559 | base_addr |= start; | ||
560 | |||
561 | dcache = &cpu_data->dcache; | ||
562 | linesz = dcache->linesz; | ||
563 | way_incr = dcache->way_incr; | ||
564 | way_size = dcache->way_size; | ||
565 | |||
566 | a0 = base_addr; | ||
567 | a0e = base_addr + extent_per_way; | ||
568 | do { | ||
569 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | ||
570 | asm volatile("movca.l r0, @%0\n\t" | ||
571 | "ocbi @%0" : : "r" (a0)); | ||
572 | a0 += linesz; | ||
573 | asm volatile("movca.l r0, @%0\n\t" | ||
574 | "ocbi @%0" : : "r" (a0)); | ||
575 | a0 += linesz; | ||
576 | asm volatile("movca.l r0, @%0\n\t" | ||
577 | "ocbi @%0" : : "r" (a0)); | ||
578 | a0 += linesz; | ||
579 | asm volatile("movca.l r0, @%0\n\t" | ||
580 | "ocbi @%0" : : "r" (a0)); | ||
581 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | ||
582 | a0 += linesz; | ||
583 | } while (a0 < a0e); | ||
584 | } | ||
585 | |||
586 | static void __flush_dcache_segment_2way(unsigned long start, | ||
587 | unsigned long extent_per_way) | ||
588 | { | ||
589 | unsigned long orig_sr, sr_with_bl; | ||
590 | unsigned long base_addr; | ||
591 | unsigned long way_incr, linesz, way_size; | ||
592 | struct cache_info *dcache; | ||
593 | register unsigned long a0, a1, a0e; | ||
594 | |||
595 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | ||
596 | sr_with_bl = orig_sr | (1<<28); | ||
597 | base_addr = ((unsigned long)&empty_zero_page[0]); | ||
598 | |||
599 | /* See comment under 1-way above */ | ||
600 | base_addr = ((base_addr >> 16) << 16); | ||
601 | base_addr |= start; | ||
602 | |||
603 | dcache = &cpu_data->dcache; | ||
604 | linesz = dcache->linesz; | ||
605 | way_incr = dcache->way_incr; | ||
606 | way_size = dcache->way_size; | ||
607 | |||
608 | a0 = base_addr; | ||
609 | a1 = a0 + way_incr; | ||
610 | a0e = base_addr + extent_per_way; | ||
611 | do { | ||
612 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | ||
613 | asm volatile("movca.l r0, @%0\n\t" | ||
614 | "movca.l r0, @%1\n\t" | ||
615 | "ocbi @%0\n\t" | ||
616 | "ocbi @%1" : : | ||
617 | "r" (a0), "r" (a1)); | ||
618 | a0 += linesz; | ||
619 | a1 += linesz; | ||
620 | asm volatile("movca.l r0, @%0\n\t" | ||
621 | "movca.l r0, @%1\n\t" | ||
622 | "ocbi @%0\n\t" | ||
623 | "ocbi @%1" : : | ||
624 | "r" (a0), "r" (a1)); | ||
625 | a0 += linesz; | ||
626 | a1 += linesz; | ||
627 | asm volatile("movca.l r0, @%0\n\t" | ||
628 | "movca.l r0, @%1\n\t" | ||
629 | "ocbi @%0\n\t" | ||
630 | "ocbi @%1" : : | ||
631 | "r" (a0), "r" (a1)); | ||
632 | a0 += linesz; | ||
633 | a1 += linesz; | ||
634 | asm volatile("movca.l r0, @%0\n\t" | ||
635 | "movca.l r0, @%1\n\t" | ||
636 | "ocbi @%0\n\t" | ||
637 | "ocbi @%1" : : | ||
638 | "r" (a0), "r" (a1)); | ||
639 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | ||
640 | a0 += linesz; | ||
641 | a1 += linesz; | ||
642 | } while (a0 < a0e); | ||
643 | } | ||
644 | |||
645 | static void __flush_dcache_segment_4way(unsigned long start, | ||
646 | unsigned long extent_per_way) | ||
647 | { | ||
648 | unsigned long orig_sr, sr_with_bl; | ||
649 | unsigned long base_addr; | ||
650 | unsigned long way_incr, linesz, way_size; | ||
651 | struct cache_info *dcache; | ||
652 | register unsigned long a0, a1, a2, a3, a0e; | ||
653 | |||
654 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | ||
655 | sr_with_bl = orig_sr | (1<<28); | ||
656 | base_addr = ((unsigned long)&empty_zero_page[0]); | ||
657 | |||
658 | /* See comment under 1-way above */ | ||
659 | base_addr = ((base_addr >> 16) << 16); | ||
660 | base_addr |= start; | ||
661 | |||
662 | dcache = &cpu_data->dcache; | ||
663 | linesz = dcache->linesz; | ||
664 | way_incr = dcache->way_incr; | ||
665 | way_size = dcache->way_size; | ||
666 | |||
667 | a0 = base_addr; | ||
668 | a1 = a0 + way_incr; | ||
669 | a2 = a1 + way_incr; | ||
670 | a3 = a2 + way_incr; | ||
671 | a0e = base_addr + extent_per_way; | ||
672 | do { | ||
673 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | ||
674 | asm volatile("movca.l r0, @%0\n\t" | ||
675 | "movca.l r0, @%1\n\t" | ||
676 | "movca.l r0, @%2\n\t" | ||
677 | "movca.l r0, @%3\n\t" | ||
678 | "ocbi @%0\n\t" | ||
679 | "ocbi @%1\n\t" | ||
680 | "ocbi @%2\n\t" | ||
681 | "ocbi @%3\n\t" : : | ||
682 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | ||
683 | a0 += linesz; | ||
684 | a1 += linesz; | ||
685 | a2 += linesz; | ||
686 | a3 += linesz; | ||
687 | asm volatile("movca.l r0, @%0\n\t" | ||
688 | "movca.l r0, @%1\n\t" | ||
689 | "movca.l r0, @%2\n\t" | ||
690 | "movca.l r0, @%3\n\t" | ||
691 | "ocbi @%0\n\t" | ||
692 | "ocbi @%1\n\t" | ||
693 | "ocbi @%2\n\t" | ||
694 | "ocbi @%3\n\t" : : | ||
695 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | ||
696 | a0 += linesz; | ||
697 | a1 += linesz; | ||
698 | a2 += linesz; | ||
699 | a3 += linesz; | ||
700 | asm volatile("movca.l r0, @%0\n\t" | ||
701 | "movca.l r0, @%1\n\t" | ||
702 | "movca.l r0, @%2\n\t" | ||
703 | "movca.l r0, @%3\n\t" | ||
704 | "ocbi @%0\n\t" | ||
705 | "ocbi @%1\n\t" | ||
706 | "ocbi @%2\n\t" | ||
707 | "ocbi @%3\n\t" : : | ||
708 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | ||
709 | a0 += linesz; | ||
710 | a1 += linesz; | ||
711 | a2 += linesz; | ||
712 | a3 += linesz; | ||
713 | asm volatile("movca.l r0, @%0\n\t" | ||
714 | "movca.l r0, @%1\n\t" | ||
715 | "movca.l r0, @%2\n\t" | ||
716 | "movca.l r0, @%3\n\t" | ||
717 | "ocbi @%0\n\t" | ||
718 | "ocbi @%1\n\t" | ||
719 | "ocbi @%2\n\t" | ||
720 | "ocbi @%3\n\t" : : | ||
721 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | ||
722 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | ||
723 | a0 += linesz; | ||
724 | a1 += linesz; | ||
725 | a2 += linesz; | ||
726 | a3 += linesz; | ||
727 | } while (a0 < a0e); | ||
728 | } | ||
729 | |||
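
For reference, the __flush_dcache_segment_{1,2,4}way() routines above all walk the cache in the same order: for each cache-line offset within the requested extent they write-allocate (movca.l) and then invalidate (ocbi) one line per way, with the SR.BL bit held to block interrupts across each unrolled burst. The host-runnable sketch below only simulates the resulting address sequence; the 2-way geometry and the 0x8c000000 base address are hypothetical, and the real routines are hand-unrolled rather than looped.

#include <stdio.h>

int main(void)
{
        /* Hypothetical 2-way geometry: 16 KiB per way, 32-byte lines. */
        unsigned long way_incr = 0x4000UL, linesz = 32UL, ways = 2UL;
        unsigned long start = 0x1000UL;           /* colour chosen by the caller */
        unsigned long extent_per_way = 0x1000UL;  /* one page per way */
        unsigned long base = 0x8c000000UL | start;
        unsigned long off, way;

        for (off = 0; off < extent_per_way; off += linesz)
                for (way = 0; way < ways; way++)
                        printf("movca.l + ocbi @ %#lx\n",
                               base + way * way_incr + off);
        return 0;
}
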