author		Richard Curnow <richard.curnow@st.com>	2006-09-27 01:09:26 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2006-09-27 01:09:26 -0400
commit		b638d0b921dc95229af0dfd09cd24850336a2f75 (patch)
tree		0ef34527a47b22421fb92ba2141052fecfe36482
parent		fdfc74f9fcebdda14609159d5010b758a9409acf (diff)
sh: Optimized cache handling for SH-4/SH-4A caches.
This reworks some of the SH-4 cache handling code to more easily
accommodate newer-style caches (particularly for the > direct-mapped,
i.e. set-associative, case), as well as optimizing some of the old code.

Signed-off-by: Richard Curnow <richard.curnow@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
-rw-r--r--	arch/sh/kernel/cpu/init.c	|  16
-rw-r--r--	arch/sh/kernel/cpu/sh4/probe.c	|  11
-rw-r--r--	arch/sh/mm/cache-sh4.c		| 517
-rw-r--r--	arch/sh/mm/clear_page.S		|  99
-rw-r--r--	include/asm-sh/cache.h		|  22
5 files changed, 475 insertions(+), 190 deletions(-)
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index 868e68b28880..731dd61419dd 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -4,6 +4,7 @@
  * CPU init code
  *
  * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2003 Richard Curnow
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -51,7 +52,15 @@ static void __init cache_init(void)
 	ccr = ctrl_inl(CCR);
 
 	/*
-	 * If the cache is already enabled .. flush it.
+	 * At this point we don't know whether the cache is enabled or not - a
+	 * bootloader may have enabled it.  There are at least 2 things that
+	 * could be dirty in the cache at this point:
+	 *  1. kernel command line set up by boot loader
+	 *  2. spilled registers from the prolog of this function
+	 * => before re-initialising the cache, we must do a purge of the whole
+	 *    cache out to memory for safety.  As long as nothing is spilled
+	 *    during the loop to lines that have already been done, this is safe.
+	 * - RPC
 	 */
 	if (ccr & CCR_CACHE_ENABLE) {
 		unsigned long ways, waysize, addrstart;
@@ -98,6 +107,8 @@ static void __init cache_init(void)
 	/* Force EMODE if possible */
 	if (cpu_data->dcache.ways > 1)
 		flags |= CCR_CACHE_EMODE;
+	else
+		flags &= ~CCR_CACHE_EMODE;
 #endif
 
 #ifdef CONFIG_SH_WRITETHROUGH
@@ -112,6 +123,9 @@ static void __init cache_init(void)
 	/* Turn on OCRAM -- halve the OC */
 	flags |= CCR_CACHE_ORA;
 	cpu_data->dcache.sets >>= 1;
+
+	cpu_data->dcache.way_size = cpu_data->dcache.sets *
+		cpu_data->dcache.linesz;
 #endif
 
 	ctrl_outl(flags, CCR);
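
The purge the new comment describes happens through the SH-4's memory-mapped cache
address array: storing a zero tag/valid word to each entry drops the line, and an
entry whose U (dirty) bit is set is written back to memory first. A minimal sketch
of the shape of that loop, for illustration only (the real loop lives in
cache_init() and also has to account for CCR_CACHE_ORA halving the way size):

	/* Illustrative sketch, not the exact kernel code. */
	static void purge_dcache_via_address_array(void)
	{
		unsigned long ways = cpu_data->dcache.ways;
		unsigned long waysize = cpu_data->dcache.sets <<
					cpu_data->dcache.entry_shift;
		unsigned long addrstart = CACHE_OC_ADDRESS_ARRAY;

		while (ways--) {
			unsigned long addr;

			/* Zeroing an entry clears V and U; a dirty (U=1)
			 * line is written back before being invalidated. */
			for (addr = addrstart; addr < addrstart + waysize;
			     addr += cpu_data->dcache.linesz)
				ctrl_outl(0, addr);

			addrstart += cpu_data->dcache.way_incr;
		}
	}
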
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index 42427b79697b..1208da8fe5db 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -113,6 +113,11 @@ int __init detect_cpu_and_cache_system(void)
 		break;
 	}
 
+#ifdef CONFIG_SH_DIRECT_MAPPED
+	cpu_data->icache.ways = 1;
+	cpu_data->dcache.ways = 1;
+#endif
+
 	/*
 	 * On anything that's not a direct-mapped cache, look to the CVR
 	 * for I/D-cache specifics.
@@ -125,6 +130,9 @@ int __init detect_cpu_and_cache_system(void)
 			(cpu_data->icache.way_incr - (1 << 5));
 	}
 
+	cpu_data->icache.way_size = cpu_data->icache.sets *
+		cpu_data->icache.linesz;
+
 	if (cpu_data->dcache.ways > 1) {
 		size = sizes[(cvr >> 16) & 0xf];
 		cpu_data->dcache.way_incr = (size >> 1);
@@ -133,6 +141,9 @@ int __init detect_cpu_and_cache_system(void)
 			(cpu_data->dcache.way_incr - (1 << 5));
 	}
 
+	cpu_data->dcache.way_size = cpu_data->dcache.sets *
+		cpu_data->dcache.linesz;
+
 	return 0;
 }
 
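
The way_size values computed above are simply sets times line size. As a concrete
illustration with hypothetical but typical SH-4 geometry:

	static void way_size_example(void)
	{
		/* Hypothetical: 512 sets of 32-byte lines. */
		unsigned int sets = 512, linesz = 32;
		unsigned int way_size = sets * linesz;	/* 16384: one 16KB way */
	}

This is the per-way extent that flush_dcache_all() later hands to the segment
flushers in cache-sh4.c.
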
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 846b63d6f5e8..c036c2b4ac2b 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -25,28 +25,95 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-extern void __flush_cache_4096(unsigned long addr, unsigned long phys,
+static void __flush_dcache_segment_1way(unsigned long start,
+					unsigned long extent);
+static void __flush_dcache_segment_2way(unsigned long start,
+					unsigned long extent);
+static void __flush_dcache_segment_4way(unsigned long start,
+					unsigned long extent);
+
+static void __flush_cache_4096(unsigned long addr, unsigned long phys,
 			       unsigned long exec_offset);
-extern void __flush_cache_4096_all(unsigned long start);
-static void __flush_cache_4096_all_ex(unsigned long start);
-extern void __flush_dcache_all(void);
-static void __flush_dcache_all_ex(void);
+
+/*
+ * This is initialised here to ensure that it is not placed in the BSS.  If
+ * that were to happen, note that cache_init gets called before the BSS is
+ * cleared, so this would get nulled out which would be hopeless.
+ */
+static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
+	(void (*)(unsigned long, unsigned long))0xdeadbeef;
+
+static void compute_alias(struct cache_info *c)
+{
+	c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
+	c->n_aliases = (c->alias_mask >> PAGE_SHIFT) + 1;
+}
+
+static void __init emit_cache_params(void)
+{
+	printk("PVR=%08x CVR=%08x PRR=%08x\n",
+		ctrl_inl(CCN_PVR),
+		ctrl_inl(CCN_CVR),
+		ctrl_inl(CCN_PRR));
+	printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+		cpu_data->icache.ways,
+		cpu_data->icache.sets,
+		cpu_data->icache.way_incr);
+	printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+		cpu_data->icache.entry_mask,
+		cpu_data->icache.alias_mask,
+		cpu_data->icache.n_aliases);
+	printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+		cpu_data->dcache.ways,
+		cpu_data->dcache.sets,
+		cpu_data->dcache.way_incr);
+	printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+		cpu_data->dcache.entry_mask,
+		cpu_data->dcache.alias_mask,
+		cpu_data->dcache.n_aliases);
+
+	if (!__flush_dcache_segment_fn)
+		panic("unknown number of cache ways\n");
+}
 
 /*
  * SH-4 has virtually indexed and physically tagged cache.
  */
 
-struct semaphore p3map_sem[4];
+/* Worst case assumed to be 64k cache, direct-mapped i.e. 4 synonym bits. */
+#define MAX_P3_SEMAPHORES 16
+
+struct semaphore p3map_sem[MAX_P3_SEMAPHORES];
 
 void __init p3_cache_init(void)
 {
-	if (remap_area_pages(P3SEG, 0, PAGE_SIZE*4, _PAGE_CACHABLE))
+	int i;
+
+	compute_alias(&cpu_data->icache);
+	compute_alias(&cpu_data->dcache);
+
+	switch (cpu_data->dcache.ways) {
+	case 1:
+		__flush_dcache_segment_fn = __flush_dcache_segment_1way;
+		break;
+	case 2:
+		__flush_dcache_segment_fn = __flush_dcache_segment_2way;
+		break;
+	case 4:
+		__flush_dcache_segment_fn = __flush_dcache_segment_4way;
+		break;
+	default:
+		__flush_dcache_segment_fn = NULL;
+		break;
+	}
+
+	emit_cache_params();
+
+	if (remap_area_pages(P3SEG, 0, PAGE_SIZE * 4, _PAGE_CACHABLE))
 		panic("%s failed.", __FUNCTION__);
 
-	sema_init (&p3map_sem[0], 1);
-	sema_init (&p3map_sem[1], 1);
-	sema_init (&p3map_sem[2], 1);
-	sema_init (&p3map_sem[3], 1);
+	for (i = 0; i < cpu_data->dcache.n_aliases; i++)
+		sema_init(&p3map_sem[i], 1);
 }
 
 /*
@@ -91,7 +158,6 @@ void __flush_purge_region(void *start, int size)
 	}
 }
 
-
 /*
  * No write back please
  */
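
To see what the compute_alias() helper introduced above produces, here is a
standalone rerun of its arithmetic for a hypothetical 16KB direct-mapped D-cache
with 4KB pages; the result matches the hardcoded 0x1000/0x2000/0x3000 offsets and
the four p3map semaphores this patch removes:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical geometry: 512 sets, 32-byte lines
		 * (entry_shift = 5), PAGE_SIZE = 4096 (PAGE_SHIFT = 12). */
		unsigned int sets = 512, entry_shift = 5;
		unsigned int page_size = 4096, page_shift = 12;

		unsigned int alias_mask = ((sets - 1) << entry_shift) &
					  ~(page_size - 1);
		unsigned int n_aliases = (alias_mask >> page_shift) + 1;

		/* Prints: alias_mask=0x3000 n_aliases=4 */
		printf("alias_mask=0x%x n_aliases=%u\n", alias_mask, n_aliases);
		return 0;
	}
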
@@ -110,46 +176,6 @@ void __flush_invalidate_region(void *start, int size)
 	}
 }
 
-static void __flush_dcache_all_ex(void)
-{
-	unsigned long addr, end_addr, entry_offset;
-
-	end_addr = CACHE_OC_ADDRESS_ARRAY +
-		(cpu_data->dcache.sets << cpu_data->dcache.entry_shift) *
-		cpu_data->dcache.ways;
-
-	entry_offset = 1 << cpu_data->dcache.entry_shift;
-	for (addr = CACHE_OC_ADDRESS_ARRAY;
-	     addr < end_addr;
-	     addr += entry_offset) {
-		ctrl_outl(0, addr);
-	}
-}
-
-static void __flush_cache_4096_all_ex(unsigned long start)
-{
-	unsigned long addr, entry_offset;
-	int i;
-
-	entry_offset = 1 << cpu_data->dcache.entry_shift;
-	for (i = 0; i < cpu_data->dcache.ways;
-	     i++, start += cpu_data->dcache.way_incr) {
-		for (addr = CACHE_OC_ADDRESS_ARRAY + start;
-		     addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start;
-		     addr += entry_offset) {
-			ctrl_outl(0, addr);
-		}
-	}
-}
-
-void flush_cache_4096_all(unsigned long start)
-{
-	if (cpu_data->dcache.ways == 1)
-		__flush_cache_4096_all(start);
-	else
-		__flush_cache_4096_all_ex(start);
-}
-
 /*
  * Write back the range of D-cache, and purge the I-cache.
  *
@@ -180,9 +206,11 @@ void flush_cache_sigtramp(unsigned long addr)
 
 	local_irq_save(flags);
 	jump_to_P2();
+
 	for (i = 0; i < cpu_data->icache.ways;
 	     i++, index += cpu_data->icache.way_incr)
 		ctrl_outl(0, index);	/* Clear out Valid-bit */
+
 	back_to_P1();
 	wmb();
 	local_irq_restore(flags);
@@ -194,8 +222,8 @@ static inline void flush_cache_4096(unsigned long start,
 	unsigned long flags;
 
 	/*
-	 * SH7751, SH7751R, and ST40 have no restriction to handle cache.
-	 * (While SH7750 must do that at P2 area.)
+	 * All types of SH-4 require PC to be in P2 to operate on the I-cache.
+	 * Some types of SH-4 require PC to be in P2 to operate on the D-cache.
 	 */
 	if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG)
 	    || start < CACHE_OC_ADDRESS_ARRAY) {
@@ -217,12 +245,13 @@ void flush_dcache_page(struct page *page)
 {
 	if (test_bit(PG_mapped, &page->flags)) {
 		unsigned long phys = PHYSADDR(page_address(page));
+		unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
+		int i, n;
 
 		/* Loop all the D-cache */
-		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY, phys);
-		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys);
-		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys);
-		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys);
+		n = cpu_data->dcache.n_aliases;
+		for (i = 0; i < n; i++, addr += PAGE_SIZE)
+			flush_cache_4096(addr, phys);
 	}
 
 	wmb();
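
When n_aliases happens to be 4 (the 16KB-way, 4KB-page case), the new loop above
unrolls to exactly the four hardcoded calls it replaces:

	/* Equivalent expansion for n_aliases == 4: */
	flush_cache_4096(CACHE_OC_ADDRESS_ARRAY,          phys);
	flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys);
	flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys);
	flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys);
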
@@ -246,10 +275,7 @@ static inline void flush_icache_all(void)
 
 void flush_dcache_all(void)
 {
-	if (cpu_data->dcache.ways == 1)
-		__flush_dcache_all();
-	else
-		__flush_dcache_all_ex();
+	(*__flush_dcache_segment_fn)(0UL, cpu_data->dcache.way_size);
 	wmb();
 }
 
@@ -261,6 +287,16 @@ void flush_cache_all(void)
 
 void flush_cache_mm(struct mm_struct *mm)
 {
+	/*
+	 * Note : (RPC) since the caches are physically tagged, the only point
+	 * of flush_cache_mm for SH-4 is to get rid of aliases from the
+	 * D-cache.  The assumption elsewhere, e.g. flush_cache_range, is that
+	 * lines can stay resident so long as the virtual address they were
+	 * accessed with (hence cache set) is in accord with the physical
+	 * address (i.e. tag).  It's no different here.  So I reckon we don't
+	 * need to flush the I-cache, since aliases don't matter for that.  We
+	 * should try that.
+	 */
 	flush_cache_all();
 }
 
@@ -273,24 +309,36 @@ void flush_cache_mm(struct mm_struct *mm)
 void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn)
 {
 	unsigned long phys = pfn << PAGE_SHIFT;
+	unsigned int alias_mask;
+
+	alias_mask = cpu_data->dcache.alias_mask;
 
 	/* We only need to flush D-cache when we have alias */
-	if ((address^phys) & CACHE_ALIAS) {
+	if ((address^phys) & alias_mask) {
 		/* Loop 4K of the D-cache */
 		flush_cache_4096(
-			CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS),
+			CACHE_OC_ADDRESS_ARRAY | (address & alias_mask),
 			phys);
 		/* Loop another 4K of the D-cache */
 		flush_cache_4096(
-			CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS),
+			CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask),
 			phys);
 	}
 
-	if (vma->vm_flags & VM_EXEC)
-		/* Loop 4K (half) of the I-cache */
+	alias_mask = cpu_data->icache.alias_mask;
+	if (vma->vm_flags & VM_EXEC) {
+		/*
+		 * Evict entries from the portion of the cache from which code
+		 * may have been executed at this address (virtual).  There's
+		 * no need to evict from the portion corresponding to the
+		 * physical address as for the D-cache, because we know the
+		 * kernel has never executed the code through its identity
+		 * translation.
+		 */
 		flush_cache_4096(
-			CACHE_IC_ADDRESS_ARRAY | (address & 0x1000),
+			CACHE_IC_ADDRESS_ARRAY | (address & alias_mask),
 			phys);
+	}
 }
 
 /*
@@ -305,14 +353,28 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigne
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end)
 {
-	unsigned long p = start & PAGE_MASK;
+	unsigned long d = 0, p = start & PAGE_MASK;
+	unsigned long alias_mask = cpu_data->dcache.alias_mask;
+	unsigned long n_aliases = cpu_data->dcache.n_aliases;
+	unsigned long select_bit;
+	unsigned long all_aliases_mask;
+	unsigned long addr_offset;
+	unsigned long phys;
 	pgd_t *dir;
 	pmd_t *pmd;
 	pud_t *pud;
 	pte_t *pte;
 	pte_t entry;
-	unsigned long phys;
-	unsigned long d = 0;
+	int i;
+
+	/*
+	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
+	 * the cache is physically tagged, the data can just be left in there.
+	 */
+	if (n_aliases == 0)
+		return;
+
+	all_aliases_mask = (1 << n_aliases) - 1;
 
 	/*
 	 * Don't bother with the lookup and alias check if we have a
@@ -335,39 +397,52 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 
 	do {
 		if (pmd_none(*pmd) || pmd_bad(*pmd)) {
-			p &= ~((1 << PMD_SHIFT) -1);
+			p &= ~((1 << PMD_SHIFT) - 1);
 			p += (1 << PMD_SHIFT);
 			pmd++;
+
 			continue;
 		}
+
 		pte = pte_offset_kernel(pmd, p);
+
 		do {
 			entry = *pte;
+
 			if ((pte_val(entry) & _PAGE_PRESENT)) {
-				phys = pte_val(entry)&PTE_PHYS_MASK;
-				if ((p^phys) & CACHE_ALIAS) {
-					d |= 1 << ((p & CACHE_ALIAS)>>12);
-					d |= 1 << ((phys & CACHE_ALIAS)>>12);
-					if (d == 0x0f)
+				phys = pte_val(entry) & PTE_PHYS_MASK;
+
+				if ((p ^ phys) & alias_mask) {
+					d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
+					d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
+
+					if (d == all_aliases_mask)
 						goto loop_exit;
 				}
 			}
+
 			pte++;
 			p += PAGE_SIZE;
 		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
 		pmd++;
 	} while (p < end);
- loop_exit:
-	if (d & 1)
-		flush_cache_4096_all(0);
-	if (d & 2)
-		flush_cache_4096_all(0x1000);
-	if (d & 4)
-		flush_cache_4096_all(0x2000);
-	if (d & 8)
-		flush_cache_4096_all(0x3000);
-	if (vma->vm_flags & VM_EXEC)
+
+loop_exit:
+	for (i = 0, select_bit = 0x1, addr_offset = 0x0; i < n_aliases;
+	     i++, select_bit <<= 1, addr_offset += PAGE_SIZE)
+		if (d & select_bit) {
+			(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
+			wmb();
+		}
+
+	if (vma->vm_flags & VM_EXEC) {
+		/*
+		 * TODO: Is this required???  Need to look at how I-cache
+		 * coherency is assured when new programs are loaded to see if
+		 * this matters.
+		 */
 		flush_icache_all();
+	}
 }
 
 /*
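
The 'd' bitmap in the hunk above records one bit per D-cache colour that actually
needs flushing, so only affected segments are walked at loop_exit. A small
standalone sketch of that bookkeeping, with hypothetical addresses and the usual
alias_mask = 0x3000 / PAGE_SHIFT = 12:

	unsigned long d = 0;
	unsigned long alias_mask = 0x3000;
	unsigned long p    = 0x00403000;	/* hypothetical virtual page */
	unsigned long phys = 0x0c001000;	/* hypothetical physical page */

	if ((p ^ phys) & alias_mask) {	/* virtual/physical colours differ */
		d |= 1 << ((p & alias_mask) >> 12);	/* marks colour 3 */
		d |= 1 << ((phys & alias_mask) >> 12);	/* marks colour 1 */
	}
	/* d == 0x0a: only colours 1 and 3 get a segment flush at loop_exit. */
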
@@ -384,3 +459,271 @@ void flush_icache_user_range(struct vm_area_struct *vma,
 	mb();
 }
 
+/**
+ * __flush_cache_4096
+ *
+ * @addr:  address in memory mapped cache array
+ * @phys:  P1 address to flush (has to match tags if addr has 'A' bit
+ *         set i.e. associative write)
+ * @exec_offset: set to 0x20000000 if flush has to be executed from P2
+ *               region else 0x0
+ *
+ * The offset into the cache array implied by 'addr' selects the
+ * 'colour' of the virtual address range that will be flushed.  The
+ * operation (purge/write-back) is selected by the lower 2 bits of
+ * 'phys'.
+ */
+static void __flush_cache_4096(unsigned long addr, unsigned long phys,
+			       unsigned long exec_offset)
+{
+	int way_count;
+	unsigned long base_addr = addr;
+	struct cache_info *dcache;
+	unsigned long way_incr;
+	unsigned long a, ea, p;
+	unsigned long temp_pc;
+
+	dcache = &cpu_data->dcache;
+	/* Write this way for better assembly. */
+	way_count = dcache->ways;
+	way_incr = dcache->way_incr;
+
+	/*
+	 * Apply exec_offset (i.e. branch to P2 if required.).
+	 *
+	 * FIXME:
+	 *
+	 *	If I write "=r" for the (temp_pc), it puts this in r6 hence
+	 *	trashing exec_offset before it's been added on - why?  Hence
+	 *	"=&r" as a 'workaround'
+	 */
+	asm volatile("mov.l 1f, %0\n\t"
+		     "add   %1, %0\n\t"
+		     "jmp   @%0\n\t"
+		     "nop\n\t"
+		     ".balign 4\n\t"
+		     "1:  .long 2f\n\t"
+		     "2:\n" : "=&r" (temp_pc) : "r" (exec_offset));
+
+	/*
+	 * We know there will be >=1 iteration, so write as do-while to avoid
+	 * pointless head-of-loop check for 0 iterations.
+	 */
+	do {
+		ea = base_addr + PAGE_SIZE;
+		a = base_addr;
+		p = phys;
+
+		do {
+			*(volatile unsigned long *)a = p;
+			/*
+			 * Next line: intentionally not p+32, saves an add, p
+			 * will do since only the cache tag bits need to
+			 * match.
+			 */
+			*(volatile unsigned long *)(a+32) = p;
+			a += 64;
+			p += 64;
+		} while (a < ea);
+
+		base_addr += way_incr;
+	} while (--way_count != 0);
+}
+
+/*
+ * Break the 1, 2 and 4 way variants of this out into separate functions to
+ * avoid nearly all the overhead of having the conditional stuff in the function
+ * bodies (+ the 1 and 2 way cases avoid saving any registers too).
+ */
+static void __flush_dcache_segment_1way(unsigned long start,
+					unsigned long extent_per_way)
+{
+	unsigned long orig_sr, sr_with_bl;
+	unsigned long base_addr;
+	unsigned long way_incr, linesz, way_size;
+	struct cache_info *dcache;
+	register unsigned long a0, a0e;
+
+	asm volatile("stc sr, %0" : "=r" (orig_sr));
+	sr_with_bl = orig_sr | (1<<28);
+	base_addr = ((unsigned long)&empty_zero_page[0]);
+
+	/*
+	 * The previous code aligned base_addr to 16k, i.e. the way_size of all
+	 * existing SH-4 D-caches.  Whilst I don't see a need to have this
+	 * aligned to any better than the cache line size (which it will be
+	 * anyway by construction), let's align it to at least the way_size of
+	 * any existing or conceivable SH-4 D-cache.  -- RPC
+	 */
+	base_addr = ((base_addr >> 16) << 16);
+	base_addr |= start;
+
+	dcache = &cpu_data->dcache;
+	linesz = dcache->linesz;
+	way_incr = dcache->way_incr;
+	way_size = dcache->way_size;
+
+	a0 = base_addr;
+	a0e = base_addr + extent_per_way;
+	do {
+		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+		asm volatile("movca.l r0, @%0\n\t"
+			     "ocbi @%0" : : "r" (a0));
+		a0 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "ocbi @%0" : : "r" (a0));
+		a0 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "ocbi @%0" : : "r" (a0));
+		a0 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "ocbi @%0" : : "r" (a0));
+		asm volatile("ldc %0, sr" : : "r" (orig_sr));
+		a0 += linesz;
+	} while (a0 < a0e);
+}
+
+static void __flush_dcache_segment_2way(unsigned long start,
+					unsigned long extent_per_way)
+{
+	unsigned long orig_sr, sr_with_bl;
+	unsigned long base_addr;
+	unsigned long way_incr, linesz, way_size;
+	struct cache_info *dcache;
+	register unsigned long a0, a1, a0e;
+
+	asm volatile("stc sr, %0" : "=r" (orig_sr));
+	sr_with_bl = orig_sr | (1<<28);
+	base_addr = ((unsigned long)&empty_zero_page[0]);
+
+	/* See comment under 1-way above */
+	base_addr = ((base_addr >> 16) << 16);
+	base_addr |= start;
+
+	dcache = &cpu_data->dcache;
+	linesz = dcache->linesz;
+	way_incr = dcache->way_incr;
+	way_size = dcache->way_size;
+
+	a0 = base_addr;
+	a1 = a0 + way_incr;
+	a0e = base_addr + extent_per_way;
+	do {
+		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+		asm volatile("movca.l r0, @%0\n\t"
+			     "movca.l r0, @%1\n\t"
+			     "ocbi @%0\n\t"
+			     "ocbi @%1" : :
+			     "r" (a0), "r" (a1));
+		a0 += linesz;
+		a1 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "movca.l r0, @%1\n\t"
+			     "ocbi @%0\n\t"
+			     "ocbi @%1" : :
+			     "r" (a0), "r" (a1));
+		a0 += linesz;
+		a1 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "movca.l r0, @%1\n\t"
+			     "ocbi @%0\n\t"
+			     "ocbi @%1" : :
+			     "r" (a0), "r" (a1));
+		a0 += linesz;
+		a1 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "movca.l r0, @%1\n\t"
+			     "ocbi @%0\n\t"
+			     "ocbi @%1" : :
+			     "r" (a0), "r" (a1));
+		asm volatile("ldc %0, sr" : : "r" (orig_sr));
+		a0 += linesz;
+		a1 += linesz;
+	} while (a0 < a0e);
+}
+
+static void __flush_dcache_segment_4way(unsigned long start,
+					unsigned long extent_per_way)
+{
+	unsigned long orig_sr, sr_with_bl;
+	unsigned long base_addr;
+	unsigned long way_incr, linesz, way_size;
+	struct cache_info *dcache;
+	register unsigned long a0, a1, a2, a3, a0e;
+
+	asm volatile("stc sr, %0" : "=r" (orig_sr));
+	sr_with_bl = orig_sr | (1<<28);
+	base_addr = ((unsigned long)&empty_zero_page[0]);
+
+	/* See comment under 1-way above */
+	base_addr = ((base_addr >> 16) << 16);
+	base_addr |= start;
+
+	dcache = &cpu_data->dcache;
+	linesz = dcache->linesz;
+	way_incr = dcache->way_incr;
+	way_size = dcache->way_size;
+
+	a0 = base_addr;
+	a1 = a0 + way_incr;
+	a2 = a1 + way_incr;
+	a3 = a2 + way_incr;
+	a0e = base_addr + extent_per_way;
+	do {
+		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+		asm volatile("movca.l r0, @%0\n\t"
+			     "movca.l r0, @%1\n\t"
+			     "movca.l r0, @%2\n\t"
+			     "movca.l r0, @%3\n\t"
+			     "ocbi @%0\n\t"
+			     "ocbi @%1\n\t"
+			     "ocbi @%2\n\t"
+			     "ocbi @%3\n\t" : :
+			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+		a0 += linesz;
+		a1 += linesz;
+		a2 += linesz;
+		a3 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "movca.l r0, @%1\n\t"
+			     "movca.l r0, @%2\n\t"
+			     "movca.l r0, @%3\n\t"
+			     "ocbi @%0\n\t"
+			     "ocbi @%1\n\t"
+			     "ocbi @%2\n\t"
+			     "ocbi @%3\n\t" : :
+			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+		a0 += linesz;
+		a1 += linesz;
+		a2 += linesz;
+		a3 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "movca.l r0, @%1\n\t"
+			     "movca.l r0, @%2\n\t"
+			     "movca.l r0, @%3\n\t"
+			     "ocbi @%0\n\t"
+			     "ocbi @%1\n\t"
+			     "ocbi @%2\n\t"
+			     "ocbi @%3\n\t" : :
+			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+		a0 += linesz;
+		a1 += linesz;
+		a2 += linesz;
+		a3 += linesz;
+		asm volatile("movca.l r0, @%0\n\t"
+			     "movca.l r0, @%1\n\t"
+			     "movca.l r0, @%2\n\t"
+			     "movca.l r0, @%3\n\t"
+			     "ocbi @%0\n\t"
+			     "ocbi @%1\n\t"
+			     "ocbi @%2\n\t"
+			     "ocbi @%3\n\t" : :
+			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+		asm volatile("ldc %0, sr" : : "r" (orig_sr));
+		a0 += linesz;
+		a1 += linesz;
+		a2 += linesz;
+		a3 += linesz;
+	} while (a0 < a0e);
+}
+
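
The three segment flushers above all build on one SH-4 idiom: movca.l allocates a
cache line for the target address without reading memory, which forces write-back
of whatever dirty line previously occupied that slot, and the following ocbi
discards the junk line just allocated. The target addresses sit inside
empty_zero_page, so the data movca.l writes is never architecturally visible; the
BL bit (bit 28 of SR) is raised so no exception can fire between the pair. A
minimal sketch of the per-line primitive, distilled from the code above:

	/* Sketch of the primitive repeated by __flush_dcache_segment_*way().
	 * 'a' indexes a line inside empty_zero_page; r0's contents are
	 * irrelevant because the allocated line is immediately invalidated. */
	static inline void flush_dcache_line_via_movca(unsigned long a)
	{
		__asm__ __volatile__(
			"movca.l r0, @%0\n\t"	/* allocate; evicts old occupant */
			"ocbi	 @%0"		/* invalidate the junk line */
			: /* no outputs */
			: "r" (a)
			: "memory");
	}
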
diff --git a/arch/sh/mm/clear_page.S b/arch/sh/mm/clear_page.S
index 08acead7b2a1..7b96425ae270 100644
--- a/arch/sh/mm/clear_page.S
+++ b/arch/sh/mm/clear_page.S
@@ -193,102 +193,5 @@ ENTRY(__clear_user_page)
 	nop
 .L4096:	.word	4096
 
-ENTRY(__flush_cache_4096)
-	mov.l	1f,r3
-	add	r6,r3
-	mov	r4,r0
-	mov	#64,r2
-	shll	r2
-	mov	#64,r6
-	jmp	@r3
-	 mov	#96,r7
-	.align	2
-1:	.long	2f
-2:
-	.rept	32
-	mov.l	r5,@r0
-	mov.l	r5,@(32,r0)
-	mov.l	r5,@(r0,r6)
-	mov.l	r5,@(r0,r7)
-	add	r2,r5
-	add	r2,r0
-	.endr
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	rts
-	 nop
-
-ENTRY(__flush_dcache_all)
-	mov.l	2f,r0
-	mov.l	3f,r4
-	and	r0,r4		! r4 = (unsigned long)&empty_zero_page[0] & ~0xffffc000
-	stc	sr,r1		! save SR
-	mov.l	4f,r2
-	or	r1,r2
-	mov	#32,r3
-	shll2	r3
-1:
-	ldc	r2,sr		! set BL bit
-	movca.l	r0,@r4
-	ocbi	@r4
-	add	#32,r4
-	movca.l	r0,@r4
-	ocbi	@r4
-	add	#32,r4
-	movca.l	r0,@r4
-	ocbi	@r4
-	add	#32,r4
-	movca.l	r0,@r4
-	ocbi	@r4
-	ldc	r1,sr		! restore SR
-	dt	r3
-	bf/s	1b
-	 add	#32,r4
-
-	rts
-	 nop
-	.align	2
-2:	.long	0xffffc000
-3:	.long	empty_zero_page
-4:	.long	0x10000000	! BL bit
-
-/* __flush_cache_4096_all(unsigned long addr) */
-ENTRY(__flush_cache_4096_all)
-	mov.l	2f,r0
-	mov.l	3f,r2
-	and	r0,r2
-	or	r2,r4		! r4 = addr | (unsigned long)&empty_zero_page[0] & ~0x3fff
-	stc	sr,r1		! save SR
-	mov.l	4f,r2
-	or	r1,r2
-	mov	#32,r3
-1:
-	ldc	r2,sr		! set BL bit
-	movca.l	r0,@r4
-	ocbi	@r4
-	add	#32,r4
-	movca.l	r0,@r4
-	ocbi	@r4
-	add	#32,r4
-	movca.l	r0,@r4
-	ocbi	@r4
-	add	#32,r4
-	movca.l	r0,@r4
-	ocbi	@r4
-	ldc	r1,sr		! restore SR
-	dt	r3
-	bf/s	1b
-	 add	#32,r4
-
-	rts
-	 nop
-	.align	2
-2:	.long	0xffffc000
-3:	.long	empty_zero_page
-4:	.long	0x10000000	! BL bit
 #endif
+
diff --git a/include/asm-sh/cache.h b/include/asm-sh/cache.h
index 656fdfe9e8b4..33f13367054b 100644
--- a/include/asm-sh/cache.h
+++ b/include/asm-sh/cache.h
@@ -23,15 +23,29 @@
 #define L1_CACHE_ALIGN(x)	(((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 
 struct cache_info {
-	unsigned int ways;
-	unsigned int sets;
-	unsigned int linesz;
+	unsigned int ways;		/* Number of cache ways */
+	unsigned int sets;		/* Number of cache sets */
+	unsigned int linesz;		/* Cache line size (bytes) */
 
-	unsigned int way_incr;
+	unsigned int way_size;		/* sets * line size */
 
+	/*
+	 * way_incr is the address offset for accessing the next way
+	 * in memory mapped cache array ops.
+	 */
+	unsigned int way_incr;
 	unsigned int entry_shift;
 	unsigned int entry_mask;
 
+	/*
+	 * Compute a mask which selects the address bits which overlap between
+	 * 1. those used to select the cache set during indexing
+	 * 2. those in the physical page number.
+	 */
+	unsigned int alias_mask;
+
+	unsigned int n_aliases;		/* Number of aliases */
+
 	unsigned long flags;
 };
 
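
Pulling the new fields together, here is how they relate for a hypothetical 16KB
direct-mapped SH-4 D-cache with 32-byte lines and 4KB pages (illustrative values
only, not taken from any probe table):

	static struct cache_info example_dcache = {
		.ways		= 1,
		.sets		= 512,
		.linesz		= 32,
		.way_size	= 512 * 32,	/* 16KB = sets * linesz */
		.way_incr	= 512 * 32,	/* hypothetical: next way one way_size later */
		.entry_shift	= 5,		/* log2(linesz) */
		.entry_mask	= 0x3fe0,	/* (sets - 1) << entry_shift */
		.alias_mask	= 0x3000,	/* set-index bits above PAGE_SHIFT */
		.n_aliases	= 4,		/* page colours per way */
	};
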