Diffstat (limited to 'arch/sh/mm/cache-sh4.c')
 -rw-r--r--  arch/sh/mm/cache-sh4.c | 685
 1 file changed, 548 insertions(+), 137 deletions(-)
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 524cea5b47f9..e48cc22724d9 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,49 +2,120 @@
  * arch/sh/mm/cache-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002 Niibe Yutaka
- * Copyright (C) 2001, 2002, 2003, 2004 Paul Mundt
+ * Copyright (C) 2001 - 2006 Paul Mundt
  * Copyright (C) 2003 Richard Curnow
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License. See the file "COPYING" in the main directory of this archive
  * for more details.
  */
-
 #include <linux/init.h>
-#include <linux/mman.h>
 #include <linux/mm.h>
-#include <linux/threads.h>
 #include <asm/addrspace.h>
-#include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-extern void __flush_cache_4096_all(unsigned long start);
-static void __flush_cache_4096_all_ex(unsigned long start);
-extern void __flush_dcache_all(void);
-static void __flush_dcache_all_ex(void);
+/*
+ * The maximum number of pages we support up to when doing ranged dcache
+ * flushing. Anything exceeding this will simply flush the dcache in its
+ * entirety.
+ */
+#define MAX_DCACHE_PAGES        64      /* XXX: Tune for ways */
+
+static void __flush_dcache_segment_1way(unsigned long start,
+                                        unsigned long extent);
+static void __flush_dcache_segment_2way(unsigned long start,
+                                        unsigned long extent);
+static void __flush_dcache_segment_4way(unsigned long start,
+                                        unsigned long extent);
+
+static void __flush_cache_4096(unsigned long addr, unsigned long phys,
+                               unsigned long exec_offset);
+
+/*
+ * This is initialised here to ensure that it is not placed in the BSS. If
+ * that were to happen, note that cache_init gets called before the BSS is
+ * cleared, so this would get nulled out which would be hopeless.
+ */
+static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
+        (void (*)(unsigned long, unsigned long))0xdeadbeef;
+
+static void compute_alias(struct cache_info *c)
+{
+        c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
+        c->n_aliases = (c->alias_mask >> PAGE_SHIFT) + 1;
+}
+
+static void __init emit_cache_params(void)
+{
+        printk("PVR=%08x CVR=%08x PRR=%08x\n",
+                ctrl_inl(CCN_PVR),
+                ctrl_inl(CCN_CVR),
+                ctrl_inl(CCN_PRR));
+        printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+                cpu_data->icache.ways,
+                cpu_data->icache.sets,
+                cpu_data->icache.way_incr);
+        printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+                cpu_data->icache.entry_mask,
+                cpu_data->icache.alias_mask,
+                cpu_data->icache.n_aliases);
+        printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+                cpu_data->dcache.ways,
+                cpu_data->dcache.sets,
+                cpu_data->dcache.way_incr);
+        printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+                cpu_data->dcache.entry_mask,
+                cpu_data->dcache.alias_mask,
+                cpu_data->dcache.n_aliases);
+
+        if (!__flush_dcache_segment_fn)
+                panic("unknown number of cache ways\n");
+}
 
 /*
  * SH-4 has virtually indexed and physically tagged cache.
  */
 
-struct semaphore p3map_sem[4];
+/* Worst case assumed to be 64k cache, direct-mapped i.e. 4 synonym bits. */
+#define MAX_P3_SEMAPHORES 16
+
+struct semaphore p3map_sem[MAX_P3_SEMAPHORES];
 
 void __init p3_cache_init(void)
 {
-        if (remap_area_pages(P3SEG, 0, PAGE_SIZE*4, _PAGE_CACHABLE))
+        int i;
+
+        compute_alias(&cpu_data->icache);
+        compute_alias(&cpu_data->dcache);
+
+        switch (cpu_data->dcache.ways) {
+        case 1:
+                __flush_dcache_segment_fn = __flush_dcache_segment_1way;
+                break;
+        case 2:
+                __flush_dcache_segment_fn = __flush_dcache_segment_2way;
+                break;
+        case 4:
+                __flush_dcache_segment_fn = __flush_dcache_segment_4way;
+                break;
+        default:
+                __flush_dcache_segment_fn = NULL;
+                break;
+        }
+
+        emit_cache_params();
+
+        if (remap_area_pages(P3SEG, 0, PAGE_SIZE * 4, _PAGE_CACHABLE))
                 panic("%s failed.", __FUNCTION__);
 
-        sema_init (&p3map_sem[0], 1);
-        sema_init (&p3map_sem[1], 1);
-        sema_init (&p3map_sem[2], 1);
-        sema_init (&p3map_sem[3], 1);
+        for (i = 0; i < cpu_data->dcache.n_aliases; i++)
+                sema_init(&p3map_sem[i], 1);
 }
 
 /*
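
The compute_alias() helper added above is what lets the rest of the file drop its hard-coded assumption of four cache colours. Below is a standalone model of the same arithmetic; the geometry (512 sets of 32-byte lines, i.e. a 16KB direct-mapped way) is an illustrative assumption chosen to resemble an SH7750-class d-cache, not a value stated in the patch:

#include <stdio.h>

#define PAGE_SIZE       4096U
#define PAGE_SHIFT      12

/* Minimal stand-in for the fields of the kernel's struct cache_info. */
struct cache_info {
        unsigned int sets, entry_shift;
        unsigned int alias_mask, n_aliases;
};

/* Same formula as the patch: index bits above the page offset are synonyms. */
static void compute_alias(struct cache_info *c)
{
        c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
        c->n_aliases = (c->alias_mask >> PAGE_SHIFT) + 1;
}

int main(void)
{
        struct cache_info dc = { .sets = 512, .entry_shift = 5 };

        compute_alias(&dc);
        printf("alias_mask=0x%08x n_aliases=%u\n", dc.alias_mask, dc.n_aliases);
        return 0;
}

With a 16KB way and 4KB pages this prints alias_mask=0x00003000 and n_aliases=4: exactly the four colours the old code wired in as p3map_sem[4] and as the 0x1000/0x2000/0x3000 flush offsets removed further down.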
@@ -89,7 +160,6 @@ void __flush_purge_region(void *start, int size)
         }
 }
 
-
 /*
  * No write back please
  */
@@ -108,40 +178,6 @@ void __flush_invalidate_region(void *start, int size)
         }
 }
 
-static void __flush_dcache_all_ex(void)
-{
-        unsigned long addr, end_addr, entry_offset;
-
-        end_addr = CACHE_OC_ADDRESS_ARRAY + (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) * cpu_data->dcache.ways;
-        entry_offset = 1 << cpu_data->dcache.entry_shift;
-        for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; addr += entry_offset) {
-                ctrl_outl(0, addr);
-        }
-}
-
-static void __flush_cache_4096_all_ex(unsigned long start)
-{
-        unsigned long addr, entry_offset;
-        int i;
-
-        entry_offset = 1 << cpu_data->dcache.entry_shift;
-        for (i = 0; i < cpu_data->dcache.ways; i++, start += cpu_data->dcache.way_incr) {
-                for (addr = CACHE_OC_ADDRESS_ARRAY + start;
-                     addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start;
-                     addr += entry_offset) {
-                        ctrl_outl(0, addr);
-                }
-        }
-}
-
-void flush_cache_4096_all(unsigned long start)
-{
-        if (cpu_data->dcache.ways == 1)
-                __flush_cache_4096_all(start);
-        else
-                __flush_cache_4096_all_ex(start);
-}
-
 /*
  * Write back the range of D-cache, and purge the I-cache.
  *
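
The deleted helpers flushed by writing zero to every entry of the operand-cache address array. A rough cost model of that approach (the geometry and the 0xf4000000 array base are illustrative assumptions about an SH-4 part, not values quoted by the patch):

#include <stdio.h>

int main(void)
{
        /* Illustrative: 1 way of 512 sets, 32-byte lines. */
        unsigned int ways = 1, sets = 512, entry_shift = 5;
        unsigned long oc_array = 0xf4000000UL;  /* assumed OC array base */
        unsigned long entry_offset = 1UL << entry_shift;
        unsigned long addr, end, writes = 0;

        end = oc_array + ((unsigned long)sets << entry_shift) * ways;
        for (addr = oc_array; addr < end; addr += entry_offset)
                writes++;       /* one ctrl_outl(0, addr) per cache line */

        printf("%lu MMIO writes to flush the whole d-cache\n", writes);
        return 0;
}

That is 512 uncached array writes per full flush here, scaling with the number of ways; the replacement __flush_dcache_segment_*way() routines at the bottom of this diff do the same job with movca.l/ocbi pairs on ordinary cached addresses instead.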
@@ -153,14 +189,14 @@ void flush_icache_range(unsigned long start, unsigned long end)
 }
 
 /*
  * Write back the D-cache and purge the I-cache for signal trampoline.
  * .. which happens to be the same behavior as flush_icache_range().
  * So, we simply flush out a line.
  */
 void flush_cache_sigtramp(unsigned long addr)
 {
         unsigned long v, index;
         unsigned long flags;
         int i;
 
         v = addr & ~(L1_CACHE_BYTES-1);
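
The loop in the hunk that follows clears the valid bit of one line in every i-cache way by storing 0 into the IC address array. As a sketch of the addresses it touches (the derivation of 'index' from 'v' happens in unchanged context not shown in this diff, so the entry_mask/way_incr values, the array base and the trampoline address below are all illustrative assumptions):

#include <stdio.h>

#define CACHE_IC_ADDRESS_ARRAY  0xf0000000UL    /* assumed IC array base */
#define L1_CACHE_BYTES          32

int main(void)
{
        /* Illustrative 2-way i-cache with an 8KB way. */
        unsigned long entry_mask = 0x1fe0, way_incr = 0x2000;
        unsigned int ways = 2, i;
        unsigned long addr = 0x0040a123UL;      /* hypothetical trampoline */
        unsigned long v = addr & ~(L1_CACHE_BYTES - 1);
        unsigned long index = CACHE_IC_ADDRESS_ARRAY | (v & entry_mask);

        for (i = 0; i < ways; i++, index += way_incr)
                printf("ctrl_outl(0, 0x%08lx); /* clear Valid bit, way %u */\n",
                       index, i);
        return 0;
}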
@@ -172,30 +208,33 @@ void flush_cache_sigtramp(unsigned long addr)
 
         local_irq_save(flags);
         jump_to_P2();
-        for(i = 0; i < cpu_data->icache.ways; i++, index += cpu_data->icache.way_incr)
+
+        for (i = 0; i < cpu_data->icache.ways;
+             i++, index += cpu_data->icache.way_incr)
                 ctrl_outl(0, index);    /* Clear out Valid-bit */
+
         back_to_P1();
+        wmb();
         local_irq_restore(flags);
 }
 
 static inline void flush_cache_4096(unsigned long start,
                                     unsigned long phys)
 {
-        unsigned long flags;
-        extern void __flush_cache_4096(unsigned long addr, unsigned long phys, unsigned long exec_offset);
+        unsigned long flags, exec_offset = 0;
 
         /*
-         * SH7751, SH7751R, and ST40 have no restriction to handle cache.
-         * (While SH7750 must do that at P2 area.)
+         * All types of SH-4 require PC to be in P2 to operate on the I-cache.
+         * Some types of SH-4 require PC to be in P2 to operate on the D-cache.
          */
-        if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG)
-            || start < CACHE_OC_ADDRESS_ARRAY) {
-                local_irq_save(flags);
-                __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0x20000000);
-                local_irq_restore(flags);
-        } else {
-                __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0);
-        }
+        if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) ||
+            (start < CACHE_OC_ADDRESS_ARRAY))
+                exec_offset = 0x20000000;
+
+        local_irq_save(flags);
+        __flush_cache_4096(start | SH_CACHE_ASSOC,
+                           P1SEGADDR(phys), exec_offset);
+        local_irq_restore(flags);
 }
 
 /*
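
The rewritten flush_cache_4096() folds the old two-branch version into one call by computing exec_offset up front: 0x20000000 is the distance between the cached P1 window and the uncached P2 window, so adding it to the PC moves the flush loop out of the cache it is about to operate on. A minimal illustration (the kernel-text address is hypothetical):

#include <stdio.h>

int main(void)
{
        /* SH-4 fixed mappings: P1 = 0x80000000 (cached),
         * P2 = 0xa0000000 (uncached); both alias physical memory. */
        unsigned long p1_pc = 0x8c012340UL;     /* hypothetical kernel PC */
        unsigned long exec_offset = 0x20000000UL;

        printf("P1 PC 0x%08lx -> P2 PC 0x%08lx\n", p1_pc, p1_pc + exec_offset);
        return 0;
}

__flush_cache_4096() further down applies exactly this offset with a computed jmp before it starts writing the cache arrays.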
@@ -206,15 +245,19 @@ void flush_dcache_page(struct page *page)
 {
         if (test_bit(PG_mapped, &page->flags)) {
                 unsigned long phys = PHYSADDR(page_address(page));
+                unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
+                int i, n;
 
                 /* Loop all the D-cache */
-                flush_cache_4096(CACHE_OC_ADDRESS_ARRAY, phys);
-                flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys);
-                flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys);
-                flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys);
+                n = cpu_data->dcache.n_aliases;
+                for (i = 0; i < n; i++, addr += PAGE_SIZE)
+                        flush_cache_4096(addr, phys);
         }
+
+        wmb();
 }
 
+/* TODO: Selective icache invalidation through IC address array.. */
 static inline void flush_icache_all(void)
 {
         unsigned long flags, ccr;
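
With n_aliases in hand, flush_dcache_page() walks one 4KB colour per iteration instead of naming each colour. For n_aliases = 4 the loop reproduces the four hard-coded calls it replaces (the OC array base is the usual SH-4 value, stated here as an assumption):

#include <stdio.h>

#define CACHE_OC_ADDRESS_ARRAY  0xf4000000UL    /* assumed OC array base */
#define PAGE_SIZE               4096UL

int main(void)
{
        unsigned int i, n = 4;                  /* illustrative n_aliases */
        unsigned long addr = CACHE_OC_ADDRESS_ARRAY;

        for (i = 0; i < n; i++, addr += PAGE_SIZE)
                printf("flush_cache_4096(0x%08lx, phys);\n", addr);
        return 0;
}

This prints the 0x0000/0x1000/0x2000/0x3000 offsets visible in the removed lines, but now adapts to parts with more or fewer colours automatically.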
@@ -227,34 +270,142 @@ static inline void flush_icache_all(void)
         ccr |= CCR_CACHE_ICI;
         ctrl_outl(ccr, CCR);
 
+        /*
+         * back_to_P1() will take care of the barrier for us, don't add
+         * another one!
+         */
+
         back_to_P1();
         local_irq_restore(flags);
 }
 
+void flush_dcache_all(void)
+{
+        (*__flush_dcache_segment_fn)(0UL, cpu_data->dcache.way_size);
+        wmb();
+}
+
 void flush_cache_all(void)
 {
-        if (cpu_data->dcache.ways == 1)
-                __flush_dcache_all();
-        else
-                __flush_dcache_all_ex();
+        flush_dcache_all();
         flush_icache_all();
 }
 
+static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
+                             unsigned long end)
+{
+        unsigned long d = 0, p = start & PAGE_MASK;
+        unsigned long alias_mask = cpu_data->dcache.alias_mask;
+        unsigned long n_aliases = cpu_data->dcache.n_aliases;
+        unsigned long select_bit;
+        unsigned long all_aliases_mask;
+        unsigned long addr_offset;
+        pgd_t *dir;
+        pmd_t *pmd;
+        pud_t *pud;
+        pte_t *pte;
+        int i;
+
+        dir = pgd_offset(mm, p);
+        pud = pud_offset(dir, p);
+        pmd = pmd_offset(pud, p);
+        end = PAGE_ALIGN(end);
+
+        all_aliases_mask = (1 << n_aliases) - 1;
+
+        do {
+                if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
+                        p &= PMD_MASK;
+                        p += PMD_SIZE;
+                        pmd++;
+
+                        continue;
+                }
+
+                pte = pte_offset_kernel(pmd, p);
+
+                do {
+                        unsigned long phys;
+                        pte_t entry = *pte;
+
+                        if (!(pte_val(entry) & _PAGE_PRESENT)) {
+                                pte++;
+                                p += PAGE_SIZE;
+                                continue;
+                        }
+
+                        phys = pte_val(entry) & PTE_PHYS_MASK;
+
+                        if ((p ^ phys) & alias_mask) {
+                                d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
+                                d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
+
+                                if (d == all_aliases_mask)
+                                        goto loop_exit;
+                        }
+
+                        pte++;
+                        p += PAGE_SIZE;
+                } while (p < end && ((unsigned long)pte & ~PAGE_MASK));
+                pmd++;
+        } while (p < end);
+
+loop_exit:
+        addr_offset = 0;
+        select_bit = 1;
+
+        for (i = 0; i < n_aliases; i++) {
+                if (d & select_bit) {
+                        (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
+                        wmb();
+                }
+
+                select_bit <<= 1;
+                addr_offset += PAGE_SIZE;
+        }
+}
+
+/*
+ * Note : (RPC) since the caches are physically tagged, the only point
+ * of flush_cache_mm for SH-4 is to get rid of aliases from the
+ * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
+ * lines can stay resident so long as the virtual address they were
+ * accessed with (hence cache set) is in accord with the physical
+ * address (i.e. tag). It's no different here. So I reckon we don't
+ * need to flush the I-cache, since aliases don't matter for that. We
+ * should try that.
+ *
+ * Caller takes mm->mmap_sem.
+ */
 void flush_cache_mm(struct mm_struct *mm)
 {
-        /* Is there any good way? */
-        /* XXX: possibly call flush_cache_range for each vm area */
-        /*
-         * FIXME: Really, the optimal solution here would be able to flush out
-         * individual lines created by the specified context, but this isn't
-         * feasible for a number of architectures (such as MIPS, and some
-         * SPARC) .. is this possible for SuperH?
-         *
-         * In the meantime, we'll just flush all of the caches.. this
-         * seems to be the simplest way to avoid at least a few wasted
-         * cache flushes. -Lethal
-         */
-        flush_cache_all();
+        /*
+         * If cache is only 4k-per-way, there are never any 'aliases'. Since
+         * the cache is physically tagged, the data can just be left in there.
+         */
+        if (cpu_data->dcache.n_aliases == 0)
+                return;
+
+        /*
+         * Don't bother groveling around the dcache for the VMA ranges
+         * if there are too many PTEs to make it worthwhile.
+         */
+        if (mm->nr_ptes >= MAX_DCACHE_PAGES)
+                flush_dcache_all();
+        else {
+                struct vm_area_struct *vma;
+
+                /*
+                 * In this case there are reasonably sized ranges to flush,
+                 * iterate through the VMA list and take care of any aliases.
+                 */
+                for (vma = mm->mmap; vma; vma = vma->vm_next)
+                        __flush_cache_mm(mm, vma->vm_start, vma->vm_end);
+        }
+
+        /* Only touch the icache if one of the VMAs has VM_EXEC set. */
+        if (mm->exec_vm)
+                flush_icache_all();
 }
 
 /*
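
__flush_cache_mm()'s 'd' is a bitmap of dirty colours: any PTE whose virtual and physical colours disagree marks both, and the walk can stop as soon as every colour is marked. A self-contained model of the accumulation (alias_mask 0x3000 and the two example mappings are illustrative):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        unsigned long alias_mask = 0x3000;      /* 4 colours, illustrative */
        unsigned long all_aliases_mask = (1UL << 4) - 1;
        unsigned long d = 0;
        struct { unsigned long virt, phys; } pages[] = {
                { 0x00401000UL, 0x0c003000UL }, /* colours 1 vs 3: alias */
                { 0x00402000UL, 0x0c002000UL }, /* both colour 2: harmless */
        };
        unsigned int i;

        for (i = 0; i < 2; i++) {
                unsigned long p = pages[i].virt, phys = pages[i].phys;

                if ((p ^ phys) & alias_mask) {
                        d |= 1UL << ((p & alias_mask) >> PAGE_SHIFT);
                        d |= 1UL << ((phys & alias_mask) >> PAGE_SHIFT);
                }
                if (d == all_aliases_mask)
                        break;          /* every colour already marked */
        }

        printf("colour bitmap d = 0x%lx\n", d); /* 0xa: colours 1 and 3 */
        return 0;
}

Each set bit then costs one PAGE_SIZE-sized segment flush, so even a pathological address space pays for at most n_aliases flushes rather than one per page.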
@@ -263,27 +414,40 @@ void flush_cache_mm(struct mm_struct *mm)
  * ADDR: Virtual Address (U0 address)
  * PFN: Physical page number
  */
-void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn)
+void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
+                      unsigned long pfn)
 {
         unsigned long phys = pfn << PAGE_SHIFT;
+        unsigned int alias_mask;
+
+        alias_mask = cpu_data->dcache.alias_mask;
 
         /* We only need to flush D-cache when we have alias */
-        if ((address^phys) & CACHE_ALIAS) {
+        if ((address^phys) & alias_mask) {
                 /* Loop 4K of the D-cache */
                 flush_cache_4096(
-                        CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS),
+                        CACHE_OC_ADDRESS_ARRAY | (address & alias_mask),
                         phys);
                 /* Loop another 4K of the D-cache */
                 flush_cache_4096(
-                        CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS),
+                        CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask),
                         phys);
         }
 
-        if (vma->vm_flags & VM_EXEC)
-                /* Loop 4K (half) of the I-cache */
+        alias_mask = cpu_data->icache.alias_mask;
+        if (vma->vm_flags & VM_EXEC) {
+                /*
+                 * Evict entries from the portion of the cache from which code
+                 * may have been executed at this address (virtual). There's
+                 * no need to evict from the portion corresponding to the
+                 * physical address as for the D-cache, because we know the
+                 * kernel has never executed the code through its identity
+                 * translation.
+                 */
                 flush_cache_4096(
-                        CACHE_IC_ADDRESS_ARRAY | (address & 0x1000),
+                        CACHE_IC_ADDRESS_ARRAY | (address & alias_mask),
                         phys);
+        }
 }
 
 /*
@@ -298,52 +462,31 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigne
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
                        unsigned long end)
 {
-        unsigned long p = start & PAGE_MASK;
-        pgd_t *dir;
-        pmd_t *pmd;
-        pte_t *pte;
-        pte_t entry;
-        unsigned long phys;
-        unsigned long d = 0;
-
-        dir = pgd_offset(vma->vm_mm, p);
-        pmd = pmd_offset(dir, p);
-
-        do {
-                if (pmd_none(*pmd) || pmd_bad(*pmd)) {
-                        p &= ~((1 << PMD_SHIFT) -1);
-                        p += (1 << PMD_SHIFT);
-                        pmd++;
-                        continue;
-                }
-                pte = pte_offset_kernel(pmd, p);
-                do {
-                        entry = *pte;
-                        if ((pte_val(entry) & _PAGE_PRESENT)) {
-                                phys = pte_val(entry)&PTE_PHYS_MASK;
-                                if ((p^phys) & CACHE_ALIAS) {
-                                        d |= 1 << ((p & CACHE_ALIAS)>>12);
-                                        d |= 1 << ((phys & CACHE_ALIAS)>>12);
-                                        if (d == 0x0f)
-                                                goto loop_exit;
-                                }
-                        }
-                        pte++;
-                        p += PAGE_SIZE;
-                } while (p < end && ((unsigned long)pte & ~PAGE_MASK));
-                pmd++;
-        } while (p < end);
-loop_exit:
-        if (d & 1)
-                flush_cache_4096_all(0);
-        if (d & 2)
-                flush_cache_4096_all(0x1000);
-        if (d & 4)
-                flush_cache_4096_all(0x2000);
-        if (d & 8)
-                flush_cache_4096_all(0x3000);
-        if (vma->vm_flags & VM_EXEC)
+        /*
+         * If cache is only 4k-per-way, there are never any 'aliases'. Since
+         * the cache is physically tagged, the data can just be left in there.
+         */
+        if (cpu_data->dcache.n_aliases == 0)
+                return;
+
+        /*
+         * Don't bother with the lookup and alias check if we have a
+         * wide range to cover, just blow away the dcache in its
+         * entirety instead. -- PFM.
+         */
+        if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
+                flush_dcache_all();
+        else
+                __flush_cache_mm(vma->vm_mm, start, end);
+
+        if (vma->vm_flags & VM_EXEC) {
+                /*
+                 * TODO: Is this required??? Need to look at how I-cache
+                 * coherency is assured when new programs are loaded to see if
+                 * this matters.
+                 */
                 flush_icache_all();
+        }
 }
 
 /*
@@ -357,5 +500,273 @@ void flush_icache_user_range(struct vm_area_struct *vma,
                              struct page *page, unsigned long addr, int len)
 {
         flush_cache_page(vma, addr, page_to_pfn(page));
+        mb();
+}
+
+/**
+ * __flush_cache_4096
+ *
+ * @addr:  address in memory mapped cache array
+ * @phys:  P1 address to flush (has to match tags if addr has 'A' bit
+ *         set i.e. associative write)
+ * @exec_offset: set to 0x20000000 if flush has to be executed from P2
+ *               region else 0x0
+ *
+ * The offset into the cache array implied by 'addr' selects the
+ * 'colour' of the virtual address range that will be flushed. The
+ * operation (purge/write-back) is selected by the lower 2 bits of
+ * 'phys'.
+ */
+static void __flush_cache_4096(unsigned long addr, unsigned long phys,
+                               unsigned long exec_offset)
+{
+        int way_count;
+        unsigned long base_addr = addr;
+        struct cache_info *dcache;
+        unsigned long way_incr;
+        unsigned long a, ea, p;
+        unsigned long temp_pc;
+
+        dcache = &cpu_data->dcache;
+        /* Write this way for better assembly. */
+        way_count = dcache->ways;
+        way_incr = dcache->way_incr;
+
+        /*
+         * Apply exec_offset (i.e. branch to P2 if required.).
+         *
+         * FIXME:
+         *
+         *      If I write "=r" for the (temp_pc), it puts this in r6 hence
+         *      trashing exec_offset before it's been added on - why? Hence
+         *      "=&r" as a 'workaround'
+         */
+        asm volatile("mov.l 1f, %0\n\t"
+                     "add   %1, %0\n\t"
+                     "jmp   @%0\n\t"
+                     "nop\n\t"
+                     ".balign 4\n\t"
+                     "1:  .long 2f\n\t"
+                     "2:\n" : "=&r" (temp_pc) : "r" (exec_offset));
+
+        /*
+         * We know there will be >=1 iteration, so write as do-while to avoid
+         * pointless head-of-loop check for 0 iterations.
+         */
+        do {
+                ea = base_addr + PAGE_SIZE;
+                a = base_addr;
+                p = phys;
+
+                do {
+                        *(volatile unsigned long *)a = p;
+                        /*
+                         * Next line: intentionally not p+32, saves an add, p
+                         * will do since only the cache tag bits need to
+                         * match.
+                         */
+                        *(volatile unsigned long *)(a+32) = p;
+                        a += 64;
+                        p += 64;
+                } while (a < ea);
+
+                base_addr += way_incr;
+        } while (--way_count != 0);
 }
 
+/*
+ * Break the 1, 2 and 4 way variants of this out into separate functions to
+ * avoid nearly all the overhead of having the conditional stuff in the function
+ * bodies (+ the 1 and 2 way cases avoid saving any registers too).
+ */
+static void __flush_dcache_segment_1way(unsigned long start,
+                                        unsigned long extent_per_way)
+{
+        unsigned long orig_sr, sr_with_bl;
+        unsigned long base_addr;
+        unsigned long way_incr, linesz, way_size;
+        struct cache_info *dcache;
+        register unsigned long a0, a0e;
+
+        asm volatile("stc sr, %0" : "=r" (orig_sr));
+        sr_with_bl = orig_sr | (1<<28);
+        base_addr = ((unsigned long)&empty_zero_page[0]);
+
+        /*
+         * The previous code aligned base_addr to 16k, i.e. the way_size of all
+         * existing SH-4 D-caches. Whilst I don't see a need to have this
+         * aligned to any better than the cache line size (which it will be
+         * anyway by construction), let's align it to at least the way_size of
+         * any existing or conceivable SH-4 D-cache. -- RPC
+         */
+        base_addr = ((base_addr >> 16) << 16);
+        base_addr |= start;
+
+        dcache = &cpu_data->dcache;
+        linesz = dcache->linesz;
+        way_incr = dcache->way_incr;
+        way_size = dcache->way_size;
+
+        a0 = base_addr;
+        a0e = base_addr + extent_per_way;
+        do {
+                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+                asm volatile("movca.l r0, @%0\n\t"
+                             "ocbi @%0" : : "r" (a0));
+                a0 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "ocbi @%0" : : "r" (a0));
+                a0 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "ocbi @%0" : : "r" (a0));
+                a0 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "ocbi @%0" : : "r" (a0));
+                asm volatile("ldc %0, sr" : : "r" (orig_sr));
+                a0 += linesz;
+        } while (a0 < a0e);
+}
+
+static void __flush_dcache_segment_2way(unsigned long start,
+                                        unsigned long extent_per_way)
+{
+        unsigned long orig_sr, sr_with_bl;
+        unsigned long base_addr;
+        unsigned long way_incr, linesz, way_size;
+        struct cache_info *dcache;
+        register unsigned long a0, a1, a0e;
+
+        asm volatile("stc sr, %0" : "=r" (orig_sr));
+        sr_with_bl = orig_sr | (1<<28);
+        base_addr = ((unsigned long)&empty_zero_page[0]);
+
+        /* See comment under 1-way above */
+        base_addr = ((base_addr >> 16) << 16);
+        base_addr |= start;
+
+        dcache = &cpu_data->dcache;
+        linesz = dcache->linesz;
+        way_incr = dcache->way_incr;
+        way_size = dcache->way_size;
+
+        a0 = base_addr;
+        a1 = a0 + way_incr;
+        a0e = base_addr + extent_per_way;
+        do {
+                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+                asm volatile("movca.l r0, @%0\n\t"
+                             "movca.l r0, @%1\n\t"
+                             "ocbi @%0\n\t"
+                             "ocbi @%1" : :
+                             "r" (a0), "r" (a1));
+                a0 += linesz;
+                a1 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "movca.l r0, @%1\n\t"
+                             "ocbi @%0\n\t"
+                             "ocbi @%1" : :
+                             "r" (a0), "r" (a1));
+                a0 += linesz;
+                a1 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "movca.l r0, @%1\n\t"
+                             "ocbi @%0\n\t"
+                             "ocbi @%1" : :
+                             "r" (a0), "r" (a1));
+                a0 += linesz;
+                a1 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "movca.l r0, @%1\n\t"
+                             "ocbi @%0\n\t"
+                             "ocbi @%1" : :
+                             "r" (a0), "r" (a1));
+                asm volatile("ldc %0, sr" : : "r" (orig_sr));
+                a0 += linesz;
+                a1 += linesz;
+        } while (a0 < a0e);
+}
+
+static void __flush_dcache_segment_4way(unsigned long start,
+                                        unsigned long extent_per_way)
+{
+        unsigned long orig_sr, sr_with_bl;
+        unsigned long base_addr;
+        unsigned long way_incr, linesz, way_size;
+        struct cache_info *dcache;
+        register unsigned long a0, a1, a2, a3, a0e;
+
+        asm volatile("stc sr, %0" : "=r" (orig_sr));
+        sr_with_bl = orig_sr | (1<<28);
+        base_addr = ((unsigned long)&empty_zero_page[0]);
+
+        /* See comment under 1-way above */
+        base_addr = ((base_addr >> 16) << 16);
+        base_addr |= start;
+
+        dcache = &cpu_data->dcache;
+        linesz = dcache->linesz;
+        way_incr = dcache->way_incr;
+        way_size = dcache->way_size;
+
+        a0 = base_addr;
+        a1 = a0 + way_incr;
+        a2 = a1 + way_incr;
+        a3 = a2 + way_incr;
+        a0e = base_addr + extent_per_way;
+        do {
+                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+                asm volatile("movca.l r0, @%0\n\t"
+                             "movca.l r0, @%1\n\t"
+                             "movca.l r0, @%2\n\t"
+                             "movca.l r0, @%3\n\t"
+                             "ocbi @%0\n\t"
+                             "ocbi @%1\n\t"
+                             "ocbi @%2\n\t"
+                             "ocbi @%3\n\t" : :
+                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+                a0 += linesz;
+                a1 += linesz;
+                a2 += linesz;
+                a3 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "movca.l r0, @%1\n\t"
+                             "movca.l r0, @%2\n\t"
+                             "movca.l r0, @%3\n\t"
+                             "ocbi @%0\n\t"
+                             "ocbi @%1\n\t"
+                             "ocbi @%2\n\t"
+                             "ocbi @%3\n\t" : :
+                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+                a0 += linesz;
+                a1 += linesz;
+                a2 += linesz;
+                a3 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "movca.l r0, @%1\n\t"
+                             "movca.l r0, @%2\n\t"
+                             "movca.l r0, @%3\n\t"
+                             "ocbi @%0\n\t"
+                             "ocbi @%1\n\t"
+                             "ocbi @%2\n\t"
+                             "ocbi @%3\n\t" : :
+                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+                a0 += linesz;
+                a1 += linesz;
+                a2 += linesz;
+                a3 += linesz;
+                asm volatile("movca.l r0, @%0\n\t"
+                             "movca.l r0, @%1\n\t"
+                             "movca.l r0, @%2\n\t"
+                             "movca.l r0, @%3\n\t"
+                             "ocbi @%0\n\t"
+                             "ocbi @%1\n\t"
+                             "ocbi @%2\n\t"
+                             "ocbi @%3\n\t" : :
+                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+                asm volatile("ldc %0, sr" : : "r" (orig_sr));
+                a0 += linesz;
+                a1 += linesz;
+                a2 += linesz;
+                a3 += linesz;
+        } while (a0 < a0e);
+}
