diff options
Diffstat (limited to 'arch/sh/mm/cache-sh5.c')
| -rw-r--r-- | arch/sh/mm/cache-sh5.c | 1019 |
1 files changed, 411 insertions, 608 deletions
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c index 4617e3aeee73..3877321fcede 100644 --- a/arch/sh/mm/cache-sh5.c +++ b/arch/sh/mm/cache-sh5.c | |||
| @@ -1,10 +1,10 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * arch/sh/mm/cache-sh5.c | 2 | * arch/sh/mm/cache-sh5.c |
| 3 | * | 3 | * |
| 4 | * Original version Copyright (C) 2000, 2001 Paolo Alberelli | 4 | * Copyright (C) 2000, 2001 Paolo Alberelli |
| 5 | * Second version Copyright (C) benedict.gaster@superh.com 2002 | 5 | * Copyright (C) 2002 Benedict Gaster |
| 6 | * Third version Copyright Richard.Curnow@superh.com 2003 | 6 | * Copyright (C) 2003 Richard Curnow |
| 7 | * Hacks to third version Copyright (C) 2003 Paul Mundt | 7 | * Copyright (C) 2003 - 2008 Paul Mundt |
| 8 | * | 8 | * |
| 9 | * This file is subject to the terms and conditions of the GNU General Public | 9 | * This file is subject to the terms and conditions of the GNU General Public |
| 10 | * License. See the file "COPYING" in the main directory of this archive | 10 | * License. See the file "COPYING" in the main directory of this archive |
| @@ -13,101 +13,20 @@ | |||
| 13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
| 14 | #include <linux/mman.h> | 14 | #include <linux/mman.h> |
| 15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
| 16 | #include <linux/threads.h> | 16 | #include <asm/tlb.h> |
| 17 | #include <asm/page.h> | ||
| 18 | #include <asm/pgtable.h> | ||
| 19 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
| 20 | #include <asm/cache.h> | 18 | #include <asm/cache.h> |
| 21 | #include <asm/tlb.h> | 19 | #include <asm/pgalloc.h> |
| 22 | #include <asm/io.h> | ||
| 23 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
| 24 | #include <asm/mmu_context.h> | 21 | #include <asm/mmu_context.h> |
| 25 | #include <asm/pgalloc.h> /* for flush_itlb_range */ | ||
| 26 | |||
| 27 | #include <linux/proc_fs.h> | ||
| 28 | |||
| 29 | /* This function is in entry.S */ | ||
| 30 | extern unsigned long switch_and_save_asid(unsigned long new_asid); | ||
| 31 | 22 | ||
| 32 | /* Wired TLB entry for the D-cache */ | 23 | /* Wired TLB entry for the D-cache */ |
| 33 | static unsigned long long dtlb_cache_slot; | 24 | static unsigned long long dtlb_cache_slot; |
| 34 | 25 | ||
| 35 | /** | 26 | void __init p3_cache_init(void) |
| 36 | * sh64_cache_init() | ||
| 37 | * | ||
| 38 | * This is pretty much just a straightforward clone of the SH | ||
| 39 | * detect_cpu_and_cache_system(). | ||
| 40 | * | ||
| 41 | * This function is responsible for setting up all of the cache | ||
| 42 | * info dynamically as well as taking care of CPU probing and | ||
| 43 | * setting up the relevant subtype data. | ||
| 44 | * | ||
| 45 | * FIXME: For the time being, we only really support the SH5-101 | ||
| 46 | * out of the box, and don't support dynamic probing for things | ||
| 47 | * like the SH5-103 or even cut2 of the SH5-101. Implement this | ||
| 48 | * later! | ||
| 49 | */ | ||
| 50 | int __init sh64_cache_init(void) | ||
| 51 | { | 27 | { |
| 52 | /* | 28 | /* Reserve a slot for dcache colouring in the DTLB */ |
| 53 | * First, setup some sane values for the I-cache. | 29 | dtlb_cache_slot = sh64_get_wired_dtlb_entry(); |
| 54 | */ | ||
| 55 | cpu_data->icache.ways = 4; | ||
| 56 | cpu_data->icache.sets = 256; | ||
| 57 | cpu_data->icache.linesz = L1_CACHE_BYTES; | ||
| 58 | |||
| 59 | /* | ||
| 60 | * FIXME: This can probably be cleaned up a bit as well.. for example, | ||
| 61 | * do we really need the way shift _and_ the way_step_shift ?? Judging | ||
| 62 | * by the existing code, I would guess no.. is there any valid reason | ||
| 63 | * why we need to be tracking this around? | ||
| 64 | */ | ||
| 65 | cpu_data->icache.way_shift = 13; | ||
| 66 | cpu_data->icache.entry_shift = 5; | ||
| 67 | cpu_data->icache.set_shift = 4; | ||
| 68 | cpu_data->icache.way_step_shift = 16; | ||
| 69 | cpu_data->icache.asid_shift = 2; | ||
| 70 | |||
| 71 | /* | ||
| 72 | * way offset = cache size / associativity, so just don't factor in | ||
| 73 | * associativity in the first place.. | ||
| 74 | */ | ||
| 75 | cpu_data->icache.way_ofs = cpu_data->icache.sets * | ||
| 76 | cpu_data->icache.linesz; | ||
| 77 | |||
| 78 | cpu_data->icache.asid_mask = 0x3fc; | ||
| 79 | cpu_data->icache.idx_mask = 0x1fe0; | ||
| 80 | cpu_data->icache.epn_mask = 0xffffe000; | ||
| 81 | cpu_data->icache.flags = 0; | ||
| 82 | |||
| 83 | /* | ||
| 84 | * Next, setup some sane values for the D-cache. | ||
| 85 | * | ||
| 86 | * On the SH5, these are pretty consistent with the I-cache settings, | ||
| 87 | * so we just copy over the existing definitions.. these can be fixed | ||
| 88 | * up later, especially if we add runtime CPU probing. | ||
| 89 | * | ||
| 90 | * Though in the meantime it saves us from having to duplicate all of | ||
| 91 | * the above definitions.. | ||
| 92 | */ | ||
| 93 | cpu_data->dcache = cpu_data->icache; | ||
| 94 | |||
| 95 | /* | ||
| 96 | * Setup any cache-related flags here | ||
| 97 | */ | ||
| 98 | #if defined(CONFIG_DCACHE_WRITE_THROUGH) | ||
| 99 | set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)); | ||
| 100 | #elif defined(CONFIG_DCACHE_WRITE_BACK) | ||
| 101 | set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags)); | ||
| 102 | #endif | ||
| 103 | |||
| 104 | /* | ||
| 105 | * We also need to reserve a slot for the D-cache in the DTLB, so we | ||
| 106 | * do this now .. | ||
| 107 | */ | ||
| 108 | dtlb_cache_slot = sh64_get_wired_dtlb_entry(); | ||
| 109 | |||
| 110 | return 0; | ||
| 111 | } | 30 | } |
| 112 | 31 | ||
| 113 | #ifdef CONFIG_DCACHE_DISABLED | 32 | #ifdef CONFIG_DCACHE_DISABLED |
| @@ -116,73 +35,48 @@ int __init sh64_cache_init(void) | |||
| 116 | #define sh64_dcache_purge_user_range(mm, start, end) do { } while (0) | 35 | #define sh64_dcache_purge_user_range(mm, start, end) do { } while (0) |
| 117 | #define sh64_dcache_purge_phy_page(paddr) do { } while (0) | 36 | #define sh64_dcache_purge_phy_page(paddr) do { } while (0) |
| 118 | #define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0) | 37 | #define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0) |
| 119 | #define sh64_dcache_purge_kernel_range(start, end) do { } while (0) | ||
| 120 | #define sh64_dcache_wback_current_user_range(start, end) do { } while (0) | ||
| 121 | #endif | 38 | #endif |
| 122 | 39 | ||
| 123 | /*##########################################################################*/ | 40 | /* |
| 124 | 41 | * The following group of functions deal with mapping and unmapping a | |
| 125 | /* From here onwards, a rewrite of the implementation, | 42 | * temporary page into a DTLB slot that has been set aside for exclusive |
| 126 | by Richard.Curnow@superh.com. | 43 | * use. |
| 127 | 44 | */ | |
| 128 | The major changes in this compared to the old version are; | 45 | static inline void |
| 129 | 1. use more selective purging through OCBP instead of using ALLOCO to purge | 46 | sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, |
| 130 | by natural replacement. This avoids purging out unrelated cache lines | 47 | unsigned long paddr) |
| 131 | that happen to be in the same set. | ||
| 132 | 2. exploit the APIs copy_user_page and clear_user_page better | ||
| 133 | 3. be more selective about I-cache purging, in particular use invalidate_all | ||
| 134 | more sparingly. | ||
| 135 | |||
| 136 | */ | ||
| 137 | |||
| 138 | /*########################################################################## | ||
| 139 | SUPPORT FUNCTIONS | ||
| 140 | ##########################################################################*/ | ||
| 141 | |||
| 142 | /****************************************************************************/ | ||
| 143 | /* The following group of functions deal with mapping and unmapping a temporary | ||
| 144 | page into the DTLB slot that have been set aside for our exclusive use. */ | ||
| 145 | /* In order to accomplish this, we use the generic interface for adding and | ||
| 146 | removing a wired slot entry as defined in arch/sh/mm/tlb-sh5.c */ | ||
| 147 | /****************************************************************************/ | ||
| 148 | |||
| 149 | static unsigned long slot_own_flags; | ||
| 150 | |||
| 151 | static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr) | ||
| 152 | { | 48 | { |
| 153 | local_irq_save(slot_own_flags); | 49 | local_irq_disable(); |
| 154 | sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr); | 50 | sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr); |
| 155 | } | 51 | } |
| 156 | 52 | ||
| 157 | static inline void sh64_teardown_dtlb_cache_slot(void) | 53 | static inline void sh64_teardown_dtlb_cache_slot(void) |
| 158 | { | 54 | { |
| 159 | sh64_teardown_tlb_slot(dtlb_cache_slot); | 55 | sh64_teardown_tlb_slot(dtlb_cache_slot); |
| 160 | local_irq_restore(slot_own_flags); | 56 | local_irq_enable(); |
| 161 | } | 57 | } |
| 162 | 58 | ||
| 163 | /****************************************************************************/ | ||
| 164 | |||
| 165 | #ifndef CONFIG_ICACHE_DISABLED | 59 | #ifndef CONFIG_ICACHE_DISABLED |
| 166 | 60 | static inline void sh64_icache_inv_all(void) | |
| 167 | static void __inline__ sh64_icache_inv_all(void) | ||
| 168 | { | 61 | { |
| 169 | unsigned long long addr, flag, data; | 62 | unsigned long long addr, flag, data; |
| 170 | unsigned int flags; | 63 | unsigned int flags; |
| 171 | 64 | ||
| 172 | addr=ICCR0; | 65 | addr = ICCR0; |
| 173 | flag=ICCR0_ICI; | 66 | flag = ICCR0_ICI; |
| 174 | data=0; | 67 | data = 0; |
| 175 | 68 | ||
| 176 | /* Make this a critical section for safety (probably not strictly necessary.) */ | 69 | /* Make this a critical section for safety (probably not strictly necessary.) */ |
| 177 | local_irq_save(flags); | 70 | local_irq_save(flags); |
| 178 | 71 | ||
| 179 | /* Without %1 it gets inexplicably wrong */ | 72 | /* Without %1 it gets inexplicably wrong */ |
| 180 | asm volatile("getcfg %3, 0, %0\n\t" | 73 | __asm__ __volatile__ ( |
| 181 | "or %0, %2, %0\n\t" | 74 | "getcfg %3, 0, %0\n\t" |
| 182 | "putcfg %3, 0, %0\n\t" | 75 | "or %0, %2, %0\n\t" |
| 183 | "synci" | 76 | "putcfg %3, 0, %0\n\t" |
| 184 | : "=&r" (data) | 77 | "synci" |
| 185 | : "0" (data), "r" (flag), "r" (addr)); | 78 | : "=&r" (data) |
| 79 | : "0" (data), "r" (flag), "r" (addr)); | ||
| 186 | 80 | ||
| 187 | local_irq_restore(flags); | 81 | local_irq_restore(flags); |
| 188 | } | 82 | } |
| @@ -193,20 +87,12 @@ static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end) | |||
| 193 | * the addresses lie in the kernel superpage. */ | 87 | * the addresses lie in the kernel superpage. */ |
| 194 | 88 | ||
| 195 | unsigned long long ullend, addr, aligned_start; | 89 | unsigned long long ullend, addr, aligned_start; |
| 196 | #if (NEFF == 32) | ||
| 197 | aligned_start = (unsigned long long)(signed long long)(signed long) start; | 90 | aligned_start = (unsigned long long)(signed long long)(signed long) start; |
| 198 | #else | 91 | addr = L1_CACHE_ALIGN(aligned_start); |
| 199 | #error "NEFF != 32" | ||
| 200 | #endif | ||
| 201 | aligned_start &= L1_CACHE_ALIGN_MASK; | ||
| 202 | addr = aligned_start; | ||
| 203 | #if (NEFF == 32) | ||
| 204 | ullend = (unsigned long long) (signed long long) (signed long) end; | 92 | ullend = (unsigned long long) (signed long long) (signed long) end; |
| 205 | #else | 93 | |
| 206 | #error "NEFF != 32" | ||
| 207 | #endif | ||
| 208 | while (addr <= ullend) { | 94 | while (addr <= ullend) { |
| 209 | asm __volatile__ ("icbi %0, 0" : : "r" (addr)); | 95 | __asm__ __volatile__ ("icbi %0, 0" : : "r" (addr)); |
| 210 | addr += L1_CACHE_BYTES; | 96 | addr += L1_CACHE_BYTES; |
| 211 | } | 97 | } |
| 212 | } | 98 | } |
| @@ -215,7 +101,7 @@ static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long | |||
| 215 | { | 101 | { |
| 216 | /* If we get called, we know that vma->vm_flags contains VM_EXEC. | 102 | /* If we get called, we know that vma->vm_flags contains VM_EXEC. |
| 217 | Also, eaddr is page-aligned. */ | 103 | Also, eaddr is page-aligned. */ |
| 218 | 104 | unsigned int cpu = smp_processor_id(); | |
| 219 | unsigned long long addr, end_addr; | 105 | unsigned long long addr, end_addr; |
| 220 | unsigned long flags = 0; | 106 | unsigned long flags = 0; |
| 221 | unsigned long running_asid, vma_asid; | 107 | unsigned long running_asid, vma_asid; |
| @@ -237,17 +123,17 @@ static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long | |||
| 237 | */ | 123 | */ |
| 238 | 124 | ||
| 239 | running_asid = get_asid(); | 125 | running_asid = get_asid(); |
| 240 | vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK); | 126 | vma_asid = cpu_asid(cpu, vma->vm_mm); |
| 241 | if (running_asid != vma_asid) { | 127 | if (running_asid != vma_asid) { |
| 242 | local_irq_save(flags); | 128 | local_irq_save(flags); |
| 243 | switch_and_save_asid(vma_asid); | 129 | switch_and_save_asid(vma_asid); |
| 244 | } | 130 | } |
| 245 | while (addr < end_addr) { | 131 | while (addr < end_addr) { |
| 246 | /* Worth unrolling a little */ | 132 | /* Worth unrolling a little */ |
| 247 | asm __volatile__("icbi %0, 0" : : "r" (addr)); | 133 | __asm__ __volatile__("icbi %0, 0" : : "r" (addr)); |
| 248 | asm __volatile__("icbi %0, 32" : : "r" (addr)); | 134 | __asm__ __volatile__("icbi %0, 32" : : "r" (addr)); |
| 249 | asm __volatile__("icbi %0, 64" : : "r" (addr)); | 135 | __asm__ __volatile__("icbi %0, 64" : : "r" (addr)); |
| 250 | asm __volatile__("icbi %0, 96" : : "r" (addr)); | 136 | __asm__ __volatile__("icbi %0, 96" : : "r" (addr)); |
| 251 | addr += 128; | 137 | addr += 128; |
| 252 | } | 138 | } |
| 253 | if (running_asid != vma_asid) { | 139 | if (running_asid != vma_asid) { |
| @@ -256,8 +142,6 @@ static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long | |||
| 256 | } | 142 | } |
| 257 | } | 143 | } |
| 258 | 144 | ||
| 259 | /****************************************************************************/ | ||
| 260 | |||
| 261 | static void sh64_icache_inv_user_page_range(struct mm_struct *mm, | 145 | static void sh64_icache_inv_user_page_range(struct mm_struct *mm, |
| 262 | unsigned long start, unsigned long end) | 146 | unsigned long start, unsigned long end) |
| 263 | { | 147 | { |
| @@ -275,10 +159,10 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm, | |||
| 275 | possible with the D-cache. Just assume 64 for now as a working | 159 | possible with the D-cache. Just assume 64 for now as a working |
| 276 | figure. | 160 | figure. |
| 277 | */ | 161 | */ |
| 278 | |||
| 279 | int n_pages; | 162 | int n_pages; |
| 280 | 163 | ||
| 281 | if (!mm) return; | 164 | if (!mm) |
| 165 | return; | ||
| 282 | 166 | ||
| 283 | n_pages = ((end - start) >> PAGE_SHIFT); | 167 | n_pages = ((end - start) >> PAGE_SHIFT); |
| 284 | if (n_pages >= 64) { | 168 | if (n_pages >= 64) { |
| @@ -290,7 +174,7 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm, | |||
| 290 | unsigned long mm_asid, current_asid; | 174 | unsigned long mm_asid, current_asid; |
| 291 | unsigned long long flags = 0ULL; | 175 | unsigned long long flags = 0ULL; |
| 292 | 176 | ||
| 293 | mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | 177 | mm_asid = cpu_asid(smp_processor_id(), mm); |
| 294 | current_asid = get_asid(); | 178 | current_asid = get_asid(); |
| 295 | 179 | ||
| 296 | if (mm_asid != current_asid) { | 180 | if (mm_asid != current_asid) { |
| @@ -322,6 +206,7 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm, | |||
| 322 | } | 206 | } |
| 323 | aligned_start = vma->vm_end; /* Skip to start of next region */ | 207 | aligned_start = vma->vm_end; /* Skip to start of next region */ |
| 324 | } | 208 | } |
| 209 | |||
| 325 | if (mm_asid != current_asid) { | 210 | if (mm_asid != current_asid) { |
| 326 | switch_and_save_asid(current_asid); | 211 | switch_and_save_asid(current_asid); |
| 327 | local_irq_restore(flags); | 212 | local_irq_restore(flags); |
| @@ -329,47 +214,46 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm, | |||
| 329 | } | 214 | } |
| 330 | } | 215 | } |
| 331 | 216 | ||
| 217 | /* | ||
| 218 | * Invalidate a small range of user context I-cache, not necessarily page | ||
| 219 | * (or even cache-line) aligned. | ||
| 220 | * | ||
| 221 | * Since this is used inside ptrace, the ASID in the mm context typically | ||
| 222 | * won't match current_asid. We'll have to switch ASID to do this. For | ||
| 223 | * safety, and given that the range will be small, do all this under cli. | ||
| 224 | * | ||
| 225 | * Note, there is a hazard that the ASID in mm->context is no longer | ||
| 226 | * actually associated with mm, i.e. if the mm->context has started a new | ||
| 227 | * cycle since mm was last active. However, this is just a performance | ||
| 228 | * issue: all that happens is that we invalidate lines belonging to | ||
| 229 | * another mm, so the owning process has to refill them when that mm goes | ||
| 230 | * live again. mm itself can't have any cache entries because there will | ||
| 231 | * have been a flush_cache_all when the new mm->context cycle started. | ||
| 232 | */ | ||
| 332 | static void sh64_icache_inv_user_small_range(struct mm_struct *mm, | 233 | static void sh64_icache_inv_user_small_range(struct mm_struct *mm, |
| 333 | unsigned long start, int len) | 234 | unsigned long start, int len) |
| 334 | { | 235 | { |
| 335 | |||
| 336 | /* Invalidate a small range of user context I-cache, not necessarily | ||
| 337 | page (or even cache-line) aligned. */ | ||
| 338 | |||
| 339 | unsigned long long eaddr = start; | 236 | unsigned long long eaddr = start; |
| 340 | unsigned long long eaddr_end = start + len; | 237 | unsigned long long eaddr_end = start + len; |
| 341 | unsigned long current_asid, mm_asid; | 238 | unsigned long current_asid, mm_asid; |
| 342 | unsigned long long flags; | 239 | unsigned long long flags; |
| 343 | unsigned long long epage_start; | 240 | unsigned long long epage_start; |
| 344 | 241 | ||
| 345 | /* Since this is used inside ptrace, the ASID in the mm context | 242 | /* |
| 346 | typically won't match current_asid. We'll have to switch ASID to do | 243 | * Align to start of cache line. Otherwise, suppose len==8 and |
| 347 | this. For safety, and given that the range will be small, do all | 244 | * start was at 32N+28 : the last 4 bytes wouldn't get invalidated. |
| 348 | this under cli. | 245 | */ |
| 349 | 246 | eaddr = L1_CACHE_ALIGN(start); | |
| 350 | Note, there is a hazard that the ASID in mm->context is no longer | ||
| 351 | actually associated with mm, i.e. if the mm->context has started a | ||
| 352 | new cycle since mm was last active. However, this is just a | ||
| 353 | performance issue: all that happens is that we invalidate lines | ||
| 354 | belonging to another mm, so the owning process has to refill them | ||
| 355 | when that mm goes live again. mm itself can't have any cache | ||
| 356 | entries because there will have been a flush_cache_all when the new | ||
| 357 | mm->context cycle started. */ | ||
| 358 | |||
| 359 | /* Align to start of cache line. Otherwise, suppose len==8 and start | ||
| 360 | was at 32N+28 : the last 4 bytes wouldn't get invalidated. */ | ||
| 361 | eaddr = start & L1_CACHE_ALIGN_MASK; | ||
| 362 | eaddr_end = start + len; | 247 | eaddr_end = start + len; |
| 363 | 248 | ||
| 249 | mm_asid = cpu_asid(smp_processor_id(), mm); | ||
| 364 | local_irq_save(flags); | 250 | local_irq_save(flags); |
| 365 | mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | ||
| 366 | current_asid = switch_and_save_asid(mm_asid); | 251 | current_asid = switch_and_save_asid(mm_asid); |
| 367 | 252 | ||
| 368 | epage_start = eaddr & PAGE_MASK; | 253 | epage_start = eaddr & PAGE_MASK; |
| 369 | 254 | ||
| 370 | while (eaddr < eaddr_end) | 255 | while (eaddr < eaddr_end) { |
| 371 | { | 256 | __asm__ __volatile__("icbi %0, 0" : : "r" (eaddr)); |
| 372 | asm __volatile__("icbi %0, 0" : : "r" (eaddr)); | ||
| 373 | eaddr += L1_CACHE_BYTES; | 257 | eaddr += L1_CACHE_BYTES; |
| 374 | } | 258 | } |
| 375 | switch_and_save_asid(current_asid); | 259 | switch_and_save_asid(current_asid); |
| @@ -394,30 +278,24 @@ static void sh64_icache_inv_current_user_range(unsigned long start, unsigned lon | |||
| 394 | been recycled since we were last active in which case we might just | 278 | been recycled since we were last active in which case we might just |
| 395 | invalidate another process's I-cache entries : no worries, just a | 279 | invalidate another process's I-cache entries : no worries, just a |
| 396 | performance drop for him. */ | 280 | performance drop for him. */ |
| 397 | aligned_start = start & L1_CACHE_ALIGN_MASK; | 281 | aligned_start = L1_CACHE_ALIGN(start); |
| 398 | addr = aligned_start; | 282 | addr = aligned_start; |
| 399 | while (addr < ull_end) { | 283 | while (addr < ull_end) { |
| 400 | asm __volatile__ ("icbi %0, 0" : : "r" (addr)); | 284 | __asm__ __volatile__ ("icbi %0, 0" : : "r" (addr)); |
| 401 | asm __volatile__ ("nop"); | 285 | __asm__ __volatile__ ("nop"); |
| 402 | asm __volatile__ ("nop"); | 286 | __asm__ __volatile__ ("nop"); |
| 403 | addr += L1_CACHE_BYTES; | 287 | addr += L1_CACHE_BYTES; |
| 404 | } | 288 | } |
| 405 | } | 289 | } |
| 406 | |||
| 407 | #endif /* !CONFIG_ICACHE_DISABLED */ | 290 | #endif /* !CONFIG_ICACHE_DISABLED */ |
| 408 | 291 | ||
| 409 | /****************************************************************************/ | ||
| 410 | |||
| 411 | #ifndef CONFIG_DCACHE_DISABLED | 292 | #ifndef CONFIG_DCACHE_DISABLED |
| 412 | |||
| 413 | /* Buffer used as the target of alloco instructions to purge data from cache | 293 | /* Buffer used as the target of alloco instructions to purge data from cache |
| 414 | sets by natural eviction. -- RPC */ | 294 | sets by natural eviction. -- RPC */ |
| 415 | #define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4) | 295 | #define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4)) |
| 416 | static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, }; | 296 | static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, }; |
| 417 | 297 | ||
| 418 | /****************************************************************************/ | 298 | static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets) |
| 419 | |||
| 420 | static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets) | ||
| 421 | { | 299 | { |
| 422 | /* Purge all ways in a particular block of sets, specified by the base | 300 | /* Purge all ways in a particular block of sets, specified by the base |
| 423 | set number and number of sets. Can handle wrap-around, if that's | 301 | set number and number of sets. Can handle wrap-around, if that's |
| @@ -428,102 +306,86 @@ static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets | |||
| 428 | int j; | 306 | int j; |
| 429 | int set_offset; | 307 | int set_offset; |
| 430 | 308 | ||
| 431 | dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift; | 309 | dummy_buffer_base_set = ((int)&dummy_alloco_area & |
| 310 | cpu_data->dcache.entry_mask) >> | ||
| 311 | cpu_data->dcache.entry_shift; | ||
| 432 | set_offset = sets_to_purge_base - dummy_buffer_base_set; | 312 | set_offset = sets_to_purge_base - dummy_buffer_base_set; |
| 433 | 313 | ||
| 434 | for (j=0; j<n_sets; j++, set_offset++) { | 314 | for (j = 0; j < n_sets; j++, set_offset++) { |
| 435 | set_offset &= (cpu_data->dcache.sets - 1); | 315 | set_offset &= (cpu_data->dcache.sets - 1); |
| 436 | eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift); | 316 | eaddr0 = (unsigned long long)dummy_alloco_area + |
| 437 | 317 | (set_offset << cpu_data->dcache.entry_shift); | |
| 438 | /* Do one alloco which hits the required set per cache way. For | 318 | |
| 439 | write-back mode, this will purge the #ways resident lines. There's | 319 | /* |
| 440 | little point unrolling this loop because the allocos stall more if | 320 | * Do one alloco which hits the required set per cache |
| 441 | they're too close together. */ | 321 | * way. For write-back mode, this will purge the #ways |
| 442 | eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; | 322 | * resident lines. There's little point unrolling this |
| 443 | for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { | 323 | * loop because the allocos stall more if they're too |
| 444 | asm __volatile__ ("alloco %0, 0" : : "r" (eaddr)); | 324 | * close together. |
| 445 | asm __volatile__ ("synco"); /* TAKum03020 */ | 325 | */ |
| 326 | eaddr1 = eaddr0 + cpu_data->dcache.way_size * | ||
| 327 | cpu_data->dcache.ways; | ||
| 328 | |||
| 329 | for (eaddr = eaddr0; eaddr < eaddr1; | ||
| 330 | eaddr += cpu_data->dcache.way_size) { | ||
| 331 | __asm__ __volatile__ ("alloco %0, 0" : : "r" (eaddr)); | ||
| 332 | __asm__ __volatile__ ("synco"); /* TAKum03020 */ | ||
| 446 | } | 333 | } |
| 447 | 334 | ||
| 448 | eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; | 335 | eaddr1 = eaddr0 + cpu_data->dcache.way_size * |
| 449 | for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { | 336 | cpu_data->dcache.ways; |
| 450 | /* Load from each address. Required because alloco is a NOP if | 337 | |
| 451 | the cache is write-through. Write-through is a config option. */ | 338 | for (eaddr = eaddr0; eaddr < eaddr1; |
| 339 | eaddr += cpu_data->dcache.way_size) { | ||
| 340 | /* | ||
| 341 | * Load from each address. Required because | ||
| 342 | * alloco is a NOP if the cache is write-through. | ||
| 343 | */ | ||
| 452 | if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags))) | 344 | if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags))) |
| 453 | *(volatile unsigned char *)(int)eaddr; | 345 | ctrl_inb(eaddr); |
| 454 | } | 346 | } |
| 455 | } | 347 | } |
| 456 | 348 | ||
| 457 | /* Don't use OCBI to invalidate the lines. That costs cycles directly. | 349 | /* |
| 458 | If the dummy block is just left resident, it will naturally get | 350 | * Don't use OCBI to invalidate the lines. That costs cycles |
| 459 | evicted as required. */ | 351 | * directly. If the dummy block is just left resident, it will |
| 460 | 352 | * naturally get evicted as required. | |
| 461 | return; | 353 | */ |
| 462 | } | 354 | } |
| 463 | 355 | ||
| 464 | /****************************************************************************/ | 356 | /* |
| 465 | 357 | * Purge the entire contents of the dcache. The most efficient way to | |
| 358 | * achieve this is to use alloco instructions on a region of unused | ||
| 359 | * memory equal in size to the cache, thereby causing the current | ||
| 360 | * contents to be discarded by natural eviction. The alternative, namely | ||
| 361 | * reading every tag, setting up a mapping for the corresponding page and | ||
| 362 | * doing an OCBP for the line, would be much more expensive. | ||
| 363 | */ | ||
| 466 | static void sh64_dcache_purge_all(void) | 364 | static void sh64_dcache_purge_all(void) |
| 467 | { | 365 | { |
| 468 | /* Purge the entire contents of the dcache. The most efficient way to | ||
| 469 | achieve this is to use alloco instructions on a region of unused | ||
| 470 | memory equal in size to the cache, thereby causing the current | ||
| 471 | contents to be discarded by natural eviction. The alternative, | ||
| 472 | namely reading every tag, setting up a mapping for the corresponding | ||
| 473 | page and doing an OCBP for the line, would be much more expensive. | ||
| 474 | */ | ||
| 475 | 366 | ||
| 476 | sh64_dcache_purge_sets(0, cpu_data->dcache.sets); | 367 | sh64_dcache_purge_sets(0, cpu_data->dcache.sets); |
| 477 | |||
| 478 | return; | ||
| 479 | |||
| 480 | } | 368 | } |
| 481 | 369 | ||
| 482 | /****************************************************************************/ | ||
| 483 | |||
| 484 | static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end) | ||
| 485 | { | ||
| 486 | /* Purge the range of addresses [start,end] from the D-cache. The | ||
| 487 | addresses lie in the superpage mapping. There's no harm if we | ||
| 488 | overpurge at either end - just a small performance loss. */ | ||
| 489 | unsigned long long ullend, addr, aligned_start; | ||
| 490 | #if (NEFF == 32) | ||
| 491 | aligned_start = (unsigned long long)(signed long long)(signed long) start; | ||
| 492 | #else | ||
| 493 | #error "NEFF != 32" | ||
| 494 | #endif | ||
| 495 | aligned_start &= L1_CACHE_ALIGN_MASK; | ||
| 496 | addr = aligned_start; | ||
| 497 | #if (NEFF == 32) | ||
| 498 | ullend = (unsigned long long) (signed long long) (signed long) end; | ||
| 499 | #else | ||
| 500 | #error "NEFF != 32" | ||
| 501 | #endif | ||
| 502 | while (addr <= ullend) { | ||
| 503 | asm __volatile__ ("ocbp %0, 0" : : "r" (addr)); | ||
| 504 | addr += L1_CACHE_BYTES; | ||
| 505 | } | ||
| 506 | return; | ||
| 507 | } | ||
| 508 | 370 | ||
| 509 | /* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for | 371 | /* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for |
| 510 | anything else in the kernel */ | 372 | anything else in the kernel */ |
| 511 | #define MAGIC_PAGE0_START 0xffffffffec000000ULL | 373 | #define MAGIC_PAGE0_START 0xffffffffec000000ULL |
| 512 | 374 | ||
| 513 | static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr) | 375 | /* Purge the physical page 'paddr' from the cache. It's known that any |
| 376 | * cache lines requiring attention have the same page colour as the the | ||
| 377 | * address 'eaddr'. | ||
| 378 | * | ||
| 379 | * This relies on the fact that the D-cache matches on physical tags when | ||
| 380 | * no virtual tag matches. So we create an alias for the original page | ||
| 381 | * and purge through that. (Alternatively, we could have done this by | ||
| 382 | * switching ASID to match the original mapping and purged through that, | ||
| 383 | * but that involves ASID switching cost + probably a TLBMISS + refill | ||
| 384 | * anyway.) | ||
| 385 | */ | ||
| 386 | static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, | ||
| 387 | unsigned long eaddr) | ||
| 514 | { | 388 | { |
| 515 | /* Purge the physical page 'paddr' from the cache. It's known that any | ||
| 516 | cache lines requiring attention have the same page colour as the | ||
| 517 | address 'eaddr'. | ||
| 518 | |||
| 519 | This relies on the fact that the D-cache matches on physical tags | ||
| 520 | when no virtual tag matches. So we create an alias for the original | ||
| 521 | page and purge through that. (Alternatively, we could have done | ||
| 522 | this by switching ASID to match the original mapping and purged | ||
| 523 | through that, but that involves ASID switching cost + probably a | ||
| 524 | TLBMISS + refill anyway.) | ||
| 525 | */ | ||
| 526 | |||
| 527 | unsigned long long magic_page_start; | 389 | unsigned long long magic_page_start; |
| 528 | unsigned long long magic_eaddr, magic_eaddr_end; | 390 | unsigned long long magic_eaddr, magic_eaddr_end; |
| 529 | 391 | ||
| @@ -531,47 +393,45 @@ static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned lo | |||
| 531 | 393 | ||
| 532 | /* As long as the kernel is not pre-emptible, this doesn't need to be | 394 | /* As long as the kernel is not pre-emptible, this doesn't need to be |
| 533 | under cli/sti. */ | 395 | under cli/sti. */ |
| 534 | |||
| 535 | sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr); | 396 | sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr); |
| 536 | 397 | ||
| 537 | magic_eaddr = magic_page_start; | 398 | magic_eaddr = magic_page_start; |
| 538 | magic_eaddr_end = magic_eaddr + PAGE_SIZE; | 399 | magic_eaddr_end = magic_eaddr + PAGE_SIZE; |
| 400 | |||
| 539 | while (magic_eaddr < magic_eaddr_end) { | 401 | while (magic_eaddr < magic_eaddr_end) { |
| 540 | /* Little point in unrolling this loop - the OCBPs are blocking | 402 | /* Little point in unrolling this loop - the OCBPs are blocking |
| 541 | and won't go any quicker (i.e. the loop overhead is parallel | 403 | and won't go any quicker (i.e. the loop overhead is parallel |
| 542 | to part of the OCBP execution.) */ | 404 | to part of the OCBP execution.) */ |
| 543 | asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr)); | 405 | __asm__ __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr)); |
| 544 | magic_eaddr += L1_CACHE_BYTES; | 406 | magic_eaddr += L1_CACHE_BYTES; |
| 545 | } | 407 | } |
| 546 | 408 | ||
| 547 | sh64_teardown_dtlb_cache_slot(); | 409 | sh64_teardown_dtlb_cache_slot(); |
| 548 | } | 410 | } |
| 549 | 411 | ||
| 550 | /****************************************************************************/ | 412 | /* |
| 551 | 413 | * Purge a page given its physical start address, by creating a temporary | |
| 414 | * 1 page mapping and purging across that. Even if we know the virtual | ||
| 415 | * address (& vma or mm) of the page, the method here is more elegant | ||
| 416 | * because it avoids issues of coping with page faults on the purge | ||
| 417 | * instructions (i.e. no special-case code required in the critical path | ||
| 418 | * in the TLB miss handling). | ||
| 419 | */ | ||
| 552 | static void sh64_dcache_purge_phy_page(unsigned long paddr) | 420 | static void sh64_dcache_purge_phy_page(unsigned long paddr) |
| 553 | { | 421 | { |
| 554 | /* Pure a page given its physical start address, by creating a | ||
| 555 | temporary 1 page mapping and purging across that. Even if we know | ||
| 556 | the virtual address (& vma or mm) of the page, the method here is | ||
| 557 | more elegant because it avoids issues of coping with page faults on | ||
| 558 | the purge instructions (i.e. no special-case code required in the | ||
| 559 | critical path in the TLB miss handling). */ | ||
| 560 | |||
| 561 | unsigned long long eaddr_start, eaddr, eaddr_end; | 422 | unsigned long long eaddr_start, eaddr, eaddr_end; |
| 562 | int i; | 423 | int i; |
| 563 | 424 | ||
| 564 | /* As long as the kernel is not pre-emptible, this doesn't need to be | 425 | /* As long as the kernel is not pre-emptible, this doesn't need to be |
| 565 | under cli/sti. */ | 426 | under cli/sti. */ |
| 566 | |||
| 567 | eaddr_start = MAGIC_PAGE0_START; | 427 | eaddr_start = MAGIC_PAGE0_START; |
| 568 | for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) { | 428 | for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) { |
| 569 | sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr); | 429 | sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr); |
| 570 | 430 | ||
| 571 | eaddr = eaddr_start; | 431 | eaddr = eaddr_start; |
| 572 | eaddr_end = eaddr + PAGE_SIZE; | 432 | eaddr_end = eaddr + PAGE_SIZE; |
| 573 | while (eaddr < eaddr_end) { | 433 | while (eaddr < eaddr_end) { |
| 574 | asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr)); | 434 | __asm__ __volatile__ ("ocbp %0, 0" : : "r" (eaddr)); |
| 575 | eaddr += L1_CACHE_BYTES; | 435 | eaddr += L1_CACHE_BYTES; |
| 576 | } | 436 | } |
| 577 | 437 | ||
| @@ -584,6 +444,7 @@ static void sh64_dcache_purge_user_pages(struct mm_struct *mm, | |||
| 584 | unsigned long addr, unsigned long end) | 444 | unsigned long addr, unsigned long end) |
| 585 | { | 445 | { |
| 586 | pgd_t *pgd; | 446 | pgd_t *pgd; |
| 447 | pud_t *pud; | ||
| 587 | pmd_t *pmd; | 448 | pmd_t *pmd; |
| 588 | pte_t *pte; | 449 | pte_t *pte; |
| 589 | pte_t entry; | 450 | pte_t entry; |
| @@ -597,7 +458,11 @@ static void sh64_dcache_purge_user_pages(struct mm_struct *mm, | |||
| 597 | if (pgd_bad(*pgd)) | 458 | if (pgd_bad(*pgd)) |
| 598 | return; | 459 | return; |
| 599 | 460 | ||
| 600 | pmd = pmd_offset(pgd, addr); | 461 | pud = pud_offset(pgd, addr); |
| 462 | if (pud_none(*pud) || pud_bad(*pud)) | ||
| 463 | return; | ||
| 464 | |||
| 465 | pmd = pmd_offset(pud, addr); | ||
| 601 | if (pmd_none(*pmd) || pmd_bad(*pmd)) | 466 | if (pmd_none(*pmd) || pmd_bad(*pmd)) |
| 602 | return; | 467 | return; |
| 603 | 468 | ||
| @@ -611,419 +476,357 @@ static void sh64_dcache_purge_user_pages(struct mm_struct *mm, | |||
| 611 | } while (pte++, addr += PAGE_SIZE, addr != end); | 476 | } while (pte++, addr += PAGE_SIZE, addr != end); |
| 612 | pte_unmap_unlock(pte - 1, ptl); | 477 | pte_unmap_unlock(pte - 1, ptl); |
| 613 | } | 478 | } |
| 614 | /****************************************************************************/ | ||
| 615 | 479 | ||
| 480 | /* | ||
| 481 | * There are at least 5 choices for the implementation of this, with | ||
| 482 | * pros (+), cons(-), comments(*): | ||
| 483 | * | ||
| 484 | * 1. ocbp each line in the range through the original user's ASID | ||
| 485 | * + no lines spuriously evicted | ||
| 486 | * - tlbmiss handling (must either handle faults on demand => extra | ||
| 487 | * special-case code in tlbmiss critical path), or map the page in | ||
| 488 | * advance (=> flush_tlb_range in advance to avoid multiple hits) | ||
| 489 | * - ASID switching | ||
| 490 | * - expensive for large ranges | ||
| 491 | * | ||
| 492 | * 2. temporarily map each page in the range to a special effective | ||
| 493 | * address and ocbp through the temporary mapping; relies on the | ||
| 494 | * fact that SH-5 OCB* always do TLB lookup and match on ptags (they | ||
| 495 | * never look at the etags) | ||
| 496 | * + no spurious evictions | ||
| 497 | * - expensive for large ranges | ||
| 498 | * * surely cheaper than (1) | ||
| 499 | * | ||
| 500 | * 3. walk all the lines in the cache, check the tags, if a match | ||
| 501 | * occurs create a page mapping to ocbp the line through | ||
| 502 | * + no spurious evictions | ||
| 503 | * - tag inspection overhead | ||
| 504 | * - (especially for small ranges) | ||
| 505 | * - potential cost of setting up/tearing down page mapping for | ||
| 506 | * every line that matches the range | ||
| 507 | * * cost partly independent of range size | ||
| 508 | * | ||
| 509 | * 4. walk all the lines in the cache, check the tags, if a match | ||
| 510 | * occurs use 4 * alloco to purge the line (+3 other probably | ||
| 511 | * innocent victims) by natural eviction | ||
| 512 | * + no tlb mapping overheads | ||
| 513 | * - spurious evictions | ||
| 514 | * - tag inspection overhead | ||
| 515 | * | ||
| 516 | * 5. implement like flush_cache_all | ||
| 517 | * + no tag inspection overhead | ||
| 518 | * - spurious evictions | ||
| 519 | * - bad for small ranges | ||
| 520 | * | ||
| 521 | * (1) can be ruled out as more expensive than (2). (2) appears best | ||
| 522 | * for small ranges. The choice between (3), (4) and (5) for large | ||
| 523 | * ranges and the range size for the large/small boundary need | ||
| 524 | * benchmarking to determine. | ||
| 525 | * | ||
| 526 | * For now use approach (2) for small ranges and (5) for large ones. | ||
| 527 | */ | ||
| 616 | static void sh64_dcache_purge_user_range(struct mm_struct *mm, | 528 | static void sh64_dcache_purge_user_range(struct mm_struct *mm, |
| 617 | unsigned long start, unsigned long end) | 529 | unsigned long start, unsigned long end) |
| 618 | { | 530 | { |
| 619 | /* There are at least 5 choices for the implementation of this, with | 531 | int n_pages = ((end - start) >> PAGE_SHIFT); |
| 620 | pros (+), cons(-), comments(*): | ||
| 621 | |||
| 622 | 1. ocbp each line in the range through the original user's ASID | ||
| 623 | + no lines spuriously evicted | ||
| 624 | - tlbmiss handling (must either handle faults on demand => extra | ||
| 625 | special-case code in tlbmiss critical path), or map the page in | ||
| 626 | advance (=> flush_tlb_range in advance to avoid multiple hits) | ||
| 627 | - ASID switching | ||
| 628 | - expensive for large ranges | ||
| 629 | |||
| 630 | 2. temporarily map each page in the range to a special effective | ||
| 631 | address and ocbp through the temporary mapping; relies on the | ||
| 632 | fact that SH-5 OCB* always do TLB lookup and match on ptags (they | ||
| 633 | never look at the etags) | ||
| 634 | + no spurious evictions | ||
| 635 | - expensive for large ranges | ||
| 636 | * surely cheaper than (1) | ||
| 637 | |||
| 638 | 3. walk all the lines in the cache, check the tags, if a match | ||
| 639 | occurs create a page mapping to ocbp the line through | ||
| 640 | + no spurious evictions | ||
| 641 | - tag inspection overhead | ||
| 642 | - (especially for small ranges) | ||
| 643 | - potential cost of setting up/tearing down page mapping for | ||
| 644 | every line that matches the range | ||
| 645 | * cost partly independent of range size | ||
| 646 | |||
| 647 | 4. walk all the lines in the cache, check the tags, if a match | ||
| 648 | occurs use 4 * alloco to purge the line (+3 other probably | ||
| 649 | innocent victims) by natural eviction | ||
| 650 | + no tlb mapping overheads | ||
| 651 | - spurious evictions | ||
| 652 | - tag inspection overhead | ||
| 653 | |||
| 654 | 5. implement like flush_cache_all | ||
| 655 | + no tag inspection overhead | ||
| 656 | - spurious evictions | ||
| 657 | - bad for small ranges | ||
| 658 | |||
| 659 | (1) can be ruled out as more expensive than (2). (2) appears best | ||
| 660 | for small ranges. The choice between (3), (4) and (5) for large | ||
| 661 | ranges and the range size for the large/small boundary need | ||
| 662 | benchmarking to determine. | ||
| 663 | |||
| 664 | For now use approach (2) for small ranges and (5) for large ones. | ||
| 665 | |||
| 666 | */ | ||
| 667 | |||
| 668 | int n_pages; | ||
| 669 | 532 | ||
| 670 | n_pages = ((end - start) >> PAGE_SHIFT); | ||
| 671 | if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) { | 533 | if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) { |
| 672 | #if 1 | ||
| 673 | sh64_dcache_purge_all(); | 534 | sh64_dcache_purge_all(); |
| 674 | #else | ||
| 675 | unsigned long long set, way; | ||
| 676 | unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | ||
| 677 | for (set = 0; set < cpu_data->dcache.sets; set++) { | ||
| 678 | unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift); | ||
| 679 | for (way = 0; way < cpu_data->dcache.ways; way++) { | ||
| 680 | unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift); | ||
| 681 | unsigned long long tag0; | ||
| 682 | unsigned long line_valid; | ||
| 683 | |||
| 684 | asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr)); | ||
| 685 | line_valid = tag0 & SH_CACHE_VALID; | ||
| 686 | if (line_valid) { | ||
| 687 | unsigned long cache_asid; | ||
| 688 | unsigned long epn; | ||
| 689 | |||
| 690 | cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift; | ||
| 691 | /* The next line needs some | ||
| 692 | explanation. The virtual tags | ||
| 693 | encode bits [31:13] of the virtual | ||
| 694 | address, bit [12] of the 'tag' being | ||
| 695 | implied by the cache set index. */ | ||
| 696 | epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift); | ||
| 697 | |||
| 698 | if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) { | ||
| 699 | /* TODO : could optimise this | ||
| 700 | call by batching multiple | ||
| 701 | adjacent sets together. */ | ||
| 702 | sh64_dcache_purge_sets(set, 1); | ||
| 703 | break; /* Don't waste time inspecting other ways for this set */ | ||
| 704 | } | ||
| 705 | } | ||
| 706 | } | ||
| 707 | } | ||
| 708 | #endif | ||
| 709 | } else { | 535 | } else { |
| 710 | /* Small range, covered by a single page table page */ | 536 | /* Small range, covered by a single page table page */ |
| 711 | start &= PAGE_MASK; /* should already be so */ | 537 | start &= PAGE_MASK; /* should already be so */ |
| 712 | end = PAGE_ALIGN(end); /* should already be so */ | 538 | end = PAGE_ALIGN(end); /* should already be so */ |
| 713 | sh64_dcache_purge_user_pages(mm, start, end); | 539 | sh64_dcache_purge_user_pages(mm, start, end); |
| 714 | } | 540 | } |
| 715 | return; | ||
| 716 | } | 541 | } |
| 717 | 542 | ||
| 718 | static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end) | 543 | /* |
| 544 | * Purge the range of addresses from the D-cache. | ||
| 545 | * | ||
| 546 | * The addresses lie in the superpage mapping. There's no harm if we | ||
| 547 | * overpurge at either end - just a small performance loss. | ||
| 548 | */ | ||
| 549 | void __flush_purge_region(void *start, int size) | ||
| 719 | { | 550 | { |
| 720 | unsigned long long aligned_start; | 551 | unsigned long long ullend, addr, aligned_start; |
| 721 | unsigned long long ull_end; | ||
| 722 | unsigned long long addr; | ||
| 723 | |||
| 724 | ull_end = end; | ||
| 725 | 552 | ||
| 726 | /* Just wback over the range using the natural addresses. TLB miss | 553 | aligned_start = (unsigned long long)(signed long long)(signed long) start; |
| 727 | handling will be OK (TBC) : the range has just been written to by | 554 | addr = L1_CACHE_ALIGN(aligned_start); |
| 728 | the signal frame setup code, so the PTEs must exist. | 555 | ullend = (unsigned long long) (signed long long) (signed long) start + size; |
| 729 | 556 | ||
| 730 | Note, if we have CONFIG_PREEMPT and get preempted inside this loop, | 557 | while (addr <= ullend) { |
| 731 | it doesn't matter, even if the pid->ASID mapping changes whilst | 558 | __asm__ __volatile__ ("ocbp %0, 0" : : "r" (addr)); |
| 732 | we're away. In that case the cache will have been flushed when the | ||
| 733 | mapping was renewed. So the writebacks below will be nugatory (and | ||
| 734 | we'll doubtless have to fault the TLB entry/ies in again with the | ||
| 735 | new ASID), but it's a rare case. | ||
| 736 | */ | ||
| 737 | aligned_start = start & L1_CACHE_ALIGN_MASK; | ||
| 738 | addr = aligned_start; | ||
| 739 | while (addr < ull_end) { | ||
| 740 | asm __volatile__ ("ocbwb %0, 0" : : "r" (addr)); | ||
| 741 | addr += L1_CACHE_BYTES; | 559 | addr += L1_CACHE_BYTES; |
| 742 | } | 560 | } |
| 743 | } | 561 | } |
| 744 | 562 | ||
| 745 | /****************************************************************************/ | 563 | void __flush_wback_region(void *start, int size) |
| 746 | |||
| 747 | /* These *MUST* lie in an area of virtual address space that's otherwise unused. */ | ||
| 748 | #define UNIQUE_EADDR_START 0xe0000000UL | ||
| 749 | #define UNIQUE_EADDR_END 0xe8000000UL | ||
| 750 | |||
| 751 | static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr) | ||
| 752 | { | 564 | { |
| 753 | /* Given a physical address paddr, and a user virtual address | 565 | unsigned long long ullend, addr, aligned_start; |
| 754 | user_eaddr which will eventually be mapped to it, create a one-off | ||
| 755 | kernel-private eaddr mapped to the same paddr. This is used for | ||
| 756 | creating special destination pages for copy_user_page and | ||
| 757 | clear_user_page */ | ||
| 758 | 566 | ||
| 759 | static unsigned long current_pointer = UNIQUE_EADDR_START; | 567 | aligned_start = (unsigned long long)(signed long long)(signed long) start; |
| 760 | unsigned long coloured_pointer; | 568 | addr = L1_CACHE_ALIGN(aligned_start); |
| 569 | ullend = (unsigned long long) (signed long long) (signed long) start + size; | ||
| 761 | 570 | ||
| 762 | if (current_pointer == UNIQUE_EADDR_END) { | 571 | while (addr < ullend) { |
| 763 | sh64_dcache_purge_all(); | 572 | __asm__ __volatile__ ("ocbwb %0, 0" : : "r" (addr)); |
| 764 | current_pointer = UNIQUE_EADDR_START; | 573 | addr += L1_CACHE_BYTES; |
| 765 | } | 574 | } |
| 766 | |||
| 767 | coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK); | ||
| 768 | sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr); | ||
| 769 | |||
| 770 | current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS); | ||
| 771 | |||
| 772 | return coloured_pointer; | ||
| 773 | } | ||
| 774 | |||
| 775 | /****************************************************************************/ | ||
| 776 | |||
| 777 | static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address) | ||
| 778 | { | ||
| 779 | void *coloured_to; | ||
| 780 | |||
| 781 | /* Discard any existing cache entries of the wrong colour. These are | ||
| 782 | present quite often, if the kernel has recently used the page | ||
| 783 | internally, then given it up, then it's been allocated to the user. | ||
| 784 | */ | ||
| 785 | sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to); | ||
| 786 | |||
| 787 | coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to)); | ||
| 788 | sh64_page_copy(from, coloured_to); | ||
| 789 | |||
| 790 | sh64_teardown_dtlb_cache_slot(); | ||
| 791 | } | 575 | } |
| 792 | 576 | ||
| 793 | static void sh64_clear_user_page_coloured(void *to, unsigned long address) | 577 | void __flush_invalidate_region(void *start, int size) |
| 794 | { | 578 | { |
| 795 | void *coloured_to; | 579 | unsigned long long ullend, addr, aligned_start; |
| 796 | |||
| 797 | /* Discard any existing kernel-originated lines of the wrong colour (as | ||
| 798 | above) */ | ||
| 799 | sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to); | ||
| 800 | 580 | ||
| 801 | coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to)); | 581 | aligned_start = (unsigned long long)(signed long long)(signed long) start; |
| 802 | sh64_page_clear(coloured_to); | 582 | addr = L1_CACHE_ALIGN(aligned_start); |
| 583 | ullend = (unsigned long long) (signed long long) (signed long) start + size; | ||
| 803 | 584 | ||
| 804 | sh64_teardown_dtlb_cache_slot(); | 585 | while (addr < ullend) { |
| 586 | __asm__ __volatile__ ("ocbi %0, 0" : : "r" (addr)); | ||
| 587 | addr += L1_CACHE_BYTES; | ||
| 588 | } | ||
| 805 | } | 589 | } |
| 806 | |||
| 807 | #endif /* !CONFIG_DCACHE_DISABLED */ | 590 | #endif /* !CONFIG_DCACHE_DISABLED */ |
| 808 | 591 | ||
| 809 | /****************************************************************************/ | 592 | /* |
| 810 | 593 | * Invalidate the entire contents of both caches, after writing back to | |
| 811 | /*########################################################################## | 594 | * memory any dirty data from the D-cache. |
| 812 | EXTERNALLY CALLABLE API. | 595 | */ |
| 813 | ##########################################################################*/ | ||
| 814 | |||
| 815 | /* These functions are described in Documentation/cachetlb.txt. | ||
| 816 | Each one of these functions varies in behaviour depending on whether the | ||
| 817 | I-cache and/or D-cache are configured out. | ||
| 818 | |||
| 819 | Note that the Linux term 'flush' corresponds to what is termed 'purge' in | ||
| 820 | the sh/sh64 jargon for the D-cache, i.e. write back dirty data then | ||
| 821 | invalidate the cache lines, and 'invalidate' for the I-cache. | ||
| 822 | */ | ||
| 823 | |||
| 824 | #undef FLUSH_TRACE | ||
| 825 | |||
| 826 | void flush_cache_all(void) | 596 | void flush_cache_all(void) |
| 827 | { | 597 | { |
| 828 | /* Invalidate the entire contents of both caches, after writing back to | ||
| 829 | memory any dirty data from the D-cache. */ | ||
| 830 | sh64_dcache_purge_all(); | 598 | sh64_dcache_purge_all(); |
| 831 | sh64_icache_inv_all(); | 599 | sh64_icache_inv_all(); |
| 832 | } | 600 | } |
| 833 | 601 | ||
| 834 | /****************************************************************************/ | 602 | /* |
| 835 | 603 | * Invalidate an entire user-address space from both caches, after | |
| 604 | * writing back dirty data (e.g. for shared mmap etc). | ||
| 605 | * | ||
| 606 | * This could be coded selectively by inspecting all the tags then | ||
| 607 | * doing 4*alloco on any set containing a match (as for | ||
| 608 | * flush_cache_range), but fork/exit/execve (where this is called from) | ||
| 609 | * are expensive anyway. | ||
| 610 | * | ||
| 611 | * Have to do a purge here, despite the comments re I-cache below. | ||
| 612 | * There could be odd-coloured dirty data associated with the mm still | ||
| 613 | * in the cache - if this gets written out through natural eviction | ||
| 614 | * after the kernel has reused the page there will be chaos. | ||
| 615 | * | ||
| 616 | * The mm being torn down won't ever be active again, so any Icache | ||
| 617 | * lines tagged with its ASID won't be visible for the rest of the | ||
| 618 | * lifetime of this ASID cycle. Before the ASID gets reused, there | ||
| 619 | * will be a flush_cache_all. Hence we don't need to touch the | ||
| 620 | * I-cache. This is similar to the lack of action needed in | ||
| 621 | * flush_tlb_mm - see fault.c. | ||
| 622 | */ | ||
| 836 | void flush_cache_mm(struct mm_struct *mm) | 623 | void flush_cache_mm(struct mm_struct *mm) |
| 837 | { | 624 | { |
| 838 | /* Invalidate an entire user-address space from both caches, after | ||
| 839 | writing back dirty data (e.g. for shared mmap etc). */ | ||
| 840 | |||
| 841 | /* This could be coded selectively by inspecting all the tags then | ||
| 842 | doing 4*alloco on any set containing a match (as for | ||
| 843 | flush_cache_range), but fork/exit/execve (where this is called from) | ||
| 844 | are expensive anyway. */ | ||
| 845 | |||
| 846 | /* Have to do a purge here, despite the comments re I-cache below. | ||
| 847 | There could be odd-coloured dirty data associated with the mm still | ||
| 848 | in the cache - if this gets written out through natural eviction | ||
| 849 | after the kernel has reused the page there will be chaos. | ||
| 850 | */ | ||
| 851 | |||
| 852 | sh64_dcache_purge_all(); | 625 | sh64_dcache_purge_all(); |
| 853 | |||
| 854 | /* The mm being torn down won't ever be active again, so any Icache | ||
| 855 | lines tagged with its ASID won't be visible for the rest of the | ||
| 856 | lifetime of this ASID cycle. Before the ASID gets reused, there | ||
| 857 | will be a flush_cache_all. Hence we don't need to touch the | ||
| 858 | I-cache. This is similar to the lack of action needed in | ||
| 859 | flush_tlb_mm - see fault.c. */ | ||
| 860 | } | 626 | } |
| 861 | 627 | ||
| 862 | /****************************************************************************/ | 628 | /* |
| 863 | 629 | * Invalidate (from both caches) the range [start,end) of virtual | |
| 630 | * addresses from the user address space specified by mm, after writing | ||
| 631 | * back any dirty data. | ||
| 632 | * | ||
| 633 | * Note, 'end' is 1 byte beyond the end of the range to flush. | ||
| 634 | */ | ||
| 864 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, | 635 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, |
| 865 | unsigned long end) | 636 | unsigned long end) |
| 866 | { | 637 | { |
| 867 | struct mm_struct *mm = vma->vm_mm; | 638 | struct mm_struct *mm = vma->vm_mm; |
| 868 | 639 | ||
| 869 | /* Invalidate (from both caches) the range [start,end) of virtual | ||
| 870 | addresses from the user address space specified by mm, after writing | ||
| 871 | back any dirty data. | ||
| 872 | |||
| 873 | Note, 'end' is 1 byte beyond the end of the range to flush. */ | ||
| 874 | |||
| 875 | sh64_dcache_purge_user_range(mm, start, end); | 640 | sh64_dcache_purge_user_range(mm, start, end); |
| 876 | sh64_icache_inv_user_page_range(mm, start, end); | 641 | sh64_icache_inv_user_page_range(mm, start, end); |
| 877 | } | 642 | } |
| 878 | 643 | ||
| 879 | /****************************************************************************/ | 644 | /* |
| 880 | 645 | * Invalidate any entries in either cache for the vma within the user | |
| 881 | void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn) | 646 | * address space vma->vm_mm for the page starting at virtual address |
| 647 | * 'eaddr'. This seems to be used primarily in breaking COW. Note, | ||
| 648 | * the I-cache must be searched too in case the page in question is | ||
| 649 | * both writable and being executed from (e.g. stack trampolines.) | ||
| 650 | * | ||
| 651 | * Note, this is called with pte lock held. | ||
| 652 | */ | ||
| 653 | void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, | ||
| 654 | unsigned long pfn) | ||
| 882 | { | 655 | { |
| 883 | /* Invalidate any entries in either cache for the vma within the user | ||
| 884 | address space vma->vm_mm for the page starting at virtual address | ||
| 885 | 'eaddr'. This seems to be used primarily in breaking COW. Note, | ||
| 886 | the I-cache must be searched too in case the page in question is | ||
| 887 | both writable and being executed from (e.g. stack trampolines.) | ||
| 888 | |||
| 889 | Note, this is called with pte lock held. | ||
| 890 | */ | ||
| 891 | |||
| 892 | sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT); | 656 | sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT); |
| 893 | 657 | ||
| 894 | if (vma->vm_flags & VM_EXEC) { | 658 | if (vma->vm_flags & VM_EXEC) |
| 895 | sh64_icache_inv_user_page(vma, eaddr); | 659 | sh64_icache_inv_user_page(vma, eaddr); |
| 896 | } | ||
| 897 | } | 660 | } |
| 898 | 661 | ||
| 899 | /****************************************************************************/ | 662 | void flush_dcache_page(struct page *page) |
| 663 | { | ||
| 664 | sh64_dcache_purge_phy_page(page_to_phys(page)); | ||
| 665 | wmb(); | ||
| 666 | } | ||
| 900 | 667 | ||
| 901 | #ifndef CONFIG_DCACHE_DISABLED | 668 | /* |
| 669 | * Flush the range [start,end] of kernel virtual adddress space from | ||
| 670 | * the I-cache. The corresponding range must be purged from the | ||
| 671 | * D-cache also because the SH-5 doesn't have cache snooping between | ||
| 672 | * the caches. The addresses will be visible through the superpage | ||
| 673 | * mapping, therefore it's guaranteed that there no cache entries for | ||
| 674 | * the range in cache sets of the wrong colour. | ||
| 675 | */ | ||
| 676 | void flush_icache_range(unsigned long start, unsigned long end) | ||
| 677 | { | ||
| 678 | __flush_purge_region((void *)start, end); | ||
| 679 | wmb(); | ||
| 680 | sh64_icache_inv_kernel_range(start, end); | ||
| 681 | } | ||
| 902 | 682 | ||
| 903 | void copy_user_page(void *to, void *from, unsigned long address, struct page *page) | 683 | /* |
| 684 | * Flush the range of user (defined by vma->vm_mm) address space starting | ||
| 685 | * at 'addr' for 'len' bytes from the cache. The range does not straddle | ||
| 686 | * a page boundary, the unique physical page containing the range is | ||
| 687 | * 'page'. This seems to be used mainly for invalidating an address | ||
| 688 | * range following a poke into the program text through the ptrace() call | ||
| 689 | * from another process (e.g. for BRK instruction insertion). | ||
| 690 | */ | ||
| 691 | void flush_icache_user_range(struct vm_area_struct *vma, | ||
| 692 | struct page *page, unsigned long addr, int len) | ||
| 904 | { | 693 | { |
| 905 | /* 'from' and 'to' are kernel virtual addresses (within the superpage | ||
| 906 | mapping of the physical RAM). 'address' is the user virtual address | ||
| 907 | where the copy 'to' will be mapped after. This allows a custom | ||
| 908 | mapping to be used to ensure that the new copy is placed in the | ||
| 909 | right cache sets for the user to see it without having to bounce it | ||
| 910 | out via memory. Note however : the call to flush_page_to_ram in | ||
| 911 | (generic)/mm/memory.c:(break_cow) undoes all this good work in that one | ||
| 912 | very important case! | ||
| 913 | |||
| 914 | TBD : can we guarantee that on every call, any cache entries for | ||
| 915 | 'from' are in the same colour sets as 'address' also? i.e. is this | ||
| 916 | always used just to deal with COW? (I suspect not). */ | ||
| 917 | |||
| 918 | /* There are two possibilities here for when the page 'from' was last accessed: | ||
| 919 | * by the kernel : this is OK, no purge required. | ||
| 920 | * by the/a user (e.g. for break_COW) : need to purge. | ||
| 921 | |||
| 922 | If the potential user mapping at 'address' is the same colour as | ||
| 923 | 'from' there is no need to purge any cache lines from the 'from' | ||
| 924 | page mapped into cache sets of colour 'address'. (The copy will be | ||
| 925 | accessing the page through 'from'). | ||
| 926 | */ | ||
| 927 | 694 | ||
| 928 | if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) { | 695 | sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr); |
| 929 | sh64_dcache_purge_coloured_phy_page(__pa(from), address); | 696 | mb(); |
| 930 | } | ||
| 931 | 697 | ||
| 932 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { | 698 | if (vma->vm_flags & VM_EXEC) |
| 933 | /* No synonym problem on destination */ | 699 | sh64_icache_inv_user_small_range(vma->vm_mm, addr, len); |
| 934 | sh64_page_copy(from, to); | 700 | } |
| 935 | } else { | 701 | |
| 936 | sh64_copy_user_page_coloured(to, from, address); | 702 | /* |
| 937 | } | 703 | * For the address range [start,end), write back the data from the |
| 704 | * D-cache and invalidate the corresponding region of the I-cache for the | ||
| 705 | * current process. Used to flush signal trampolines on the stack to | ||
| 706 | * make them executable. | ||
| 707 | */ | ||
| 708 | void flush_cache_sigtramp(unsigned long vaddr) | ||
| 709 | { | ||
| 710 | unsigned long end = vaddr + L1_CACHE_BYTES; | ||
| 938 | 711 | ||
| 939 | /* Note, don't need to flush 'from' page from the cache again - it's | 712 | __flush_wback_region((void *)vaddr, L1_CACHE_BYTES); |
| 940 | done anyway by the generic code */ | 713 | wmb(); |
| 714 | sh64_icache_inv_current_user_range(vaddr, end); | ||
| 941 | } | 715 | } |
| 942 | 716 | ||
| 943 | void clear_user_page(void *to, unsigned long address, struct page *page) | 717 | /* |
| 718 | * These *MUST* lie in an area of virtual address space that's otherwise | ||
| 719 | * unused. | ||
| 720 | */ | ||
| 721 | #define UNIQUE_EADDR_START 0xe0000000UL | ||
| 722 | #define UNIQUE_EADDR_END 0xe8000000UL | ||
| 723 | |||
| 724 | /* | ||
| 725 | * Given a physical address paddr, and a user virtual address user_eaddr | ||
| 726 | * which will eventually be mapped to it, create a one-off kernel-private | ||
| 727 | * eaddr mapped to the same paddr. This is used for creating special | ||
| 728 | * destination pages for copy_user_page and clear_user_page. | ||
| 729 | */ | ||
| 730 | static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, | ||
| 731 | unsigned long paddr) | ||
| 944 | { | 732 | { |
| 945 | /* 'to' is a kernel virtual address (within the superpage | 733 | static unsigned long current_pointer = UNIQUE_EADDR_START; |
| 946 | mapping of the physical RAM). 'address' is the user virtual address | 734 | unsigned long coloured_pointer; |
| 947 | where the 'to' page will be mapped after. This allows a custom | ||
| 948 | mapping to be used to ensure that the new copy is placed in the | ||
| 949 | right cache sets for the user to see it without having to bounce it | ||
| 950 | out via memory. | ||
| 951 | */ | ||
| 952 | 735 | ||
| 953 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { | 736 | if (current_pointer == UNIQUE_EADDR_END) { |
| 954 | /* No synonym problem on destination */ | 737 | sh64_dcache_purge_all(); |
| 955 | sh64_page_clear(to); | 738 | current_pointer = UNIQUE_EADDR_START; |
| 956 | } else { | ||
| 957 | sh64_clear_user_page_coloured(to, address); | ||
| 958 | } | 739 | } |
| 959 | } | ||
| 960 | 740 | ||
| 961 | #endif /* !CONFIG_DCACHE_DISABLED */ | 741 | coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | |
| 742 | (user_eaddr & CACHE_OC_SYN_MASK); | ||
| 743 | sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr); | ||
| 962 | 744 | ||
| 963 | /****************************************************************************/ | 745 | current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS); |
| 964 | 746 | ||
| 965 | void flush_dcache_page(struct page *page) | 747 | return coloured_pointer; |
| 966 | { | ||
| 967 | sh64_dcache_purge_phy_page(page_to_phys(page)); | ||
| 968 | wmb(); | ||
| 969 | } | 748 | } |
| 970 | 749 | ||
| 971 | /****************************************************************************/ | 750 | static void sh64_copy_user_page_coloured(void *to, void *from, |
| 972 | 751 | unsigned long address) | |
| 973 | void flush_icache_range(unsigned long start, unsigned long end) | ||
| 974 | { | 752 | { |
| 975 | /* Flush the range [start,end] of kernel virtual address space from | 753 | void *coloured_to; |
| 976 | the I-cache. The corresponding range must be purged from the | ||
| 977 | D-cache also because the SH-5 doesn't have cache snooping between | ||
| 978 | the caches. The addresses will be visible through the superpage | ||
| 979 | mapping, therefore it's guaranteed that there are no cache entries for | ||
| 980 | the range in cache sets of the wrong colour. | ||
| 981 | 754 | ||
| 982 | Primarily used for cohering the I-cache after a module has | 755 | /* |
| 983 | been loaded. */ | 756 | * Discard any existing cache entries of the wrong colour. These are |
| 757 | * present quite often, if the kernel has recently used the page | ||
| 758 | * internally, then given it up, then it's been allocated to the user. | ||
| 759 | */ | ||
| 760 | sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to); | ||
| 984 | 761 | ||
| 985 | /* We also make sure to purge the same range from the D-cache since | 762 | coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to)); |
| 986 | flush_page_to_ram() won't be doing this for us! */ | 763 | copy_page(from, coloured_to); |
| 987 | 764 | ||
| 988 | sh64_dcache_purge_kernel_range(start, end); | 765 | sh64_teardown_dtlb_cache_slot(); |
| 989 | wmb(); | ||
| 990 | sh64_icache_inv_kernel_range(start, end); | ||
| 991 | } | 766 | } |
| 992 | 767 | ||
| 993 | /****************************************************************************/ | 768 | static void sh64_clear_user_page_coloured(void *to, unsigned long address) |
| 994 | |||
| 995 | void flush_icache_user_range(struct vm_area_struct *vma, | ||
| 996 | struct page *page, unsigned long addr, int len) | ||
| 997 | { | 769 | { |
| 998 | /* Flush the range of user (defined by vma->vm_mm) address space | 770 | void *coloured_to; |
| 999 | starting at 'addr' for 'len' bytes from the cache. The range does | ||
| 1000 | not straddle a page boundary, the unique physical page containing | ||
| 1001 | the range is 'page'. This seems to be used mainly for invalidating | ||
| 1002 | an address range following a poke into the program text through the | ||
| 1003 | ptrace() call from another process (e.g. for BRK instruction | ||
| 1004 | insertion). */ | ||
| 1005 | 771 | ||
| 1006 | sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr); | 772 | /* |
| 1007 | mb(); | 773 | * Discard any existing kernel-originated lines of the wrong |
| 774 | * colour (as above) | ||
| 775 | */ | ||
| 776 | sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to); | ||
| 1008 | 777 | ||
| 1009 | if (vma->vm_flags & VM_EXEC) { | 778 | coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to)); |
| 1010 | sh64_icache_inv_user_small_range(vma->vm_mm, addr, len); | 779 | clear_page(coloured_to); |
| 1011 | } | ||
| 1012 | } | ||
| 1013 | 780 | ||
| 1014 | /*########################################################################## | 781 | sh64_teardown_dtlb_cache_slot(); |
| 1015 | ARCH/SH64 PRIVATE CALLABLE API. | 782 | } |
| 1016 | ##########################################################################*/ | ||
| 1017 | 783 | ||
| 1018 | void flush_cache_sigtramp(unsigned long start, unsigned long end) | 784 | /* |
| 785 | * 'from' and 'to' are kernel virtual addresses (within the superpage | ||
| 786 | * mapping of the physical RAM). 'address' is the user virtual address | ||
| 787 | * where the copy 'to' will be mapped after. This allows a custom | ||
| 788 | * mapping to be used to ensure that the new copy is placed in the | ||
| 789 | * right cache sets for the user to see it without having to bounce it | ||
| 790 | * out via memory. Note however : the call to flush_page_to_ram in | ||
| 791 | * (generic)/mm/memory.c:(break_cow) undoes all this good work in that one | ||
| 792 | * very important case! | ||
| 793 | * | ||
| 794 | * TBD : can we guarantee that on every call, any cache entries for | ||
| 795 | * 'from' are in the same colour sets as 'address' also? i.e. is this | ||
| 796 | * always used just to deal with COW? (I suspect not). | ||
| 797 | * | ||
| 798 | * There are two possibilities here for when the page 'from' was last accessed: | ||
| 799 | * - by the kernel : this is OK, no purge required. | ||
| 800 | * - by the/a user (e.g. for break_COW) : need to purge. | ||
| 801 | * | ||
| 802 | * If the potential user mapping at 'address' is the same colour as | ||
| 803 | * 'from' there is no need to purge any cache lines from the 'from' | ||
| 804 | * page mapped into cache sets of colour 'address'. (The copy will be | ||
| 805 | * accessing the page through 'from'). | ||
| 806 | */ | ||
| 807 | void copy_user_page(void *to, void *from, unsigned long address, | ||
| 808 | struct page *page) | ||
| 1019 | { | 809 | { |
| 1020 | /* For the address range [start,end), write back the data from the | 810 | if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) |
| 1021 | D-cache and invalidate the corresponding region of the I-cache for | 811 | sh64_dcache_purge_coloured_phy_page(__pa(from), address); |
| 1022 | the current process. Used to flush signal trampolines on the stack | ||
| 1023 | to make them executable. */ | ||
| 1024 | 812 | ||
| 1025 | sh64_dcache_wback_current_user_range(start, end); | 813 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) |
| 1026 | wmb(); | 814 | copy_page(to, from); |
| 1027 | sh64_icache_inv_current_user_range(start, end); | 815 | else |
| 816 | sh64_copy_user_page_coloured(to, from, address); | ||
| 1028 | } | 817 | } |
| 1029 | 818 | ||
| 819 | /* | ||
| 820 | * 'to' is a kernel virtual address (within the superpage mapping of the | ||
| 821 | * physical RAM). 'address' is the user virtual address where the 'to' | ||
| 822 | * page will be mapped after. This allows a custom mapping to be used to | ||
| 823 | * ensure that the new copy is placed in the right cache sets for the | ||
| 824 | * user to see it without having to bounce it out via memory. | ||
| 825 | */ | ||
| 826 | void clear_user_page(void *to, unsigned long address, struct page *page) | ||
| 827 | { | ||
| 828 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) | ||
| 829 | clear_page(to); | ||
| 830 | else | ||
| 831 | sh64_clear_user_page_coloured(to, address); | ||
| 832 | } | ||
