aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2008-02-13 06:14:10 -0500
committerPaul Mundt <lethal@linux-sh.org>2008-02-14 00:22:12 -0500
commit38350e0a00f973dd9c6556beeff0f7eb5ef3f58b (patch)
treea419a9c9b45a6cb20a2f4f809f628c10a3ce9ab9
parent5c8f82c64941594cdab53bf9f9a66c190781f4f6 (diff)
sh: Get SH-5 caches working again post-unification.
A number of cleanups to get the SH-5 cache management code in line with the rest of the SH backend. Signed-off-by: Paul Mundt <lethal@linux-sh.org>
-rw-r--r--arch/sh/kernel/cpu/sh5/probe.c61
-rw-r--r--arch/sh/mm/cache-sh5.c1021
-rw-r--r--include/asm-sh/cpu-sh5/cacheflush.h4
-rw-r--r--include/asm-sh/mmu_context_64.h3
-rw-r--r--include/asm-sh/page.h7
5 files changed, 448 insertions, 648 deletions
diff --git a/arch/sh/kernel/cpu/sh5/probe.c b/arch/sh/kernel/cpu/sh5/probe.c
index 15d167fd0ae..31f8cb0f637 100644
--- a/arch/sh/kernel/cpu/sh5/probe.c
+++ b/arch/sh/kernel/cpu/sh5/probe.c
@@ -20,19 +20,18 @@ int __init detect_cpu_and_cache_system(void)
20{ 20{
21 unsigned long long cir; 21 unsigned long long cir;
22 22
23 /* Do peeks in real mode to avoid having to set up a mapping for the 23 /*
24 WPC registers. On SH5-101 cut2, such a mapping would be exposed to 24 * Do peeks in real mode to avoid having to set up a mapping for
25 an address translation erratum which would make it hard to set up 25 * the WPC registers. On SH5-101 cut2, such a mapping would be
26 correctly. */ 26 * exposed to an address translation erratum which would make it
27 * hard to set up correctly.
28 */
27 cir = peek_real_address_q(0x0d000008); 29 cir = peek_real_address_q(0x0d000008);
28 if ((cir & 0xffff) == 0x5103) { 30 if ((cir & 0xffff) == 0x5103)
29 boot_cpu_data.type = CPU_SH5_103; 31 boot_cpu_data.type = CPU_SH5_103;
30 } else if (((cir >> 32) & 0xffff) == 0x51e2) { 32 else if (((cir >> 32) & 0xffff) == 0x51e2)
31 /* CPU.VCR aliased at CIR address on SH5-101 */ 33 /* CPU.VCR aliased at CIR address on SH5-101 */
32 boot_cpu_data.type = CPU_SH5_101; 34 boot_cpu_data.type = CPU_SH5_101;
33 } else {
34 boot_cpu_data.type = CPU_SH_NONE;
35 }
36 35
37 /* 36 /*
38 * First, setup some sane values for the I-cache. 37 * First, setup some sane values for the I-cache.
@@ -40,37 +39,33 @@ int __init detect_cpu_and_cache_system(void)
40 boot_cpu_data.icache.ways = 4; 39 boot_cpu_data.icache.ways = 4;
41 boot_cpu_data.icache.sets = 256; 40 boot_cpu_data.icache.sets = 256;
42 boot_cpu_data.icache.linesz = L1_CACHE_BYTES; 41 boot_cpu_data.icache.linesz = L1_CACHE_BYTES;
42 boot_cpu_data.icache.way_incr = (1 << 13);
43 boot_cpu_data.icache.entry_shift = 5;
44 boot_cpu_data.icache.way_size = boot_cpu_data.icache.sets *
45 boot_cpu_data.icache.linesz;
46 boot_cpu_data.icache.entry_mask = 0x1fe0;
47 boot_cpu_data.icache.flags = 0;
43 48
44#if 0
45 /* 49 /*
46 * FIXME: This can probably be cleaned up a bit as well.. for example, 50 * Next, setup some sane values for the D-cache.
47 * do we really need the way shift _and_ the way_step_shift ?? Judging 51 *
48 * by the existing code, I would guess no.. is there any valid reason 52 * On the SH5, these are pretty consistent with the I-cache settings,
49 * why we need to be tracking this around? 53 * so we just copy over the existing definitions.. these can be fixed
54 * up later, especially if we add runtime CPU probing.
55 *
56 * Though in the meantime it saves us from having to duplicate all of
57 * the above definitions..
50 */ 58 */
51 boot_cpu_data.icache.way_shift = 13; 59 boot_cpu_data.dcache = boot_cpu_data.icache;
52 boot_cpu_data.icache.entry_shift = 5;
53 boot_cpu_data.icache.set_shift = 4;
54 boot_cpu_data.icache.way_step_shift = 16;
55 boot_cpu_data.icache.asid_shift = 2;
56 60
57 /* 61 /*
58 * way offset = cache size / associativity, so just don't factor in 62 * Setup any cache-related flags here
59 * associativity in the first place..
60 */ 63 */
61 boot_cpu_data.icache.way_ofs = boot_cpu_data.icache.sets * 64#if defined(CONFIG_CACHE_WRITETHROUGH)
62 boot_cpu_data.icache.linesz; 65 set_bit(SH_CACHE_MODE_WT, &(boot_cpu_data.dcache.flags));
63 66#elif defined(CONFIG_CACHE_WRITEBACK)
64 boot_cpu_data.icache.asid_mask = 0x3fc; 67 set_bit(SH_CACHE_MODE_WB, &(boot_cpu_data.dcache.flags));
65 boot_cpu_data.icache.idx_mask = 0x1fe0;
66 boot_cpu_data.icache.epn_mask = 0xffffe000;
67#endif 68#endif
68 69
69 boot_cpu_data.icache.flags = 0;
70
71 /* A trivial starting point.. */
72 memcpy(&boot_cpu_data.dcache,
73 &boot_cpu_data.icache, sizeof(struct cache_info));
74
75 return 0; 70 return 0;
76} 71}
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index 5d1f615fe52..3877321fced 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -1,10 +1,10 @@
1/* 1/*
2 * arch/sh/mm/cache-sh5.c 2 * arch/sh/mm/cache-sh5.c
3 * 3 *
4 * Original version Copyright (C) 2000, 2001 Paolo Alberelli 4 * Copyright (C) 2000, 2001 Paolo Alberelli
5 * Second version Copyright (C) benedict.gaster@superh.com 2002 5 * Copyright (C) 2002 Benedict Gaster
6 * Third version Copyright Richard.Curnow@superh.com 2003 6 * Copyright (C) 2003 Richard Curnow
7 * Hacks to third version Copyright (C) 2003 Paul Mundt 7 * Copyright (C) 2003 - 2008 Paul Mundt
8 * 8 *
9 * This file is subject to the terms and conditions of the GNU General Public 9 * This file is subject to the terms and conditions of the GNU General Public
10 * License. See the file "COPYING" in the main directory of this archive 10 * License. See the file "COPYING" in the main directory of this archive
@@ -13,101 +13,20 @@
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/mman.h> 14#include <linux/mman.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/threads.h> 16#include <asm/tlb.h>
17#include <asm/page.h>
18#include <asm/pgtable.h>
19#include <asm/processor.h> 17#include <asm/processor.h>
20#include <asm/cache.h> 18#include <asm/cache.h>
21#include <asm/tlb.h> 19#include <asm/pgalloc.h>
22#include <asm/io.h>
23#include <asm/uaccess.h> 20#include <asm/uaccess.h>
24#include <asm/mmu_context.h> 21#include <asm/mmu_context.h>
25#include <asm/pgalloc.h> /* for flush_itlb_range */
26
27#include <linux/proc_fs.h>
28
29/* This function is in entry.S */
30extern unsigned long switch_and_save_asid(unsigned long new_asid);
31 22
32/* Wired TLB entry for the D-cache */ 23/* Wired TLB entry for the D-cache */
33static unsigned long long dtlb_cache_slot; 24static unsigned long long dtlb_cache_slot;
34 25
35/** 26void __init p3_cache_init(void)
36 * sh64_cache_init()
37 *
38 * This is pretty much just a straightforward clone of the SH
39 * detect_cpu_and_cache_system().
40 *
41 * This function is responsible for setting up all of the cache
42 * info dynamically as well as taking care of CPU probing and
43 * setting up the relevant subtype data.
44 *
45 * FIXME: For the time being, we only really support the SH5-101
46 * out of the box, and don't support dynamic probing for things
47 * like the SH5-103 or even cut2 of the SH5-101. Implement this
48 * later!
49 */
50int __init sh64_cache_init(void)
51{ 27{
52 /* 28 /* Reserve a slot for dcache colouring in the DTLB */
53 * First, setup some sane values for the I-cache. 29 dtlb_cache_slot = sh64_get_wired_dtlb_entry();
54 */
55 cpu_data->icache.ways = 4;
56 cpu_data->icache.sets = 256;
57 cpu_data->icache.linesz = L1_CACHE_BYTES;
58
59 /*
60 * FIXME: This can probably be cleaned up a bit as well.. for example,
61 * do we really need the way shift _and_ the way_step_shift ?? Judging
62 * by the existing code, I would guess no.. is there any valid reason
63 * why we need to be tracking this around?
64 */
65 cpu_data->icache.way_shift = 13;
66 cpu_data->icache.entry_shift = 5;
67 cpu_data->icache.set_shift = 4;
68 cpu_data->icache.way_step_shift = 16;
69 cpu_data->icache.asid_shift = 2;
70
71 /*
72 * way offset = cache size / associativity, so just don't factor in
73 * associativity in the first place..
74 */
75 cpu_data->icache.way_ofs = cpu_data->icache.sets *
76 cpu_data->icache.linesz;
77
78 cpu_data->icache.asid_mask = 0x3fc;
79 cpu_data->icache.idx_mask = 0x1fe0;
80 cpu_data->icache.epn_mask = 0xffffe000;
81 cpu_data->icache.flags = 0;
82
83 /*
84 * Next, setup some sane values for the D-cache.
85 *
86 * On the SH5, these are pretty consistent with the I-cache settings,
87 * so we just copy over the existing definitions.. these can be fixed
88 * up later, especially if we add runtime CPU probing.
89 *
90 * Though in the meantime it saves us from having to duplicate all of
91 * the above definitions..
92 */
93 cpu_data->dcache = cpu_data->icache;
94
95 /*
96 * Setup any cache-related flags here
97 */
98#if defined(CONFIG_DCACHE_WRITE_THROUGH)
99 set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
100#elif defined(CONFIG_DCACHE_WRITE_BACK)
101 set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
102#endif
103
104 /*
105 * We also need to reserve a slot for the D-cache in the DTLB, so we
106 * do this now ..
107 */
108 dtlb_cache_slot = sh64_get_wired_dtlb_entry();
109
110 return 0;
111} 30}
112 31
113#ifdef CONFIG_DCACHE_DISABLED 32#ifdef CONFIG_DCACHE_DISABLED
@@ -116,73 +35,48 @@ int __init sh64_cache_init(void)
116#define sh64_dcache_purge_user_range(mm, start, end) do { } while (0) 35#define sh64_dcache_purge_user_range(mm, start, end) do { } while (0)
117#define sh64_dcache_purge_phy_page(paddr) do { } while (0) 36#define sh64_dcache_purge_phy_page(paddr) do { } while (0)
118#define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0) 37#define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0)
119#define sh64_dcache_purge_kernel_range(start, end) do { } while (0)
120#define sh64_dcache_wback_current_user_range(start, end) do { } while (0)
121#endif 38#endif
122 39
123/*##########################################################################*/ 40/*
124 41 * The following group of functions deal with mapping and unmapping a
125/* From here onwards, a rewrite of the implementation, 42 * temporary page into a DTLB slot that has been set aside for exclusive
126 by Richard.Curnow@superh.com. 43 * use.
127 44 */
128 The major changes in this compared to the old version are; 45static inline void
129 1. use more selective purging through OCBP instead of using ALLOCO to purge 46sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid,
130 by natural replacement. This avoids purging out unrelated cache lines 47 unsigned long paddr)
131 that happen to be in the same set.
132 2. exploit the APIs copy_user_page and clear_user_page better
133 3. be more selective about I-cache purging, in particular use invalidate_all
134 more sparingly.
135
136 */
137
138/*##########################################################################
139 SUPPORT FUNCTIONS
140 ##########################################################################*/
141
142/****************************************************************************/
143/* The following group of functions deal with mapping and unmapping a temporary
144 page into the DTLB slot that have been set aside for our exclusive use. */
145/* In order to accomplish this, we use the generic interface for adding and
146 removing a wired slot entry as defined in arch/sh/mm/tlb-sh5.c */
147/****************************************************************************/
148
149static unsigned long slot_own_flags;
150
151static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
152{ 48{
153 local_irq_save(slot_own_flags); 49 local_irq_disable();
154 sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr); 50 sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
155} 51}
156 52
157static inline void sh64_teardown_dtlb_cache_slot(void) 53static inline void sh64_teardown_dtlb_cache_slot(void)
158{ 54{
159 sh64_teardown_tlb_slot(dtlb_cache_slot); 55 sh64_teardown_tlb_slot(dtlb_cache_slot);
160 local_irq_restore(slot_own_flags); 56 local_irq_enable();
161} 57}
162 58
163/****************************************************************************/
164
165#ifndef CONFIG_ICACHE_DISABLED 59#ifndef CONFIG_ICACHE_DISABLED
166 60static inline void sh64_icache_inv_all(void)
167static void __inline__ sh64_icache_inv_all(void)
168{ 61{
169 unsigned long long addr, flag, data; 62 unsigned long long addr, flag, data;
170 unsigned int flags; 63 unsigned int flags;
171 64
172 addr=ICCR0; 65 addr = ICCR0;
173 flag=ICCR0_ICI; 66 flag = ICCR0_ICI;
174 data=0; 67 data = 0;
175 68
176 /* Make this a critical section for safety (probably not strictly necessary.) */ 69 /* Make this a critical section for safety (probably not strictly necessary.) */
177 local_irq_save(flags); 70 local_irq_save(flags);
178 71
179 /* Without %1 it gets unexplicably wrong */ 72 /* Without %1 it gets unexplicably wrong */
180 asm volatile("getcfg %3, 0, %0\n\t" 73 __asm__ __volatile__ (
181 "or %0, %2, %0\n\t" 74 "getcfg %3, 0, %0\n\t"
182 "putcfg %3, 0, %0\n\t" 75 "or %0, %2, %0\n\t"
183 "synci" 76 "putcfg %3, 0, %0\n\t"
184 : "=&r" (data) 77 "synci"
185 : "0" (data), "r" (flag), "r" (addr)); 78 : "=&r" (data)
79 : "0" (data), "r" (flag), "r" (addr));
186 80
187 local_irq_restore(flags); 81 local_irq_restore(flags);
188} 82}
@@ -193,20 +87,12 @@ static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
193 * the addresses lie in the kernel superpage. */ 87 * the addresses lie in the kernel superpage. */
194 88
195 unsigned long long ullend, addr, aligned_start; 89 unsigned long long ullend, addr, aligned_start;
196#if (NEFF == 32)
197 aligned_start = (unsigned long long)(signed long long)(signed long) start; 90 aligned_start = (unsigned long long)(signed long long)(signed long) start;
198#else 91 addr = L1_CACHE_ALIGN(aligned_start);
199#error "NEFF != 32"
200#endif
201 aligned_start &= L1_CACHE_ALIGN_MASK;
202 addr = aligned_start;
203#if (NEFF == 32)
204 ullend = (unsigned long long) (signed long long) (signed long) end; 92 ullend = (unsigned long long) (signed long long) (signed long) end;
205#else 93
206#error "NEFF != 32"
207#endif
208 while (addr <= ullend) { 94 while (addr <= ullend) {
209 asm __volatile__ ("icbi %0, 0" : : "r" (addr)); 95 __asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
210 addr += L1_CACHE_BYTES; 96 addr += L1_CACHE_BYTES;
211 } 97 }
212} 98}
@@ -215,7 +101,7 @@ static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long
215{ 101{
216 /* If we get called, we know that vma->vm_flags contains VM_EXEC. 102 /* If we get called, we know that vma->vm_flags contains VM_EXEC.
217 Also, eaddr is page-aligned. */ 103 Also, eaddr is page-aligned. */
218 104 unsigned int cpu = smp_processor_id();
219 unsigned long long addr, end_addr; 105 unsigned long long addr, end_addr;
220 unsigned long flags = 0; 106 unsigned long flags = 0;
221 unsigned long running_asid, vma_asid; 107 unsigned long running_asid, vma_asid;
@@ -237,17 +123,17 @@ static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long
237 */ 123 */
238 124
239 running_asid = get_asid(); 125 running_asid = get_asid();
240 vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK); 126 vma_asid = cpu_asid(cpu, vma->vm_mm);
241 if (running_asid != vma_asid) { 127 if (running_asid != vma_asid) {
242 local_irq_save(flags); 128 local_irq_save(flags);
243 switch_and_save_asid(vma_asid); 129 switch_and_save_asid(vma_asid);
244 } 130 }
245 while (addr < end_addr) { 131 while (addr < end_addr) {
246 /* Worth unrolling a little */ 132 /* Worth unrolling a little */
247 asm __volatile__("icbi %0, 0" : : "r" (addr)); 133 __asm__ __volatile__("icbi %0, 0" : : "r" (addr));
248 asm __volatile__("icbi %0, 32" : : "r" (addr)); 134 __asm__ __volatile__("icbi %0, 32" : : "r" (addr));
249 asm __volatile__("icbi %0, 64" : : "r" (addr)); 135 __asm__ __volatile__("icbi %0, 64" : : "r" (addr));
250 asm __volatile__("icbi %0, 96" : : "r" (addr)); 136 __asm__ __volatile__("icbi %0, 96" : : "r" (addr));
251 addr += 128; 137 addr += 128;
252 } 138 }
253 if (running_asid != vma_asid) { 139 if (running_asid != vma_asid) {
@@ -256,8 +142,6 @@ static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long
256 } 142 }
257} 143}
258 144
259/****************************************************************************/
260
261static void sh64_icache_inv_user_page_range(struct mm_struct *mm, 145static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
262 unsigned long start, unsigned long end) 146 unsigned long start, unsigned long end)
263{ 147{
@@ -275,10 +159,10 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
275 possible with the D-cache. Just assume 64 for now as a working 159 possible with the D-cache. Just assume 64 for now as a working
276 figure. 160 figure.
277 */ 161 */
278
279 int n_pages; 162 int n_pages;
280 163
281 if (!mm) return; 164 if (!mm)
165 return;
282 166
283 n_pages = ((end - start) >> PAGE_SHIFT); 167 n_pages = ((end - start) >> PAGE_SHIFT);
284 if (n_pages >= 64) { 168 if (n_pages >= 64) {
@@ -290,7 +174,7 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
290 unsigned long mm_asid, current_asid; 174 unsigned long mm_asid, current_asid;
291 unsigned long long flags = 0ULL; 175 unsigned long long flags = 0ULL;
292 176
293 mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; 177 mm_asid = cpu_asid(smp_processor_id(), mm);
294 current_asid = get_asid(); 178 current_asid = get_asid();
295 179
296 if (mm_asid != current_asid) { 180 if (mm_asid != current_asid) {
@@ -322,6 +206,7 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
322 } 206 }
323 aligned_start = vma->vm_end; /* Skip to start of next region */ 207 aligned_start = vma->vm_end; /* Skip to start of next region */
324 } 208 }
209
325 if (mm_asid != current_asid) { 210 if (mm_asid != current_asid) {
326 switch_and_save_asid(current_asid); 211 switch_and_save_asid(current_asid);
327 local_irq_restore(flags); 212 local_irq_restore(flags);
@@ -329,47 +214,46 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
329 } 214 }
330} 215}
331 216
217/*
218 * Invalidate a small range of user context I-cache, not necessarily page
219 * (or even cache-line) aligned.
220 *
221 * Since this is used inside ptrace, the ASID in the mm context typically
222 * won't match current_asid. We'll have to switch ASID to do this. For
223 * safety, and given that the range will be small, do all this under cli.
224 *
225 * Note, there is a hazard that the ASID in mm->context is no longer
226 * actually associated with mm, i.e. if the mm->context has started a new
227 * cycle since mm was last active. However, this is just a performance
228 * issue: all that happens is that we invalidate lines belonging to
229 * another mm, so the owning process has to refill them when that mm goes
230 * live again. mm itself can't have any cache entries because there will
231 * have been a flush_cache_all when the new mm->context cycle started.
232 */
332static void sh64_icache_inv_user_small_range(struct mm_struct *mm, 233static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
333 unsigned long start, int len) 234 unsigned long start, int len)
334{ 235{
335
336 /* Invalidate a small range of user context I-cache, not necessarily
337 page (or even cache-line) aligned. */
338
339 unsigned long long eaddr = start; 236 unsigned long long eaddr = start;
340 unsigned long long eaddr_end = start + len; 237 unsigned long long eaddr_end = start + len;
341 unsigned long current_asid, mm_asid; 238 unsigned long current_asid, mm_asid;
342 unsigned long long flags; 239 unsigned long long flags;
343 unsigned long long epage_start; 240 unsigned long long epage_start;
344 241
345 /* Since this is used inside ptrace, the ASID in the mm context 242 /*
346 typically won't match current_asid. We'll have to switch ASID to do 243 * Align to start of cache line. Otherwise, suppose len==8 and
347 this. For safety, and given that the range will be small, do all 244 * start was at 32N+28 : the last 4 bytes wouldn't get invalidated.
348 this under cli. 245 */
349 246 eaddr = L1_CACHE_ALIGN(start);
350 Note, there is a hazard that the ASID in mm->context is no longer
351 actually associated with mm, i.e. if the mm->context has started a
352 new cycle since mm was last active. However, this is just a
353 performance issue: all that happens is that we invalidate lines
354 belonging to another mm, so the owning process has to refill them
355 when that mm goes live again. mm itself can't have any cache
356 entries because there will have been a flush_cache_all when the new
357 mm->context cycle started. */
358
359 /* Align to start of cache line. Otherwise, suppose len==8 and start
360 was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
361 eaddr = start & L1_CACHE_ALIGN_MASK;
362 eaddr_end = start + len; 247 eaddr_end = start + len;
363 248
249 mm_asid = cpu_asid(smp_processor_id(), mm);
364 local_irq_save(flags); 250 local_irq_save(flags);
365 mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
366 current_asid = switch_and_save_asid(mm_asid); 251 current_asid = switch_and_save_asid(mm_asid);
367 252
368 epage_start = eaddr & PAGE_MASK; 253 epage_start = eaddr & PAGE_MASK;
369 254
370 while (eaddr < eaddr_end) 255 while (eaddr < eaddr_end) {
371 { 256 __asm__ __volatile__("icbi %0, 0" : : "r" (eaddr));
372 asm __volatile__("icbi %0, 0" : : "r" (eaddr));
373 eaddr += L1_CACHE_BYTES; 257 eaddr += L1_CACHE_BYTES;
374 } 258 }
375 switch_and_save_asid(current_asid); 259 switch_and_save_asid(current_asid);
@@ -394,30 +278,24 @@ static void sh64_icache_inv_current_user_range(unsigned long start, unsigned lon
394 been recycled since we were last active in which case we might just 278 been recycled since we were last active in which case we might just
395 invalidate another processes I-cache entries : no worries, just a 279 invalidate another processes I-cache entries : no worries, just a
396 performance drop for him. */ 280 performance drop for him. */
397 aligned_start = start & L1_CACHE_ALIGN_MASK; 281 aligned_start = L1_CACHE_ALIGN(start);
398 addr = aligned_start; 282 addr = aligned_start;
399 while (addr < ull_end) { 283 while (addr < ull_end) {
400 asm __volatile__ ("icbi %0, 0" : : "r" (addr)); 284 __asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
401 asm __volatile__ ("nop"); 285 __asm__ __volatile__ ("nop");
402 asm __volatile__ ("nop"); 286 __asm__ __volatile__ ("nop");
403 addr += L1_CACHE_BYTES; 287 addr += L1_CACHE_BYTES;
404 } 288 }
405} 289}
406
407#endif /* !CONFIG_ICACHE_DISABLED */ 290#endif /* !CONFIG_ICACHE_DISABLED */
408 291
409/****************************************************************************/
410
411#ifndef CONFIG_DCACHE_DISABLED 292#ifndef CONFIG_DCACHE_DISABLED
412
413/* Buffer used as the target of alloco instructions to purge data from cache 293/* Buffer used as the target of alloco instructions to purge data from cache
414 sets by natural eviction. -- RPC */ 294 sets by natural eviction. -- RPC */
415#define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4) 295#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
416static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, }; 296static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
417 297
418/****************************************************************************/ 298static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
419
420static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
421{ 299{
422 /* Purge all ways in a particular block of sets, specified by the base 300 /* Purge all ways in a particular block of sets, specified by the base
423 set number and number of sets. Can handle wrap-around, if that's 301 set number and number of sets. Can handle wrap-around, if that's
@@ -428,102 +306,86 @@ static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets
428 int j; 306 int j;
429 int set_offset; 307 int set_offset;
430 308
431 dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift; 309 dummy_buffer_base_set = ((int)&dummy_alloco_area &
310 cpu_data->dcache.entry_mask) >>
311 cpu_data->dcache.entry_shift;
432 set_offset = sets_to_purge_base - dummy_buffer_base_set; 312 set_offset = sets_to_purge_base - dummy_buffer_base_set;
433 313
434 for (j=0; j<n_sets; j++, set_offset++) { 314 for (j = 0; j < n_sets; j++, set_offset++) {
435 set_offset &= (cpu_data->dcache.sets - 1); 315 set_offset &= (cpu_data->dcache.sets - 1);
436 eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift); 316 eaddr0 = (unsigned long long)dummy_alloco_area +
437 317 (set_offset << cpu_data->dcache.entry_shift);
438 /* Do one alloco which hits the required set per cache way. For 318
439 write-back mode, this will purge the #ways resident lines. There's 319 /*
440 little point unrolling this loop because the allocos stall more if 320 * Do one alloco which hits the required set per cache
441 they're too close together. */ 321 * way. For write-back mode, this will purge the #ways
442 eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; 322 * resident lines. There's little point unrolling this
443 for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { 323 * loop because the allocos stall more if they're too
444 asm __volatile__ ("alloco %0, 0" : : "r" (eaddr)); 324 * close together.
445 asm __volatile__ ("synco"); /* TAKum03020 */ 325 */
326 eaddr1 = eaddr0 + cpu_data->dcache.way_size *
327 cpu_data->dcache.ways;
328
329 for (eaddr = eaddr0; eaddr < eaddr1;
330 eaddr += cpu_data->dcache.way_size) {
331 __asm__ __volatile__ ("alloco %0, 0" : : "r" (eaddr));
332 __asm__ __volatile__ ("synco"); /* TAKum03020 */
446 } 333 }
447 334
448 eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; 335 eaddr1 = eaddr0 + cpu_data->dcache.way_size *
449 for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { 336 cpu_data->dcache.ways;
450 /* Load from each address. Required because alloco is a NOP if 337
451 the cache is write-through. Write-through is a config option. */ 338 for (eaddr = eaddr0; eaddr < eaddr1;
339 eaddr += cpu_data->dcache.way_size) {
340 /*
341 * Load from each address. Required because
342 * alloco is a NOP if the cache is write-through.
343 */
452 if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags))) 344 if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
453 *(volatile unsigned char *)(int)eaddr; 345 ctrl_inb(eaddr);
454 } 346 }
455 } 347 }
456 348
457 /* Don't use OCBI to invalidate the lines. That costs cycles directly. 349 /*
458 If the dummy block is just left resident, it will naturally get 350 * Don't use OCBI to invalidate the lines. That costs cycles
459 evicted as required. */ 351 * directly. If the dummy block is just left resident, it will
460 352 * naturally get evicted as required.
461 return; 353 */
462} 354}
463 355
464/****************************************************************************/ 356/*
465 357 * Purge the entire contents of the dcache. The most efficient way to
358 * achieve this is to use alloco instructions on a region of unused
359 * memory equal in size to the cache, thereby causing the current
360 * contents to be discarded by natural eviction. The alternative, namely
361 * reading every tag, setting up a mapping for the corresponding page and
362 * doing an OCBP for the line, would be much more expensive.
363 */
466static void sh64_dcache_purge_all(void) 364static void sh64_dcache_purge_all(void)
467{ 365{
468 /* Purge the entire contents of the dcache. The most efficient way to
469 achieve this is to use alloco instructions on a region of unused
470 memory equal in size to the cache, thereby causing the current
471 contents to be discarded by natural eviction. The alternative,
472 namely reading every tag, setting up a mapping for the corresponding
473 page and doing an OCBP for the line, would be much more expensive.
474 */
475 366
476 sh64_dcache_purge_sets(0, cpu_data->dcache.sets); 367 sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
477
478 return;
479
480} 368}
481 369
482/****************************************************************************/
483
484static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
485{
486 /* Purge the range of addresses [start,end] from the D-cache. The
487 addresses lie in the superpage mapping. There's no harm if we
488 overpurge at either end - just a small performance loss. */
489 unsigned long long ullend, addr, aligned_start;
490#if (NEFF == 32)
491 aligned_start = (unsigned long long)(signed long long)(signed long) start;
492#else
493#error "NEFF != 32"
494#endif
495 aligned_start &= L1_CACHE_ALIGN_MASK;
496 addr = aligned_start;
497#if (NEFF == 32)
498 ullend = (unsigned long long) (signed long long) (signed long) end;
499#else
500#error "NEFF != 32"
501#endif
502 while (addr <= ullend) {
503 asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
504 addr += L1_CACHE_BYTES;
505 }
506 return;
507}
508 370
509/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for 371/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
510 anything else in the kernel */ 372 anything else in the kernel */
511#define MAGIC_PAGE0_START 0xffffffffec000000ULL 373#define MAGIC_PAGE0_START 0xffffffffec000000ULL
512 374
513static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr) 375/* Purge the physical page 'paddr' from the cache. It's known that any
376 * cache lines requiring attention have the same page colour as the the
377 * address 'eaddr'.
378 *
379 * This relies on the fact that the D-cache matches on physical tags when
380 * no virtual tag matches. So we create an alias for the original page
381 * and purge through that. (Alternatively, we could have done this by
382 * switching ASID to match the original mapping and purged through that,
383 * but that involves ASID switching cost + probably a TLBMISS + refill
384 * anyway.)
385 */
386static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr,
387 unsigned long eaddr)
514{ 388{
515 /* Purge the physical page 'paddr' from the cache. It's known that any
516 cache lines requiring attention have the same page colour as the the
517 address 'eaddr'.
518
519 This relies on the fact that the D-cache matches on physical tags
520 when no virtual tag matches. So we create an alias for the original
521 page and purge through that. (Alternatively, we could have done
522 this by switching ASID to match the original mapping and purged
523 through that, but that involves ASID switching cost + probably a
524 TLBMISS + refill anyway.)
525 */
526
527 unsigned long long magic_page_start; 389 unsigned long long magic_page_start;
528 unsigned long long magic_eaddr, magic_eaddr_end; 390 unsigned long long magic_eaddr, magic_eaddr_end;
529 391
@@ -531,47 +393,45 @@ static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned lo
531 393
532 /* As long as the kernel is not pre-emptible, this doesn't need to be 394 /* As long as the kernel is not pre-emptible, this doesn't need to be
533 under cli/sti. */ 395 under cli/sti. */
534
535 sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr); 396 sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
536 397
537 magic_eaddr = magic_page_start; 398 magic_eaddr = magic_page_start;
538 magic_eaddr_end = magic_eaddr + PAGE_SIZE; 399 magic_eaddr_end = magic_eaddr + PAGE_SIZE;
400
539 while (magic_eaddr < magic_eaddr_end) { 401 while (magic_eaddr < magic_eaddr_end) {
540 /* Little point in unrolling this loop - the OCBPs are blocking 402 /* Little point in unrolling this loop - the OCBPs are blocking
541 and won't go any quicker (i.e. the loop overhead is parallel 403 and won't go any quicker (i.e. the loop overhead is parallel
542 to part of the OCBP execution.) */ 404 to part of the OCBP execution.) */
543 asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr)); 405 __asm__ __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
544 magic_eaddr += L1_CACHE_BYTES; 406 magic_eaddr += L1_CACHE_BYTES;
545 } 407 }
546 408
547 sh64_teardown_dtlb_cache_slot(); 409 sh64_teardown_dtlb_cache_slot();
548} 410}
549 411
550/****************************************************************************/ 412/*
551 413 * Purge a page given its physical start address, by creating a temporary
414 * 1 page mapping and purging across that. Even if we know the virtual
415 * address (& vma or mm) of the page, the method here is more elegant
416 * because it avoids issues of coping with page faults on the purge
417 * instructions (i.e. no special-case code required in the critical path
418 * in the TLB miss handling).
419 */
552static void sh64_dcache_purge_phy_page(unsigned long paddr) 420static void sh64_dcache_purge_phy_page(unsigned long paddr)
553{ 421{
554 /* Pure a page given its physical start address, by creating a
555 temporary 1 page mapping and purging across that. Even if we know
556 the virtual address (& vma or mm) of the page, the method here is
557 more elegant because it avoids issues of coping with page faults on
558 the purge instructions (i.e. no special-case code required in the
559 critical path in the TLB miss handling). */
560
561 unsigned long long eaddr_start, eaddr, eaddr_end; 422 unsigned long long eaddr_start, eaddr, eaddr_end;
562 int i; 423 int i;
563 424
564 /* As long as the kernel is not pre-emptible, this doesn't need to be 425 /* As long as the kernel is not pre-emptible, this doesn't need to be
565 under cli/sti. */ 426 under cli/sti. */
566
567 eaddr_start = MAGIC_PAGE0_START; 427 eaddr_start = MAGIC_PAGE0_START;
568 for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) { 428 for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
569 sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr); 429 sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
570 430
571 eaddr = eaddr_start; 431 eaddr = eaddr_start;
572 eaddr_end = eaddr + PAGE_SIZE; 432 eaddr_end = eaddr + PAGE_SIZE;
573 while (eaddr < eaddr_end) { 433 while (eaddr < eaddr_end) {
574 asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr)); 434 __asm__ __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
575 eaddr += L1_CACHE_BYTES; 435 eaddr += L1_CACHE_BYTES;
576 } 436 }
577 437
@@ -584,6 +444,7 @@ static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
584 unsigned long addr, unsigned long end) 444 unsigned long addr, unsigned long end)
585{ 445{
586 pgd_t *pgd; 446 pgd_t *pgd;
447 pud_t *pud;
587 pmd_t *pmd; 448 pmd_t *pmd;
588 pte_t *pte; 449 pte_t *pte;
589 pte_t entry; 450 pte_t entry;
@@ -597,7 +458,11 @@ static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
597 if (pgd_bad(*pgd)) 458 if (pgd_bad(*pgd))
598 return; 459 return;
599 460
600 pmd = pmd_offset(pgd, addr); 461 pud = pud_offset(pgd, addr);
462 if (pud_none(*pud) || pud_bad(*pud))
463 return;
464
465 pmd = pmd_offset(pud, addr);
601 if (pmd_none(*pmd) || pmd_bad(*pmd)) 466 if (pmd_none(*pmd) || pmd_bad(*pmd))
602 return; 467 return;
603 468
@@ -611,421 +476,357 @@ static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
611 } while (pte++, addr += PAGE_SIZE, addr != end); 476 } while (pte++, addr += PAGE_SIZE, addr != end);
612 pte_unmap_unlock(pte - 1, ptl); 477 pte_unmap_unlock(pte - 1, ptl);
613} 478}
614/****************************************************************************/
615 479
480/*
481 * There are at least 5 choices for the implementation of this, with
482 * pros (+), cons(-), comments(*):
483 *
484 * 1. ocbp each line in the range through the original user's ASID
485 * + no lines spuriously evicted
486 * - tlbmiss handling (must either handle faults on demand => extra
487 * special-case code in tlbmiss critical path), or map the page in
488 * advance (=> flush_tlb_range in advance to avoid multiple hits)
489 * - ASID switching
490 * - expensive for large ranges
491 *
492 * 2. temporarily map each page in the range to a special effective
493 * address and ocbp through the temporary mapping; relies on the
494 * fact that SH-5 OCB* always do TLB lookup and match on ptags (they
495 * never look at the etags)
496 * + no spurious evictions
497 * - expensive for large ranges
498 * * surely cheaper than (1)
499 *
500 * 3. walk all the lines in the cache, check the tags, if a match
501 * occurs create a page mapping to ocbp the line through
502 * + no spurious evictions
503 * - tag inspection overhead
504 * - (especially for small ranges)
505 * - potential cost of setting up/tearing down page mapping for
506 * every line that matches the range
507 * * cost partly independent of range size
508 *
509 * 4. walk all the lines in the cache, check the tags, if a match
510 * occurs use 4 * alloco to purge the line (+3 other probably
511 * innocent victims) by natural eviction
512 * + no tlb mapping overheads
513 * - spurious evictions
514 * - tag inspection overhead
515 *
516 * 5. implement like flush_cache_all
517 * + no tag inspection overhead
518 * - spurious evictions
519 * - bad for small ranges
520 *
521 * (1) can be ruled out as more expensive than (2). (2) appears best
522 * for small ranges. The choice between (3), (4) and (5) for large
523 * ranges and the range size for the large/small boundary need
524 * benchmarking to determine.
525 *
526 * For now use approach (2) for small ranges and (5) for large ones.
527 */
616static void sh64_dcache_purge_user_range(struct mm_struct *mm, 528static void sh64_dcache_purge_user_range(struct mm_struct *mm,
617 unsigned long start, unsigned long end) 529 unsigned long start, unsigned long end)
618{ 530{
619 /* There are at least 5 choices for the implementation of this, with 531 int n_pages = ((end - start) >> PAGE_SHIFT);
620 pros (+), cons(-), comments(*):
621
622 1. ocbp each line in the range through the original user's ASID
623 + no lines spuriously evicted
624 - tlbmiss handling (must either handle faults on demand => extra
625 special-case code in tlbmiss critical path), or map the page in
626 advance (=> flush_tlb_range in advance to avoid multiple hits)
627 - ASID switching
628 - expensive for large ranges
629
630 2. temporarily map each page in the range to a special effective
631 address and ocbp through the temporary mapping; relies on the
632 fact that SH-5 OCB* always do TLB lookup and match on ptags (they
633 never look at the etags)
634 + no spurious evictions
635 - expensive for large ranges
636 * surely cheaper than (1)
637
638 3. walk all the lines in the cache, check the tags, if a match
639 occurs create a page mapping to ocbp the line through
640 + no spurious evictions
641 - tag inspection overhead
642 - (especially for small ranges)
643 - potential cost of setting up/tearing down page mapping for
644 every line that matches the range
645 * cost partly independent of range size
646
647 4. walk all the lines in the cache, check the tags, if a match
648 occurs use 4 * alloco to purge the line (+3 other probably
649 innocent victims) by natural eviction
650 + no tlb mapping overheads
651 - spurious evictions
652 - tag inspection overhead
653
654 5. implement like flush_cache_all
655 + no tag inspection overhead
656 - spurious evictions
657 - bad for small ranges
658
659 (1) can be ruled out as more expensive than (2). (2) appears best
660 for small ranges. The choice between (3), (4) and (5) for large
661 ranges and the range size for the large/small boundary need
662 benchmarking to determine.
663
664 For now use approach (2) for small ranges and (5) for large ones.
665
666 */
667 532
668 int n_pages;
669
670 n_pages = ((end - start) >> PAGE_SHIFT);
671 if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) { 533 if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
672#if 1
673 sh64_dcache_purge_all(); 534 sh64_dcache_purge_all();
674#else
675 unsigned long long set, way;
676 unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
677 for (set = 0; set < cpu_data->dcache.sets; set++) {
678 unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
679 for (way = 0; way < cpu_data->dcache.ways; way++) {
680 unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
681 unsigned long long tag0;
682 unsigned long line_valid;
683
684 asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
685 line_valid = tag0 & SH_CACHE_VALID;
686 if (line_valid) {
687 unsigned long cache_asid;
688 unsigned long epn;
689
690 cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
691 /* The next line needs some
692 explanation. The virtual tags
693 encode bits [31:13] of the virtual
694 address, bit [12] of the 'tag' being
695 implied by the cache set index. */
696 epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);
697
698 if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
699 /* TODO : could optimise this
700 call by batching multiple
701 adjacent sets together. */
702 sh64_dcache_purge_sets(set, 1);
703 break; /* Don't waste time inspecting other ways for this set */
704 }
705 }
706 }
707 }
708#endif
709 } else { 535 } else {
710 /* Small range, covered by a single page table page */ 536 /* Small range, covered by a single page table page */
711 start &= PAGE_MASK; /* should already be so */ 537 start &= PAGE_MASK; /* should already be so */
712 end = PAGE_ALIGN(end); /* should already be so */ 538 end = PAGE_ALIGN(end); /* should already be so */
713 sh64_dcache_purge_user_pages(mm, start, end); 539 sh64_dcache_purge_user_pages(mm, start, end);
714 } 540 }
715 return;
716} 541}
717 542
718static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end) 543/*
544 * Purge the range of addresses from the D-cache.
545 *
546 * The addresses lie in the superpage mapping. There's no harm if we
547 * overpurge at either end - just a small performance loss.
548 */
549void __flush_purge_region(void *start, int size)
719{ 550{
720 unsigned long long aligned_start; 551 unsigned long long ullend, addr, aligned_start;
721 unsigned long long ull_end;
722 unsigned long long addr;
723
724 ull_end = end;
725 552
726 /* Just wback over the range using the natural addresses. TLB miss 553 aligned_start = (unsigned long long)(signed long long)(signed long) start;
727 handling will be OK (TBC) : the range has just been written to by 554 addr = L1_CACHE_ALIGN(aligned_start);
728 the signal frame setup code, so the PTEs must exist. 555 ullend = (unsigned long long) (signed long long) (signed long) start + size;
729 556
730 Note, if we have CONFIG_PREEMPT and get preempted inside this loop, 557 while (addr <= ullend) {
731 it doesn't matter, even if the pid->ASID mapping changes whilst 558 __asm__ __volatile__ ("ocbp %0, 0" : : "r" (addr));
732 we're away. In that case the cache will have been flushed when the
733 mapping was renewed. So the writebacks below will be nugatory (and
734 we'll doubtless have to fault the TLB entry/ies in again with the
735 new ASID), but it's a rare case.
736 */
737 aligned_start = start & L1_CACHE_ALIGN_MASK;
738 addr = aligned_start;
739 while (addr < ull_end) {
740 asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
741 addr += L1_CACHE_BYTES; 559 addr += L1_CACHE_BYTES;
742 } 560 }
743} 561}
744 562
745/****************************************************************************/ 563void __flush_wback_region(void *start, int size)
746
747/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
748#define UNIQUE_EADDR_START 0xe0000000UL
749#define UNIQUE_EADDR_END 0xe8000000UL
750
751static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
752{ 564{
753 /* Given a physical address paddr, and a user virtual address 565 unsigned long long ullend, addr, aligned_start;
754 user_eaddr which will eventually be mapped to it, create a one-off
755 kernel-private eaddr mapped to the same paddr. This is used for
756 creating special destination pages for copy_user_page and
757 clear_user_page */
758 566
759 static unsigned long current_pointer = UNIQUE_EADDR_START; 567 aligned_start = (unsigned long long)(signed long long)(signed long) start;
760 unsigned long coloured_pointer; 568 addr = L1_CACHE_ALIGN(aligned_start);
569 ullend = (unsigned long long) (signed long long) (signed long) start + size;
761 570
762 if (current_pointer == UNIQUE_EADDR_END) { 571 while (addr < ullend) {
763 sh64_dcache_purge_all(); 572 __asm__ __volatile__ ("ocbwb %0, 0" : : "r" (addr));
764 current_pointer = UNIQUE_EADDR_START; 573 addr += L1_CACHE_BYTES;
765 } 574 }
766
767 coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
768 sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
769
770 current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
771
772 return coloured_pointer;
773} 575}
774 576
775/****************************************************************************/ 577void __flush_invalidate_region(void *start, int size)
776
777static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
778{ 578{
779 void *coloured_to; 579 unsigned long long ullend, addr, aligned_start;
780
781 /* Discard any existing cache entries of the wrong colour. These are
782 present quite often, if the kernel has recently used the page
783 internally, then given it up, then it's been allocated to the user.
784 */
785 sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
786
787 coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
788 sh64_page_copy(from, coloured_to);
789
790 sh64_teardown_dtlb_cache_slot();
791}
792
793static void sh64_clear_user_page_coloured(void *to, unsigned long address)
794{
795 void *coloured_to;
796
797 /* Discard any existing kernel-originated lines of the wrong colour (as
798 above) */
799 sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
800 580
801 coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to)); 581 aligned_start = (unsigned long long)(signed long long)(signed long) start;
802 sh64_page_clear(coloured_to); 582 addr = L1_CACHE_ALIGN(aligned_start);
583 ullend = (unsigned long long) (signed long long) (signed long) start + size;
803 584
804 sh64_teardown_dtlb_cache_slot(); 585 while (addr < ullend) {
586 __asm__ __volatile__ ("ocbi %0, 0" : : "r" (addr));
587 addr += L1_CACHE_BYTES;
588 }
805} 589}
806
807#endif /* !CONFIG_DCACHE_DISABLED */ 590#endif /* !CONFIG_DCACHE_DISABLED */
808 591
809/****************************************************************************/ 592/*
810 593 * Invalidate the entire contents of both caches, after writing back to
811/*########################################################################## 594 * memory any dirty data from the D-cache.
812 EXTERNALLY CALLABLE API. 595 */
813 ##########################################################################*/
814
815/* These functions are described in Documentation/cachetlb.txt.
816 Each one of these functions varies in behaviour depending on whether the
817 I-cache and/or D-cache are configured out.
818
819 Note that the Linux term 'flush' corresponds to what is termed 'purge' in
820 the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
821 invalidate the cache lines, and 'invalidate' for the I-cache.
822 */
823
824#undef FLUSH_TRACE
825
826void flush_cache_all(void) 596void flush_cache_all(void)
827{ 597{
828 /* Invalidate the entire contents of both caches, after writing back to
829 memory any dirty data from the D-cache. */
830 sh64_dcache_purge_all(); 598 sh64_dcache_purge_all();
831 sh64_icache_inv_all(); 599 sh64_icache_inv_all();
832} 600}
833 601
834/****************************************************************************/ 602/*
835 603 * Invalidate an entire user-address space from both caches, after
604 * writing back dirty data (e.g. for shared mmap etc).
605 *
606 * This could be coded selectively by inspecting all the tags then
607 * doing 4*alloco on any set containing a match (as for
608 * flush_cache_range), but fork/exit/execve (where this is called from)
609 * are expensive anyway.
610 *
611 * Have to do a purge here, despite the comments re I-cache below.
612 * There could be odd-coloured dirty data associated with the mm still
613 * in the cache - if this gets written out through natural eviction
614 * after the kernel has reused the page there will be chaos.
615 *
616 * The mm being torn down won't ever be active again, so any Icache
617 * lines tagged with its ASID won't be visible for the rest of the
618 * lifetime of this ASID cycle. Before the ASID gets reused, there
619 * will be a flush_cache_all. Hence we don't need to touch the
620 * I-cache. This is similar to the lack of action needed in
621 * flush_tlb_mm - see fault.c.
622 */
836void flush_cache_mm(struct mm_struct *mm) 623void flush_cache_mm(struct mm_struct *mm)
837{ 624{
838 /* Invalidate an entire user-address space from both caches, after
839 writing back dirty data (e.g. for shared mmap etc). */
840
841 /* This could be coded selectively by inspecting all the tags then
842 doing 4*alloco on any set containing a match (as for
843 flush_cache_range), but fork/exit/execve (where this is called from)
844 are expensive anyway. */
845
846 /* Have to do a purge here, despite the comments re I-cache below.
847 There could be odd-coloured dirty data associated with the mm still
848 in the cache - if this gets written out through natural eviction
849 after the kernel has reused the page there will be chaos.
850 */
851
852 sh64_dcache_purge_all(); 625 sh64_dcache_purge_all();
853
854 /* The mm being torn down won't ever be active again, so any Icache
855 lines tagged with its ASID won't be visible for the rest of the
856 lifetime of this ASID cycle. Before the ASID gets reused, there
857 will be a flush_cache_all. Hence we don't need to touch the
858 I-cache. This is similar to the lack of action needed in
859 flush_tlb_mm - see fault.c. */
860} 626}
861 627
862/****************************************************************************/ 628/*
863 629 * Invalidate (from both caches) the range [start,end) of virtual
630 * addresses from the user address space specified by mm, after writing
631 * back any dirty data.
632 *
633 * Note, 'end' is 1 byte beyond the end of the range to flush.
634 */
864void flush_cache_range(struct vm_area_struct *vma, unsigned long start, 635void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
865 unsigned long end) 636 unsigned long end)
866{ 637{
867 struct mm_struct *mm = vma->vm_mm; 638 struct mm_struct *mm = vma->vm_mm;
868 639
869 /* Invalidate (from both caches) the range [start,end) of virtual
870 addresses from the user address space specified by mm, after writing
871 back any dirty data.
872
873 Note, 'end' is 1 byte beyond the end of the range to flush. */
874
875 sh64_dcache_purge_user_range(mm, start, end); 640 sh64_dcache_purge_user_range(mm, start, end);
876 sh64_icache_inv_user_page_range(mm, start, end); 641 sh64_icache_inv_user_page_range(mm, start, end);
877} 642}
878 643
879/****************************************************************************/ 644/*
880 645 * Invalidate any entries in either cache for the vma within the user
881void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn) 646 * address space vma->vm_mm for the page starting at virtual address
647 * 'eaddr'. This seems to be used primarily in breaking COW. Note,
648 * the I-cache must be searched too in case the page in question is
649 * both writable and being executed from (e.g. stack trampolines.)
650 *
651 * Note, this is called with pte lock held.
652 */
653void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr,
654 unsigned long pfn)
882{ 655{
883 /* Invalidate any entries in either cache for the vma within the user
884 address space vma->vm_mm for the page starting at virtual address
885 'eaddr'. This seems to be used primarily in breaking COW. Note,
886 the I-cache must be searched too in case the page in question is
887 both writable and being executed from (e.g. stack trampolines.)
888
889 Note, this is called with pte lock held.
890 */
891
892 sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT); 656 sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
893 657
894 if (vma->vm_flags & VM_EXEC) { 658 if (vma->vm_flags & VM_EXEC)
895 sh64_icache_inv_user_page(vma, eaddr); 659 sh64_icache_inv_user_page(vma, eaddr);
896 }
897} 660}
898 661
899/****************************************************************************/ 662void flush_dcache_page(struct page *page)
663{
664 sh64_dcache_purge_phy_page(page_to_phys(page));
665 wmb();
666}
900 667
901#ifndef CONFIG_DCACHE_DISABLED 668/*
669 * Flush the range [start,end] of kernel virtual adddress space from
670 * the I-cache. The corresponding range must be purged from the
671 * D-cache also because the SH-5 doesn't have cache snooping between
672 * the caches. The addresses will be visible through the superpage
673 * mapping, therefore it's guaranteed that there no cache entries for
674 * the range in cache sets of the wrong colour.
675 */
676void flush_icache_range(unsigned long start, unsigned long end)
677{
678 __flush_purge_region((void *)start, end);
679 wmb();
680 sh64_icache_inv_kernel_range(start, end);
681}
902 682
903void copy_user_page(void *to, void *from, unsigned long address, struct page *page) 683/*
684 * Flush the range of user (defined by vma->vm_mm) address space starting
685 * at 'addr' for 'len' bytes from the cache. The range does not straddle
686 * a page boundary, the unique physical page containing the range is
687 * 'page'. This seems to be used mainly for invalidating an address
688 * range following a poke into the program text through the ptrace() call
689 * from another process (e.g. for BRK instruction insertion).
690 */
691void flush_icache_user_range(struct vm_area_struct *vma,
692 struct page *page, unsigned long addr, int len)
904{ 693{
905 /* 'from' and 'to' are kernel virtual addresses (within the superpage
906 mapping of the physical RAM). 'address' is the user virtual address
907 where the copy 'to' will be mapped after. This allows a custom
908 mapping to be used to ensure that the new copy is placed in the
909 right cache sets for the user to see it without having to bounce it
910 out via memory. Note however : the call to flush_page_to_ram in
911 (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
912 very important case!
913
914 TBD : can we guarantee that on every call, any cache entries for
915 'from' are in the same colour sets as 'address' also? i.e. is this
916 always used just to deal with COW? (I suspect not). */
917
918 /* There are two possibilities here for when the page 'from' was last accessed:
919 * by the kernel : this is OK, no purge required.
920 * by the/a user (e.g. for break_COW) : need to purge.
921
922 If the potential user mapping at 'address' is the same colour as
923 'from' there is no need to purge any cache lines from the 'from'
924 page mapped into cache sets of colour 'address'. (The copy will be
925 accessing the page through 'from').
926 */
927 694
928 if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) { 695 sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
929 sh64_dcache_purge_coloured_phy_page(__pa(from), address); 696 mb();
930 }
931 697
932 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { 698 if (vma->vm_flags & VM_EXEC)
933 /* No synonym problem on destination */ 699 sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
934 sh64_page_copy(from, to); 700}
935 } else {
936 sh64_copy_user_page_coloured(to, from, address);
937 }
938 701
939 /* Note, don't need to flush 'from' page from the cache again - it's 702/*
940 done anyway by the generic code */ 703 * For the address range [start,end), write back the data from the
704 * D-cache and invalidate the corresponding region of the I-cache for the
705 * current process. Used to flush signal trampolines on the stack to
706 * make them executable.
707 */
708void flush_cache_sigtramp(unsigned long vaddr)
709{
710 unsigned long end = vaddr + L1_CACHE_BYTES;
711
712 __flush_wback_region((void *)vaddr, L1_CACHE_BYTES);
713 wmb();
714 sh64_icache_inv_current_user_range(vaddr, end);
941} 715}
942 716
943void clear_user_page(void *to, unsigned long address, struct page *page) 717/*
718 * These *MUST* lie in an area of virtual address space that's otherwise
719 * unused.
720 */
721#define UNIQUE_EADDR_START 0xe0000000UL
722#define UNIQUE_EADDR_END 0xe8000000UL
723
724/*
725 * Given a physical address paddr, and a user virtual address user_eaddr
726 * which will eventually be mapped to it, create a one-off kernel-private
727 * eaddr mapped to the same paddr. This is used for creating special
728 * destination pages for copy_user_page and clear_user_page.
729 */
730static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr,
731 unsigned long paddr)
944{ 732{
945 /* 'to' is a kernel virtual address (within the superpage 733 static unsigned long current_pointer = UNIQUE_EADDR_START;
946 mapping of the physical RAM). 'address' is the user virtual address 734 unsigned long coloured_pointer;
947 where the 'to' page will be mapped after. This allows a custom
948 mapping to be used to ensure that the new copy is placed in the
949 right cache sets for the user to see it without having to bounce it
950 out via memory.
951 */
952 735
953 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { 736 if (current_pointer == UNIQUE_EADDR_END) {
954 /* No synonym problem on destination */ 737 sh64_dcache_purge_all();
955 sh64_page_clear(to); 738 current_pointer = UNIQUE_EADDR_START;
956 } else {
957 sh64_clear_user_page_coloured(to, address);
958 } 739 }
959}
960 740
961#endif /* !CONFIG_DCACHE_DISABLED */ 741 coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) |
742 (user_eaddr & CACHE_OC_SYN_MASK);
743 sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
962 744
963/****************************************************************************/ 745 current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
964 746
965void flush_dcache_page(struct page *page) 747 return coloured_pointer;
966{
967 sh64_dcache_purge_phy_page(page_to_phys(page));
968 wmb();
969} 748}
970 749
971/****************************************************************************/ 750static void sh64_copy_user_page_coloured(void *to, void *from,
972 751 unsigned long address)
973void flush_icache_range(unsigned long start, unsigned long end)
974{ 752{
975 /* Flush the range [start,end] of kernel virtual adddress space from 753 void *coloured_to;
976 the I-cache. The corresponding range must be purged from the
977 D-cache also because the SH-5 doesn't have cache snooping between
978 the caches. The addresses will be visible through the superpage
979 mapping, therefore it's guaranteed that there no cache entries for
980 the range in cache sets of the wrong colour.
981 754
982 Primarily used for cohering the I-cache after a module has 755 /*
983 been loaded. */ 756 * Discard any existing cache entries of the wrong colour. These are
757 * present quite often, if the kernel has recently used the page
758 * internally, then given it up, then it's been allocated to the user.
759 */
760 sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to);
984 761
985 /* We also make sure to purge the same range from the D-cache since 762 coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to));
986 flush_page_to_ram() won't be doing this for us! */ 763 copy_page(from, coloured_to);
987 764
988 sh64_dcache_purge_kernel_range(start, end); 765 sh64_teardown_dtlb_cache_slot();
989 wmb();
990 sh64_icache_inv_kernel_range(start, end);
991} 766}
992 767
993/****************************************************************************/ 768static void sh64_clear_user_page_coloured(void *to, unsigned long address)
994
995void flush_icache_user_range(struct vm_area_struct *vma,
996 struct page *page, unsigned long addr, int len)
997{ 769{
998 /* Flush the range of user (defined by vma->vm_mm) address space 770 void *coloured_to;
999 starting at 'addr' for 'len' bytes from the cache. The range does
1000 not straddle a page boundary, the unique physical page containing
1001 the range is 'page'. This seems to be used mainly for invalidating
1002 an address range following a poke into the program text through the
1003 ptrace() call from another process (e.g. for BRK instruction
1004 insertion). */
1005 771
1006 sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr); 772 /*
1007 mb(); 773 * Discard any existing kernel-originated lines of the wrong
774 * colour (as above)
775 */
776 sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to);
1008 777
1009 if (vma->vm_flags & VM_EXEC) { 778 coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to));
1010 sh64_icache_inv_user_small_range(vma->vm_mm, addr, len); 779 clear_page(coloured_to);
1011 }
1012}
1013 780
1014/*########################################################################## 781 sh64_teardown_dtlb_cache_slot();
1015 ARCH/SH64 PRIVATE CALLABLE API. 782}
1016 ##########################################################################*/
1017 783
1018void flush_cache_sigtramp(unsigned long vaddr) 784/*
785 * 'from' and 'to' are kernel virtual addresses (within the superpage
786 * mapping of the physical RAM). 'address' is the user virtual address
787 * where the copy 'to' will be mapped after. This allows a custom
788 * mapping to be used to ensure that the new copy is placed in the
789 * right cache sets for the user to see it without having to bounce it
790 * out via memory. Note however : the call to flush_page_to_ram in
791 * (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
792 * very important case!
793 *
794 * TBD : can we guarantee that on every call, any cache entries for
795 * 'from' are in the same colour sets as 'address' also? i.e. is this
796 * always used just to deal with COW? (I suspect not).
797 *
798 * There are two possibilities here for when the page 'from' was last accessed:
799 * - by the kernel : this is OK, no purge required.
800 * - by the/a user (e.g. for break_COW) : need to purge.
801 *
802 * If the potential user mapping at 'address' is the same colour as
803 * 'from' there is no need to purge any cache lines from the 'from'
804 * page mapped into cache sets of colour 'address'. (The copy will be
805 * accessing the page through 'from').
806 */
807void copy_user_page(void *to, void *from, unsigned long address,
808 struct page *page)
1019{ 809{
1020 unsigned long end = vaddr + L1_CACHE_BYTES; 810 if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0)
1021 811 sh64_dcache_purge_coloured_phy_page(__pa(from), address);
1022 /* For the address range [start,end), write back the data from the
1023 D-cache and invalidate the corresponding region of the I-cache for
1024 the current process. Used to flush signal trampolines on the stack
1025 to make them executable. */
1026 812
1027 sh64_dcache_wback_current_user_range(vaddr, end); 813 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0)
1028 wmb(); 814 copy_page(to, from);
1029 sh64_icache_inv_current_user_range(vaddr, end); 815 else
816 sh64_copy_user_page_coloured(to, from, address);
1030} 817}
1031 818
819/*
820 * 'to' is a kernel virtual address (within the superpage mapping of the
821 * physical RAM). 'address' is the user virtual address where the 'to'
822 * page will be mapped after. This allows a custom mapping to be used to
823 * ensure that the new copy is placed in the right cache sets for the
824 * user to see it without having to bounce it out via memory.
825 */
826void clear_user_page(void *to, unsigned long address, struct page *page)
827{
828 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0)
829 clear_page(to);
830 else
831 sh64_clear_user_page_coloured(to, address);
832}
diff --git a/include/asm-sh/cpu-sh5/cacheflush.h b/include/asm-sh/cpu-sh5/cacheflush.h
index f935acbacf3..5a11f0b7e66 100644
--- a/include/asm-sh/cpu-sh5/cacheflush.h
+++ b/include/asm-sh/cpu-sh5/cacheflush.h
@@ -3,8 +3,6 @@
3 3
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5 5
6#include <asm/page.h>
7
8struct vm_area_struct; 6struct vm_area_struct;
9struct page; 7struct page;
10struct mm_struct; 8struct mm_struct;
@@ -27,7 +25,7 @@ extern void flush_icache_user_range(struct vm_area_struct *vma,
27#define flush_dcache_mmap_unlock(mapping) do { } while (0) 25#define flush_dcache_mmap_unlock(mapping) do { } while (0)
28 26
29#define flush_icache_page(vma, page) do { } while (0) 27#define flush_icache_page(vma, page) do { } while (0)
30#define p3_cache_init() do { } while (0) 28void p3_cache_init(void);
31 29
32#endif /* __ASSEMBLY__ */ 30#endif /* __ASSEMBLY__ */
33 31
diff --git a/include/asm-sh/mmu_context_64.h b/include/asm-sh/mmu_context_64.h
index 020be744b08..9649f1c07ca 100644
--- a/include/asm-sh/mmu_context_64.h
+++ b/include/asm-sh/mmu_context_64.h
@@ -66,6 +66,9 @@ static inline void set_asid(unsigned long asid)
66 : "=r" (sr), "=r" (pc) : "0" (sr)); 66 : "=r" (sr), "=r" (pc) : "0" (sr));
67} 67}
68 68
69/* arch/sh/kernel/cpu/sh5/entry.S */
70extern unsigned long switch_and_save_asid(unsigned long new_asid);
71
69/* No spare register to twiddle, so use a software cache */ 72/* No spare register to twiddle, so use a software cache */
70extern pgd_t *mmu_pdtp_cache; 73extern pgd_t *mmu_pdtp_cache;
71 74
diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h
index 134562dc8c4..304c30b5d94 100644
--- a/include/asm-sh/page.h
+++ b/include/asm-sh/page.h
@@ -55,11 +55,14 @@ extern void clear_page(void *to);
55extern void copy_page(void *to, void *from); 55extern void copy_page(void *to, void *from);
56 56
57#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) && \ 57#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) && \
58 (defined(CONFIG_CPU_SH4) || defined(CONFIG_SH7705_CACHE_32KB)) 58 (defined(CONFIG_CPU_SH5) || defined(CONFIG_CPU_SH4) || \
59 defined(CONFIG_SH7705_CACHE_32KB))
59struct page; 60struct page;
60struct vm_area_struct; 61struct vm_area_struct;
61extern void clear_user_page(void *to, unsigned long address, struct page *page); 62extern void clear_user_page(void *to, unsigned long address, struct page *page);
62#ifdef CONFIG_CPU_SH4 63extern void copy_user_page(void *to, void *from, unsigned long address,
64 struct page *page);
65#if defined(CONFIG_CPU_SH4)
63extern void copy_user_highpage(struct page *to, struct page *from, 66extern void copy_user_highpage(struct page *to, struct page *from,
64 unsigned long vaddr, struct vm_area_struct *vma); 67 unsigned long vaddr, struct vm_area_struct *vma);
65#define __HAVE_ARCH_COPY_USER_HIGHPAGE 68#define __HAVE_ARCH_COPY_USER_HIGHPAGE