diff options
author | Paul Mundt <lethal@linux-sh.org> | 2007-11-21 03:07:06 -0500 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2008-01-27 23:18:52 -0500 |
commit | 60b2249d45d44bd3494d55f5ea4bccd25c7f8281 (patch) | |
tree | a79cd2e691701cf78cba95095c4341d055ae656c /arch/sh | |
parent | 8214d52ace79163ded60a8605c1d6c44b8b2bd30 (diff) |
sh: Move over SH-5 TLB and cache support code.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh')
-rw-r--r-- | arch/sh/mm/Makefile_64 | 7 | ||||
-rw-r--r-- | arch/sh/mm/cache-sh5.c | 1032 | ||||
-rw-r--r-- | arch/sh/mm/tlb-sh5.c | 166 |
3 files changed, 1203 insertions, 2 deletions
diff --git a/arch/sh/mm/Makefile_64 b/arch/sh/mm/Makefile_64 index 82fe9072fd5c..5ff83583593e 100644 --- a/arch/sh/mm/Makefile_64 +++ b/arch/sh/mm/Makefile_64 | |||
@@ -5,12 +5,15 @@ | |||
5 | obj-y := init.o extable_64.o consistent.o | 5 | obj-y := init.o extable_64.o consistent.o |
6 | 6 | ||
7 | mmu-y := tlb-nommu.o pg-nommu.o | 7 | mmu-y := tlb-nommu.o pg-nommu.o |
8 | mmu-$(CONFIG_MMU) := fault_64.o ioremap_64.o tlb-flush_64.o | 8 | mmu-$(CONFIG_MMU) := fault_64.o ioremap_64.o tlb-flush_64.o tlb-sh5.o |
9 | |||
10 | ifndef CONFIG_CACHE_OFF | ||
11 | obj-y += cache-sh5.o | ||
12 | endif | ||
9 | 13 | ||
10 | obj-y += $(mmu-y) | 14 | obj-y += $(mmu-y) |
11 | 15 | ||
12 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | 16 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o |
13 | obj-$(CONFIG_PMB) += pmb.o | ||
14 | obj-$(CONFIG_NUMA) += numa.o | 17 | obj-$(CONFIG_NUMA) += numa.o |
15 | 18 | ||
16 | EXTRA_CFLAGS += -Werror | 19 | EXTRA_CFLAGS += -Werror |
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c new file mode 100644 index 000000000000..421487cfff4c --- /dev/null +++ b/arch/sh/mm/cache-sh5.c | |||
@@ -0,0 +1,1032 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * arch/sh/mm/cache-sh5.c (formerly arch/sh64/mm/cache.c) | ||
7 | * | ||
8 | * Original version Copyright (C) 2000, 2001 Paolo Alberelli | ||
9 | * Second version Copyright (C) benedict.gaster@superh.com 2002 | ||
10 | * Third version Copyright Richard.Curnow@superh.com 2003 | ||
11 | * Hacks to third version Copyright (C) 2003 Paul Mundt | ||
12 | */ | ||
13 | |||
14 | /****************************************************************************/ | ||
15 | |||
16 | #include <linux/init.h> | ||
17 | #include <linux/mman.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/threads.h> | ||
20 | #include <asm/page.h> | ||
21 | #include <asm/pgtable.h> | ||
22 | #include <asm/processor.h> | ||
23 | #include <asm/cache.h> | ||
24 | #include <asm/tlb.h> | ||
25 | #include <asm/io.h> | ||
26 | #include <asm/uaccess.h> | ||
27 | #include <asm/mmu_context.h> | ||
28 | #include <asm/pgalloc.h> /* for flush_itlb_range */ | ||
29 | |||
30 | #include <linux/proc_fs.h> | ||
31 | |||
32 | /* This function is in entry.S */ | ||
33 | extern unsigned long switch_and_save_asid(unsigned long new_asid); | ||
34 | |||
35 | /* Wired TLB entry for the D-cache */ | ||
36 | static unsigned long long dtlb_cache_slot; | ||
37 | |||
38 | /** | ||
39 | * sh64_cache_init() | ||
40 | * | ||
41 | * This is pretty much just a straightforward clone of the SH | ||
42 | * detect_cpu_and_cache_system(). | ||
43 | * | ||
44 | * This function is responsible for setting up all of the cache | ||
45 | * info dynamically as well as taking care of CPU probing and | ||
46 | * setting up the relevant subtype data. | ||
47 | * | ||
48 | * FIXME: For the time being, we only really support the SH5-101 | ||
49 | * out of the box, and don't support dynamic probing for things | ||
50 | * like the SH5-103 or even cut2 of the SH5-101. Implement this | ||
51 | * later! | ||
52 | */ | ||
53 | int __init sh64_cache_init(void) | ||
54 | { | ||
55 | /* | ||
56 | * First, setup some sane values for the I-cache. | ||
57 | */ | ||
58 | cpu_data->icache.ways = 4; | ||
59 | cpu_data->icache.sets = 256; | ||
60 | cpu_data->icache.linesz = L1_CACHE_BYTES; | ||
61 | |||
62 | /* | ||
63 | * FIXME: This can probably be cleaned up a bit as well.. for example, | ||
64 | * do we really need the way shift _and_ the way_step_shift ?? Judging | ||
65 | * by the existing code, I would guess no.. is there any valid reason | ||
66 | * why we need to be tracking this around? | ||
67 | */ | ||
68 | cpu_data->icache.way_shift = 13; | ||
69 | cpu_data->icache.entry_shift = 5; | ||
70 | cpu_data->icache.set_shift = 4; | ||
71 | cpu_data->icache.way_step_shift = 16; | ||
72 | cpu_data->icache.asid_shift = 2; | ||
73 | |||
74 | /* | ||
75 | * way offset = cache size / associativity, so just don't factor in | ||
76 | * associativity in the first place.. | ||
77 | */ | ||
78 | cpu_data->icache.way_ofs = cpu_data->icache.sets * | ||
79 | cpu_data->icache.linesz; | ||
80 | |||
81 | cpu_data->icache.asid_mask = 0x3fc; | ||
82 | cpu_data->icache.idx_mask = 0x1fe0; | ||
83 | cpu_data->icache.epn_mask = 0xffffe000; | ||
84 | cpu_data->icache.flags = 0; | ||
85 | |||
86 | /* | ||
87 | * Next, setup some sane values for the D-cache. | ||
88 | * | ||
89 | * On the SH5, these are pretty consistent with the I-cache settings, | ||
90 | * so we just copy over the existing definitions.. these can be fixed | ||
91 | * up later, especially if we add runtime CPU probing. | ||
92 | * | ||
93 | * Though in the meantime it saves us from having to duplicate all of | ||
94 | * the above definitions.. | ||
95 | */ | ||
96 | cpu_data->dcache = cpu_data->icache; | ||
97 | |||
98 | /* | ||
99 | * Setup any cache-related flags here | ||
100 | */ | ||
101 | #if defined(CONFIG_DCACHE_WRITE_THROUGH) | ||
102 | set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)); | ||
103 | #elif defined(CONFIG_DCACHE_WRITE_BACK) | ||
104 | set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags)); | ||
105 | #endif | ||
106 | |||
107 | /* | ||
108 | * We also need to reserve a slot for the D-cache in the DTLB, so we | ||
109 | * do this now .. | ||
110 | */ | ||
111 | dtlb_cache_slot = sh64_get_wired_dtlb_entry(); | ||
112 | |||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | #ifdef CONFIG_DCACHE_DISABLED | ||
117 | #define sh64_dcache_purge_all() do { } while (0) | ||
118 | #define sh64_dcache_purge_coloured_phy_page(paddr, eaddr) do { } while (0) | ||
119 | #define sh64_dcache_purge_user_range(mm, start, end) do { } while (0) | ||
120 | #define sh64_dcache_purge_phy_page(paddr) do { } while (0) | ||
121 | #define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0) | ||
122 | #define sh64_dcache_purge_kernel_range(start, end) do { } while (0) | ||
123 | #define sh64_dcache_wback_current_user_range(start, end) do { } while (0) | ||
124 | #endif | ||
125 | |||
126 | /*##########################################################################*/ | ||
127 | |||
128 | /* From here onwards, a rewrite of the implementation, | ||
129 | by Richard.Curnow@superh.com. | ||
130 | |||
131 | The major changes in this compared to the old version are; | ||
132 | 1. use more selective purging through OCBP instead of using ALLOCO to purge | ||
133 | by natural replacement. This avoids purging out unrelated cache lines | ||
134 | that happen to be in the same set. | ||
135 | 2. exploit the APIs copy_user_page and clear_user_page better | ||
136 | 3. be more selective about I-cache purging, in particular use invalidate_all | ||
137 | more sparingly. | ||
138 | |||
139 | */ | ||
140 | |||
141 | /*########################################################################## | ||
142 | SUPPORT FUNCTIONS | ||
143 | ##########################################################################*/ | ||
144 | |||
145 | /****************************************************************************/ | ||
146 | /* The following group of functions deal with mapping and unmapping a temporary | ||
147 | page into the DTLB slot that has been set aside for our exclusive use. */ | ||
148 | /* In order to accomplish this, we use the generic interface for adding and | ||
149 | removing a wired slot entry as defined in arch/sh64/mm/tlb.c */ | ||
150 | /****************************************************************************/ | ||
151 | |||
152 | static unsigned long slot_own_flags; | ||
153 | |||
154 | static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr) | ||
155 | { | ||
156 | local_irq_save(slot_own_flags); | ||
157 | sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr); | ||
158 | } | ||
159 | |||
160 | static inline void sh64_teardown_dtlb_cache_slot(void) | ||
161 | { | ||
162 | sh64_teardown_tlb_slot(dtlb_cache_slot); | ||
163 | local_irq_restore(slot_own_flags); | ||
164 | } | ||
165 | |||
166 | /****************************************************************************/ | ||
167 | |||
168 | #ifndef CONFIG_ICACHE_DISABLED | ||
169 | |||
170 | static void __inline__ sh64_icache_inv_all(void) | ||
171 | { | ||
172 | unsigned long long addr, flag, data; | ||
173 | unsigned long flags; /* local_irq_save()/local_irq_restore() take an unsigned long */ | ||
174 | /* Read-modify-write of the ICCR0 config register: OR in ICCR0_ICI, write it back, then synci. */ | ||
175 | addr=ICCR0; | ||
176 | flag=ICCR0_ICI; | ||
177 | data=0; | ||
178 | |||
179 | /* Make this a critical section for safety (probably not strictly necessary.) */ | ||
180 | local_irq_save(flags); | ||
181 | |||
182 | /* Operand %1 ties 'data' to the output register; without it this goes inexplicably wrong */ | ||
183 | asm volatile("getcfg %3, 0, %0\n\t" | ||
184 | "or %0, %2, %0\n\t" | ||
185 | "putcfg %3, 0, %0\n\t" | ||
186 | "synci" | ||
187 | : "=&r" (data) | ||
188 | : "0" (data), "r" (flag), "r" (addr)); | ||
189 | |||
190 | local_irq_restore(flags); | ||
191 | } | ||
192 | |||
193 | static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end) | ||
194 | { | ||
195 | /* Invalidate range of addresses [start,end] from the I-cache, where | ||
196 | * the addresses lie in the kernel superpage. */ | ||
197 | |||
198 | unsigned long long ullend, addr, aligned_start; | ||
199 | #if (NEFF == 32) | ||
200 | aligned_start = (unsigned long long)(signed long long)(signed long) start; | ||
201 | #else | ||
202 | #error "NEFF != 32" | ||
203 | #endif | ||
204 | aligned_start &= L1_CACHE_ALIGN_MASK; | ||
205 | addr = aligned_start; | ||
206 | #if (NEFF == 32) | ||
207 | ullend = (unsigned long long) (signed long long) (signed long) end; | ||
208 | #else | ||
209 | #error "NEFF != 32" | ||
210 | #endif | ||
211 | while (addr <= ullend) { | ||
212 | asm __volatile__ ("icbi %0, 0" : : "r" (addr)); | ||
213 | addr += L1_CACHE_BYTES; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr) | ||
218 | { | ||
219 | /* If we get called, we know that vma->vm_flags contains VM_EXEC. | ||
220 | Also, eaddr is page-aligned. */ | ||
221 | |||
222 | unsigned long long addr, end_addr; | ||
223 | unsigned long flags = 0; | ||
224 | unsigned long running_asid, vma_asid; | ||
225 | addr = eaddr; | ||
226 | end_addr = addr + PAGE_SIZE; | ||
227 | |||
228 | /* Check whether we can use the current ASID for the I-cache | ||
229 | invalidation. For example, if we're called via | ||
230 | access_process_vm->flush_cache_page->here, (e.g. when reading from | ||
231 | /proc), 'running_asid' will be that of the reader, not of the | ||
232 | victim. | ||
233 | |||
234 | Also, note the risk that we might get pre-empted between the ASID | ||
235 | compare and blocking IRQs, and before we regain control, the | ||
236 | pid->ASID mapping changes. However, the whole cache will get | ||
237 | invalidated when the mapping is renewed, so the worst that can | ||
238 | happen is that the loop below ends up invalidating somebody else's | ||
239 | cache entries. | ||
240 | */ | ||
241 | |||
242 | running_asid = get_asid(); | ||
243 | vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK); | ||
244 | if (running_asid != vma_asid) { | ||
245 | local_irq_save(flags); | ||
246 | switch_and_save_asid(vma_asid); | ||
247 | } | ||
248 | while (addr < end_addr) { | ||
249 | /* Worth unrolling a little */ | ||
250 | asm __volatile__("icbi %0, 0" : : "r" (addr)); | ||
251 | asm __volatile__("icbi %0, 32" : : "r" (addr)); | ||
252 | asm __volatile__("icbi %0, 64" : : "r" (addr)); | ||
253 | asm __volatile__("icbi %0, 96" : : "r" (addr)); | ||
254 | addr += 128; | ||
255 | } | ||
256 | if (running_asid != vma_asid) { | ||
257 | switch_and_save_asid(running_asid); | ||
258 | local_irq_restore(flags); | ||
259 | } | ||
260 | } | ||
261 | |||
262 | /****************************************************************************/ | ||
263 | |||
264 | static void sh64_icache_inv_user_page_range(struct mm_struct *mm, | ||
265 | unsigned long start, unsigned long end) | ||
266 | { | ||
267 | /* Used for invalidating big chunks of I-cache, i.e. assume the range | ||
268 | is whole pages. If 'start' or 'end' is not page aligned, the code | ||
269 | is conservative and invalidates to the ends of the enclosing pages. | ||
270 | This is functionally OK, just a performance loss. */ | ||
271 | /* A NULL 'mm' is a no-op; a range of 64 pages or more invalidates the whole I-cache instead. */ | ||
272 | /* See the comments below in sh64_dcache_purge_user_range() regarding | ||
273 | the choice of algorithm. However, for the I-cache option (2) isn't | ||
274 | available because there are no physical tags so aliases can't be | ||
275 | resolved. The icbi instruction has to be used through the user | ||
276 | mapping. Because icbi is cheaper than ocbp on a cache hit, it | ||
277 | would be cheaper to use the selective code for a large range than is | ||
278 | possible with the D-cache. Just assume 64 for now as a working | ||
279 | figure. | ||
280 | */ | ||
281 | |||
282 | int n_pages; | ||
283 | |||
284 | if (!mm) return; | ||
285 | |||
286 | n_pages = ((end - start) >> PAGE_SHIFT); | ||
287 | if (n_pages >= 64) { | ||
288 | sh64_icache_inv_all(); | ||
289 | } else { | ||
290 | unsigned long aligned_start; | ||
291 | unsigned long eaddr; | ||
292 | unsigned long after_last_page_start; | ||
293 | unsigned long mm_asid, current_asid; | ||
294 | unsigned long flags = 0; /* only used when the ASIDs differ; local_irq_save() wants unsigned long */ | ||
295 | |||
296 | mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | ||
297 | current_asid = get_asid(); | ||
298 | |||
299 | if (mm_asid != current_asid) { | ||
300 | /* Switch ASID and run the invalidate loop under cli */ | ||
301 | local_irq_save(flags); | ||
302 | switch_and_save_asid(mm_asid); | ||
303 | } | ||
304 | |||
305 | aligned_start = start & PAGE_MASK; | ||
306 | after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK); | ||
307 | /* find_vma() yields the first VMA whose vm_end lies above the address, or NULL. */ | ||
308 | while (aligned_start < after_last_page_start) { | ||
309 | struct vm_area_struct *vma; | ||
310 | unsigned long vma_end; | ||
311 | vma = find_vma(mm, aligned_start); | ||
312 | if (!vma || (aligned_start >= vma->vm_end)) { | ||
313 | /* No usable VMA: step one page so the loop cannot get stuck */ | ||
314 | aligned_start += PAGE_SIZE; | ||
315 | continue; | ||
316 | } | ||
317 | vma_end = vma->vm_end; | ||
318 | if (vma->vm_flags & VM_EXEC) { | ||
319 | /* Executable: invalidate page by page, but not past the requested range */ | ||
320 | eaddr = aligned_start; | ||
321 | while (eaddr < vma_end && eaddr < after_last_page_start) { | ||
322 | sh64_icache_inv_user_page(vma, eaddr); | ||
323 | eaddr += PAGE_SIZE; | ||
324 | } | ||
325 | } | ||
326 | aligned_start = vma->vm_end; /* Skip to start of next region */ | ||
327 | } | ||
328 | if (mm_asid != current_asid) { | ||
329 | switch_and_save_asid(current_asid); | ||
330 | local_irq_restore(flags); | ||
331 | } | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static void sh64_icache_inv_user_small_range(struct mm_struct *mm, | ||
336 | unsigned long start, int len) | ||
337 | { | ||
338 | |||
339 | /* Invalidate a small range of user context I-cache, not necessarily | ||
340 | page (or even cache-line) aligned. */ | ||
341 | |||
342 | unsigned long long eaddr = start; | ||
343 | unsigned long long eaddr_end = start + len; | ||
344 | unsigned long current_asid, mm_asid; | ||
345 | unsigned long flags; /* local_irq_save()/local_irq_restore() take an unsigned long */ | ||
346 | unsigned long long epage_start; | ||
347 | |||
348 | /* Since this is used inside ptrace, the ASID in the mm context | ||
349 | typically won't match current_asid. We'll have to switch ASID to do | ||
350 | this. For safety, and given that the range will be small, do all | ||
351 | this under cli. | ||
352 | |||
353 | Note, there is a hazard that the ASID in mm->context is no longer | ||
354 | actually associated with mm, i.e. if the mm->context has started a | ||
355 | new cycle since mm was last active. However, this is just a | ||
356 | performance issue: all that happens is that we invalidate lines | ||
357 | belonging to another mm, so the owning process has to refill them | ||
358 | when that mm goes live again. mm itself can't have any cache | ||
359 | entries because there will have been a flush_cache_all when the new | ||
360 | mm->context cycle started. */ | ||
361 | |||
362 | /* Align to start of cache line. Otherwise, suppose len==8 and start | ||
363 | was at 32N+28 : the last 4 bytes wouldn't get invalidated. */ | ||
364 | eaddr = start & L1_CACHE_ALIGN_MASK; | ||
365 | eaddr_end = start + len; | ||
366 | |||
367 | local_irq_save(flags); | ||
368 | mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | ||
369 | current_asid = switch_and_save_asid(mm_asid); | ||
370 | /* The value returned by switch_and_save_asid() above is what gets restored after the loop. */ | ||
371 | epage_start = eaddr & PAGE_MASK; /* NOTE(review): computed but never read below */ | ||
372 | |||
373 | while (eaddr < eaddr_end) | ||
374 | { | ||
375 | asm __volatile__("icbi %0, 0" : : "r" (eaddr)); | ||
376 | eaddr += L1_CACHE_BYTES; | ||
377 | } | ||
378 | switch_and_save_asid(current_asid); | ||
379 | local_irq_restore(flags); | ||
380 | } | ||
381 | |||
382 | static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end) | ||
383 | { | ||
384 | /* The icbi instruction never raises ITLBMISS. i.e. if there's not a | ||
385 | cache hit on the virtual tag the instruction ends there, without a | ||
386 | TLB lookup. */ | ||
387 | |||
388 | unsigned long long aligned_start; | ||
389 | unsigned long long ull_end; | ||
390 | unsigned long long addr; | ||
391 | |||
392 | ull_end = end; | ||
393 | |||
394 | /* Just invalidate over the range using the natural addresses. TLB | ||
395 | miss handling will be OK (TBC). Since it's for the current process, | ||
396 | either we're already in the right ASID context, or the ASIDs have | ||
397 | been recycled since we were last active in which case we might just | ||
398 | invalidate another process's I-cache entries : no worries, just a | ||
399 | performance drop for him. */ | ||
400 | aligned_start = start & L1_CACHE_ALIGN_MASK; | ||
401 | addr = aligned_start; | ||
402 | while (addr < ull_end) { | ||
403 | asm __volatile__ ("icbi %0, 0" : : "r" (addr)); | ||
404 | asm __volatile__ ("nop"); | ||
405 | asm __volatile__ ("nop"); | ||
406 | addr += L1_CACHE_BYTES; | ||
407 | } | ||
408 | } | ||
409 | |||
410 | #endif /* !CONFIG_ICACHE_DISABLED */ | ||
411 | |||
412 | /****************************************************************************/ | ||
413 | |||
414 | #ifndef CONFIG_DCACHE_DISABLED | ||
415 | |||
416 | /* Buffer used as the target of alloco instructions to purge data from cache | ||
417 | sets by natural eviction. -- RPC. Size is L1_CACHE_SIZE_BYTES plus 4KiB, parenthesised for safe expansion. */ | ||
418 | #define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4)) | ||
419 | static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, }; | ||
420 | |||
421 | /****************************************************************************/ | ||
422 | |||
423 | static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets) | ||
424 | { | ||
425 | /* Purge all ways in a particular block of sets, specified by the base | ||
426 | set number and number of sets. Can handle wrap-around, if that's | ||
427 | needed. */ | ||
428 | |||
429 | int dummy_buffer_base_set; | ||
430 | unsigned long long eaddr, eaddr0, eaddr1; | ||
431 | int j; | ||
432 | int set_offset; | ||
433 | |||
434 | dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift; | ||
435 | set_offset = sets_to_purge_base - dummy_buffer_base_set; | ||
436 | |||
437 | for (j=0; j<n_sets; j++, set_offset++) { | ||
438 | set_offset &= (cpu_data->dcache.sets - 1); | ||
439 | eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift); | ||
440 | |||
441 | /* Do one alloco which hits the required set per cache way. For | ||
442 | write-back mode, this will purge the #ways resident lines. There's | ||
443 | little point unrolling this loop because the allocos stall more if | ||
444 | they're too close together. */ | ||
445 | eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; | ||
446 | for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { | ||
447 | asm __volatile__ ("alloco %0, 0" : : "r" (eaddr)); | ||
448 | asm __volatile__ ("synco"); /* TAKum03020 */ | ||
449 | } | ||
450 | |||
451 | eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; | ||
452 | for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { | ||
453 | /* Load from each address. Required because alloco is a NOP if | ||
454 | the cache is write-through. Write-through is a config option. */ | ||
455 | if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags))) | ||
456 | *(volatile unsigned char *)(int)eaddr; | ||
457 | } | ||
458 | } | ||
459 | |||
460 | /* Don't use OCBI to invalidate the lines. That costs cycles directly. | ||
461 | If the dummy block is just left resident, it will naturally get | ||
462 | evicted as required. */ | ||
463 | |||
464 | return; | ||
465 | } | ||
466 | |||
467 | /****************************************************************************/ | ||
468 | |||
469 | static void sh64_dcache_purge_all(void) | ||
470 | { | ||
471 | /* Purge the entire contents of the dcache. The most efficient way to | ||
472 | achieve this is to use alloco instructions on a region of unused | ||
473 | memory equal in size to the cache, thereby causing the current | ||
474 | contents to be discarded by natural eviction. The alternative, | ||
475 | namely reading every tag, setting up a mapping for the corresponding | ||
476 | page and doing an OCBP for the line, would be much more expensive. | ||
477 | */ | ||
478 | |||
479 | sh64_dcache_purge_sets(0, cpu_data->dcache.sets); | ||
480 | |||
481 | return; | ||
482 | |||
483 | } | ||
484 | |||
485 | /****************************************************************************/ | ||
486 | |||
487 | static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end) | ||
488 | { | ||
489 | /* Purge the range of addresses [start,end] from the D-cache. The | ||
490 | addresses lie in the superpage mapping. There's no harm if we | ||
491 | overpurge at either end - just a small performance loss. */ | ||
492 | unsigned long long ullend, addr, aligned_start; | ||
493 | #if (NEFF == 32) | ||
494 | aligned_start = (unsigned long long)(signed long long)(signed long) start; | ||
495 | #else | ||
496 | #error "NEFF != 32" | ||
497 | #endif | ||
498 | aligned_start &= L1_CACHE_ALIGN_MASK; | ||
499 | addr = aligned_start; | ||
500 | #if (NEFF == 32) | ||
501 | ullend = (unsigned long long) (signed long long) (signed long) end; | ||
502 | #else | ||
503 | #error "NEFF != 32" | ||
504 | #endif | ||
505 | while (addr <= ullend) { | ||
506 | asm __volatile__ ("ocbp %0, 0" : : "r" (addr)); | ||
507 | addr += L1_CACHE_BYTES; | ||
508 | } | ||
509 | return; | ||
510 | } | ||
511 | |||
512 | /* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for | ||
513 | anything else in the kernel */ | ||
514 | #define MAGIC_PAGE0_START 0xffffffffec000000ULL | ||
515 | |||
516 | static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr) | ||
517 | { | ||
518 | /* Purge the physical page 'paddr' from the cache. It's known that any | ||
519 | cache lines requiring attention have the same page colour as the | ||
520 | address 'eaddr'. | ||
521 | |||
522 | This relies on the fact that the D-cache matches on physical tags | ||
523 | when no virtual tag matches. So we create an alias for the original | ||
524 | page and purge through that. (Alternatively, we could have done | ||
525 | this by switching ASID to match the original mapping and purged | ||
526 | through that, but that involves ASID switching cost + probably a | ||
527 | TLBMISS + refill anyway.) | ||
528 | */ | ||
529 | |||
530 | unsigned long long magic_page_start; | ||
531 | unsigned long long magic_eaddr, magic_eaddr_end; | ||
532 | |||
533 | magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK); | ||
534 | |||
535 | /* As long as the kernel is not pre-emptible, this doesn't need to be | ||
536 | under cli/sti. */ | ||
537 | |||
538 | sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr); | ||
539 | |||
540 | magic_eaddr = magic_page_start; | ||
541 | magic_eaddr_end = magic_eaddr + PAGE_SIZE; | ||
542 | while (magic_eaddr < magic_eaddr_end) { | ||
543 | /* Little point in unrolling this loop - the OCBPs are blocking | ||
544 | and won't go any quicker (i.e. the loop overhead is parallel | ||
545 | to part of the OCBP execution.) */ | ||
546 | asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr)); | ||
547 | magic_eaddr += L1_CACHE_BYTES; | ||
548 | } | ||
549 | |||
550 | sh64_teardown_dtlb_cache_slot(); | ||
551 | } | ||
552 | |||
553 | /****************************************************************************/ | ||
554 | |||
555 | static void sh64_dcache_purge_phy_page(unsigned long paddr) | ||
556 | { | ||
557 | /* Purge a page given its physical start address, by creating a | ||
558 | temporary 1 page mapping and purging across that. Even if we know | ||
559 | the virtual address (& vma or mm) of the page, the method here is | ||
560 | more elegant because it avoids issues of coping with page faults on | ||
561 | the purge instructions (i.e. no special-case code required in the | ||
562 | critical path in the TLB miss handling). */ | ||
563 | |||
564 | unsigned long long eaddr_start, eaddr, eaddr_end; | ||
565 | int i; | ||
566 | |||
567 | /* As long as the kernel is not pre-emptible, this doesn't need to be | ||
568 | under cli/sti. */ | ||
569 | |||
570 | eaddr_start = MAGIC_PAGE0_START; | ||
571 | for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) { | ||
572 | sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr); | ||
573 | |||
574 | eaddr = eaddr_start; | ||
575 | eaddr_end = eaddr + PAGE_SIZE; | ||
576 | while (eaddr < eaddr_end) { | ||
577 | asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr)); | ||
578 | eaddr += L1_CACHE_BYTES; | ||
579 | } | ||
580 | |||
581 | sh64_teardown_dtlb_cache_slot(); | ||
582 | eaddr_start += PAGE_SIZE; | ||
583 | } | ||
584 | } | ||
585 | |||
586 | static void sh64_dcache_purge_user_pages(struct mm_struct *mm, | ||
587 | unsigned long addr, unsigned long end) | ||
588 | { | ||
589 | pgd_t *pgd; | ||
590 | pmd_t *pmd; | ||
591 | pte_t *pte; | ||
592 | pte_t entry; | ||
593 | spinlock_t *ptl; | ||
594 | unsigned long paddr; | ||
595 | |||
596 | if (!mm) | ||
597 | return; /* No way to find physical address of page */ | ||
598 | |||
599 | pgd = pgd_offset(mm, addr); | ||
600 | if (pgd_bad(*pgd)) | ||
601 | return; | ||
602 | |||
603 | pmd = pmd_offset(pgd, addr); | ||
604 | if (pmd_none(*pmd) || pmd_bad(*pmd)) | ||
605 | return; | ||
606 | |||
607 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); | ||
608 | do { | ||
609 | entry = *pte; | ||
610 | if (pte_none(entry) || !pte_present(entry)) | ||
611 | continue; | ||
612 | paddr = pte_val(entry) & PAGE_MASK; | ||
613 | sh64_dcache_purge_coloured_phy_page(paddr, addr); | ||
614 | } while (pte++, addr += PAGE_SIZE, addr != end); | ||
615 | pte_unmap_unlock(pte - 1, ptl); | ||
616 | } | ||
617 | /****************************************************************************/ | ||
618 | |||
619 | static void sh64_dcache_purge_user_range(struct mm_struct *mm, | ||
620 | unsigned long start, unsigned long end) | ||
621 | { | ||
622 | /* There are at least 5 choices for the implementation of this, with | ||
623 | pros (+), cons(-), comments(*): | ||
624 | |||
625 | 1. ocbp each line in the range through the original user's ASID | ||
626 | + no lines spuriously evicted | ||
627 | - tlbmiss handling (must either handle faults on demand => extra | ||
628 | special-case code in tlbmiss critical path), or map the page in | ||
629 | advance (=> flush_tlb_range in advance to avoid multiple hits) | ||
630 | - ASID switching | ||
631 | - expensive for large ranges | ||
632 | |||
633 | 2. temporarily map each page in the range to a special effective | ||
634 | address and ocbp through the temporary mapping; relies on the | ||
635 | fact that SH-5 OCB* always do TLB lookup and match on ptags (they | ||
636 | never look at the etags) | ||
637 | + no spurious evictions | ||
638 | - expensive for large ranges | ||
639 | * surely cheaper than (1) | ||
640 | |||
641 | 3. walk all the lines in the cache, check the tags, if a match | ||
642 | occurs create a page mapping to ocbp the line through | ||
643 | + no spurious evictions | ||
644 | - tag inspection overhead | ||
645 | - (especially for small ranges) | ||
646 | - potential cost of setting up/tearing down page mapping for | ||
647 | every line that matches the range | ||
648 | * cost partly independent of range size | ||
649 | |||
650 | 4. walk all the lines in the cache, check the tags, if a match | ||
651 | occurs use 4 * alloco to purge the line (+3 other probably | ||
652 | innocent victims) by natural eviction | ||
653 | + no tlb mapping overheads | ||
654 | - spurious evictions | ||
655 | - tag inspection overhead | ||
656 | |||
657 | 5. implement like flush_cache_all | ||
658 | + no tag inspection overhead | ||
659 | - spurious evictions | ||
660 | - bad for small ranges | ||
661 | |||
662 | (1) can be ruled out as more expensive than (2). (2) appears best | ||
663 | for small ranges. The choice between (3), (4) and (5) for large | ||
664 | ranges and the range size for the large/small boundary need | ||
665 | benchmarking to determine. | ||
666 | |||
667 | For now use approach (2) for small ranges and (5) for large ones. | ||
668 | |||
669 | */ | ||
670 | |||
671 | int n_pages; | ||
672 | |||
673 | n_pages = ((end - start) >> PAGE_SHIFT); | ||
674 | if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) { | ||
675 | #if 1 | ||
676 | sh64_dcache_purge_all(); | ||
677 | #else | ||
678 | unsigned long long set, way; | ||
679 | unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | ||
680 | for (set = 0; set < cpu_data->dcache.sets; set++) { | ||
681 | unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift); | ||
682 | for (way = 0; way < cpu_data->dcache.ways; way++) { | ||
683 | unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift); | ||
684 | unsigned long long tag0; | ||
685 | unsigned long line_valid; | ||
686 | |||
687 | asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr)); | ||
688 | line_valid = tag0 & SH_CACHE_VALID; | ||
689 | if (line_valid) { | ||
690 | unsigned long cache_asid; | ||
691 | unsigned long epn; | ||
692 | |||
693 | cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift; | ||
694 | /* The next line needs some | ||
695 | explanation. The virtual tags | ||
696 | encode bits [31:13] of the virtual | ||
697 | address, bit [12] of the 'tag' being | ||
698 | implied by the cache set index. */ | ||
699 | epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift); | ||
700 | |||
701 | if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) { | ||
702 | /* TODO : could optimise this | ||
703 | call by batching multiple | ||
704 | adjacent sets together. */ | ||
705 | sh64_dcache_purge_sets(set, 1); | ||
706 | break; /* Don't waste time inspecting other ways for this set */ | ||
707 | } | ||
708 | } | ||
709 | } | ||
710 | } | ||
711 | #endif | ||
712 | } else { | ||
713 | /* Small range, covered by a single page table page */ | ||
714 | start &= PAGE_MASK; /* should already be so */ | ||
715 | end = PAGE_ALIGN(end); /* should already be so */ | ||
716 | sh64_dcache_purge_user_pages(mm, start, end); | ||
717 | } | ||
718 | return; | ||
719 | } | ||
720 | |||
721 | static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end) | ||
722 | { | ||
723 | unsigned long long aligned_start; | ||
724 | unsigned long long ull_end; | ||
725 | unsigned long long addr; | ||
726 | |||
727 | ull_end = end; | ||
728 | |||
729 | /* Just wback over the range using the natural addresses. TLB miss | ||
730 | handling will be OK (TBC) : the range has just been written to by | ||
731 | the signal frame setup code, so the PTEs must exist. | ||
732 | |||
733 | Note, if we have CONFIG_PREEMPT and get preempted inside this loop, | ||
734 | it doesn't matter, even if the pid->ASID mapping changes whilst | ||
735 | we're away. In that case the cache will have been flushed when the | ||
736 | mapping was renewed. So the writebacks below will be nugatory (and | ||
737 | we'll doubtless have to fault the TLB entry/ies in again with the | ||
738 | new ASID), but it's a rare case. | ||
739 | */ | ||
740 | aligned_start = start & L1_CACHE_ALIGN_MASK; | ||
741 | addr = aligned_start; | ||
742 | while (addr < ull_end) { | ||
743 | asm __volatile__ ("ocbwb %0, 0" : : "r" (addr)); | ||
744 | addr += L1_CACHE_BYTES; | ||
745 | } | ||
746 | } | ||
747 | |||
748 | /****************************************************************************/ | ||
749 | |||
750 | /* These *MUST* lie in an area of virtual address space that's otherwise unused. */ | ||
751 | #define UNIQUE_EADDR_START 0xe0000000UL | ||
752 | #define UNIQUE_EADDR_END 0xe8000000UL | ||
753 | |||
754 | static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr) | ||
755 | { | ||
756 | /* Given a physical address paddr, and a user virtual address | ||
757 | user_eaddr which will eventually be mapped to it, create a one-off | ||
758 | kernel-private eaddr mapped to the same paddr. This is used for | ||
759 | creating special destination pages for copy_user_page and | ||
760 | clear_user_page */ | ||
761 | |||
762 | static unsigned long current_pointer = UNIQUE_EADDR_START; | ||
763 | unsigned long coloured_pointer; | ||
764 | |||
765 | if (current_pointer == UNIQUE_EADDR_END) { | ||
766 | sh64_dcache_purge_all(); | ||
767 | current_pointer = UNIQUE_EADDR_START; | ||
768 | } | ||
769 | |||
770 | coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK); | ||
771 | sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr); | ||
772 | |||
773 | current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS); | ||
774 | |||
775 | return coloured_pointer; | ||
776 | } | ||
777 | |||
778 | /****************************************************************************/ | ||
779 | |||
/* Copy page 'from' into page 'to', writing the destination through a
   temporary alias obtained from sh64_make_unique_eaddr() for the user
   address 'address'.  The alias is torn down again before returning. */
static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
	unsigned long alias;

	/* First discard any existing cache entries of the wrong colour.
	   These are present quite often, if the kernel has recently used
	   the page internally, then given it up, then it's been allocated
	   to the user. */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	alias = sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_copy(from, (void *) alias);

	sh64_teardown_dtlb_cache_slot();
}
795 | |||
/* Clear page 'to', writing it through a temporary alias obtained from
   sh64_make_unique_eaddr() for the user address 'address'.  The alias is
   torn down again before returning. */
static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	unsigned long alias;

	/* Discard any existing kernel-originated lines of the wrong colour
	   (same reasoning as for the coloured page copy). */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	alias = sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_clear((void *) alias);

	sh64_teardown_dtlb_cache_slot();
}
809 | |||
810 | #endif /* !CONFIG_DCACHE_DISABLED */ | ||
811 | |||
812 | /****************************************************************************/ | ||
813 | |||
814 | /*########################################################################## | ||
815 | EXTERNALLY CALLABLE API. | ||
816 | ##########################################################################*/ | ||
817 | |||
818 | /* These functions are described in Documentation/cachetlb.txt. | ||
819 | Each one of these functions varies in behaviour depending on whether the | ||
820 | I-cache and/or D-cache are configured out. | ||
821 | |||
822 | Note that the Linux term 'flush' corresponds to what is termed 'purge' in | ||
823 | the sh/sh64 jargon for the D-cache, i.e. write back dirty data then | ||
824 | invalidate the cache lines, and 'invalidate' for the I-cache. | ||
825 | */ | ||
826 | |||
827 | #undef FLUSH_TRACE | ||
828 | |||
/* Invalidate the entire contents of both caches, after writing back to
   memory any dirty data from the D-cache. */
void flush_cache_all(void)
{
	sh64_dcache_purge_all();	/* D-cache first: write back + invalidate */
	sh64_icache_inv_all();		/* then the I-cache: invalidate */
}
836 | |||
837 | /****************************************************************************/ | ||
838 | |||
/* Invalidate an entire user-address space from the caches, after writing
   back dirty data (e.g. for shared mmap etc).

   D-cache: the whole cache is purged; 'mm' itself is not consulted.  This
   could be coded selectively by inspecting all the tags then doing
   4*alloco on any set containing a match (as for flush_cache_range), but
   fork/exit/execve (where this is called from) are expensive anyway.  The
   purge cannot be skipped: there could be odd-coloured dirty data
   associated with the mm still in the cache - if this gets written out
   through natural eviction after the kernel has reused the page there
   will be chaos.

   I-cache: deliberately untouched.  The mm being torn down won't ever be
   active again, so any I-cache lines tagged with its ASID won't be visible
   for the rest of the lifetime of this ASID cycle, and before the ASID
   gets reused there will be a flush_cache_all.  This is similar to the
   lack of action needed in flush_tlb_mm - see fault.c. */
void flush_cache_mm(struct mm_struct *mm)
{
	sh64_dcache_purge_all();
}
864 | |||
865 | /****************************************************************************/ | ||
866 | |||
867 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, | ||
868 | unsigned long end) | ||
869 | { | ||
870 | struct mm_struct *mm = vma->vm_mm; | ||
871 | |||
872 | /* Invalidate (from both caches) the range [start,end) of virtual | ||
873 | addresses from the user address space specified by mm, after writing | ||
874 | back any dirty data. | ||
875 | |||
876 | Note, 'end' is 1 byte beyond the end of the range to flush. */ | ||
877 | |||
878 | sh64_dcache_purge_user_range(mm, start, end); | ||
879 | sh64_icache_inv_user_page_range(mm, start, end); | ||
880 | } | ||
881 | |||
882 | /****************************************************************************/ | ||
883 | |||
884 | void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn) | ||
885 | { | ||
886 | /* Invalidate any entries in either cache for the vma within the user | ||
887 | address space vma->vm_mm for the page starting at virtual address | ||
888 | 'eaddr'. This seems to be used primarily in breaking COW. Note, | ||
889 | the I-cache must be searched too in case the page in question is | ||
890 | both writable and being executed from (e.g. stack trampolines.) | ||
891 | |||
892 | Note, this is called with pte lock held. | ||
893 | */ | ||
894 | |||
895 | sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT); | ||
896 | |||
897 | if (vma->vm_flags & VM_EXEC) { | ||
898 | sh64_icache_inv_user_page(vma, eaddr); | ||
899 | } | ||
900 | } | ||
901 | |||
902 | /****************************************************************************/ | ||
903 | |||
904 | #ifndef CONFIG_DCACHE_DISABLED | ||
905 | |||
906 | void copy_user_page(void *to, void *from, unsigned long address, struct page *page) | ||
907 | { | ||
908 | /* 'from' and 'to' are kernel virtual addresses (within the superpage | ||
909 | mapping of the physical RAM). 'address' is the user virtual address | ||
910 | where the copy 'to' will be mapped after. This allows a custom | ||
911 | mapping to be used to ensure that the new copy is placed in the | ||
912 | right cache sets for the user to see it without having to bounce it | ||
913 | out via memory. Note however : the call to flush_page_to_ram in | ||
914 | (generic)/mm/memory.c:(break_cow) undoes all this good work in that one | ||
915 | very important case! | ||
916 | |||
917 | TBD : can we guarantee that on every call, any cache entries for | ||
918 | 'from' are in the same colour sets as 'address' also? i.e. is this | ||
919 | always used just to deal with COW? (I suspect not). */ | ||
920 | |||
921 | /* There are two possibilities here for when the page 'from' was last accessed: | ||
922 | * by the kernel : this is OK, no purge required. | ||
923 | * by the/a user (e.g. for break_COW) : need to purge. | ||
924 | |||
925 | If the potential user mapping at 'address' is the same colour as | ||
926 | 'from' there is no need to purge any cache lines from the 'from' | ||
927 | page mapped into cache sets of colour 'address'. (The copy will be | ||
928 | accessing the page through 'from'). | ||
929 | */ | ||
930 | |||
931 | if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) { | ||
932 | sh64_dcache_purge_coloured_phy_page(__pa(from), address); | ||
933 | } | ||
934 | |||
935 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { | ||
936 | /* No synonym problem on destination */ | ||
937 | sh64_page_copy(from, to); | ||
938 | } else { | ||
939 | sh64_copy_user_page_coloured(to, from, address); | ||
940 | } | ||
941 | |||
942 | /* Note, don't need to flush 'from' page from the cache again - it's | ||
943 | done anyway by the generic code */ | ||
944 | } | ||
945 | |||
946 | void clear_user_page(void *to, unsigned long address, struct page *page) | ||
947 | { | ||
948 | /* 'to' is a kernel virtual address (within the superpage | ||
949 | mapping of the physical RAM). 'address' is the user virtual address | ||
950 | where the 'to' page will be mapped after. This allows a custom | ||
951 | mapping to be used to ensure that the new copy is placed in the | ||
952 | right cache sets for the user to see it without having to bounce it | ||
953 | out via memory. | ||
954 | */ | ||
955 | |||
956 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { | ||
957 | /* No synonym problem on destination */ | ||
958 | sh64_page_clear(to); | ||
959 | } else { | ||
960 | sh64_clear_user_page_coloured(to, address); | ||
961 | } | ||
962 | } | ||
963 | |||
964 | #endif /* !CONFIG_DCACHE_DISABLED */ | ||
965 | |||
966 | /****************************************************************************/ | ||
967 | |||
/* Purge the physical page behind 'page' (as given by page_to_phys) from
   the D-cache, then issue a write memory barrier. */
void flush_dcache_page(struct page *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}
973 | |||
974 | /****************************************************************************/ | ||
975 | |||
/* Flush the range [start,end] of kernel virtual address space from the
   I-cache.  The corresponding range must be purged from the D-cache also
   because the SH-5 doesn't have cache snooping between the caches (and
   flush_page_to_ram() won't be doing this for us!).  The addresses will be
   visible through the superpage mapping, therefore it's guaranteed that
   there are no cache entries for the range in cache sets of the wrong
   colour.

   Primarily used for cohering the I-cache after a module has been
   loaded. */
void flush_icache_range(unsigned long start, unsigned long end)
{
	/* D-cache purge, barrier, then I-cache invalidate - in that order. */
	sh64_dcache_purge_kernel_range(start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}
995 | |||
996 | /****************************************************************************/ | ||
997 | |||
998 | void flush_icache_user_range(struct vm_area_struct *vma, | ||
999 | struct page *page, unsigned long addr, int len) | ||
1000 | { | ||
1001 | /* Flush the range of user (defined by vma->vm_mm) address space | ||
1002 | starting at 'addr' for 'len' bytes from the cache. The range does | ||
1003 | not straddle a page boundary, the unique physical page containing | ||
1004 | the range is 'page'. This seems to be used mainly for invalidating | ||
1005 | an address range following a poke into the program text through the | ||
1006 | ptrace() call from another process (e.g. for BRK instruction | ||
1007 | insertion). */ | ||
1008 | |||
1009 | sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr); | ||
1010 | mb(); | ||
1011 | |||
1012 | if (vma->vm_flags & VM_EXEC) { | ||
1013 | sh64_icache_inv_user_small_range(vma->vm_mm, addr, len); | ||
1014 | } | ||
1015 | } | ||
1016 | |||
1017 | /*########################################################################## | ||
1018 | ARCH/SH64 PRIVATE CALLABLE API. | ||
1019 | ##########################################################################*/ | ||
1020 | |||
/* For the address range [start,end), write back the data from the D-cache
   and invalidate the corresponding region of the I-cache for the current
   process.  Used to flush signal trampolines on the stack to make them
   executable. */
void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	/* Write-back, barrier, then I-cache invalidate - in that order. */
	sh64_dcache_wback_current_user_range(start, end);
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}
1032 | |||
diff --git a/arch/sh/mm/tlb-sh5.c b/arch/sh/mm/tlb-sh5.c new file mode 100644 index 000000000000..d517e7d70340 --- /dev/null +++ b/arch/sh/mm/tlb-sh5.c | |||
@@ -0,0 +1,166 @@ | |||
1 | /* | ||
2 | * arch/sh/mm/tlb-sh5.c | ||
3 | * | ||
4 | * Copyright (C) 2003 Paul Mundt <lethal@linux-sh.org> | ||
5 | * Copyright (C) 2003 Richard Curnow <richard.curnow@superh.com> | ||
6 | * | ||
7 | * This file is subject to the terms and conditions of the GNU General Public | ||
8 | * License. See the file "COPYING" in the main directory of this archive | ||
9 | * for more details. | ||
10 | * | ||
11 | */ | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <asm/page.h> | ||
15 | #include <asm/tlb.h> | ||
16 | #include <asm/mmu_context.h> | ||
17 | |||
18 | /** | ||
19 | * sh64_tlb_init | ||
20 | * | ||
21 | * Perform initial setup for the DTLB and ITLB. | ||
22 | */ | ||
23 | int __init sh64_tlb_init(void) | ||
24 | { | ||
25 | /* Assign some sane DTLB defaults */ | ||
26 | cpu_data->dtlb.entries = 64; | ||
27 | cpu_data->dtlb.step = 0x10; | ||
28 | |||
29 | cpu_data->dtlb.first = DTLB_FIXED | cpu_data->dtlb.step; | ||
30 | cpu_data->dtlb.next = cpu_data->dtlb.first; | ||
31 | |||
32 | cpu_data->dtlb.last = DTLB_FIXED | | ||
33 | ((cpu_data->dtlb.entries - 1) * | ||
34 | cpu_data->dtlb.step); | ||
35 | |||
36 | /* And again for the ITLB */ | ||
37 | cpu_data->itlb.entries = 64; | ||
38 | cpu_data->itlb.step = 0x10; | ||
39 | |||
40 | cpu_data->itlb.first = ITLB_FIXED | cpu_data->itlb.step; | ||
41 | cpu_data->itlb.next = cpu_data->itlb.first; | ||
42 | cpu_data->itlb.last = ITLB_FIXED | | ||
43 | ((cpu_data->itlb.entries - 1) * | ||
44 | cpu_data->itlb.step); | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | /** | ||
50 | * sh64_next_free_dtlb_entry | ||
51 | * | ||
52 | * Find the next available DTLB entry | ||
53 | */ | ||
54 | unsigned long long sh64_next_free_dtlb_entry(void) | ||
55 | { | ||
56 | return cpu_data->dtlb.next; | ||
57 | } | ||
58 | |||
59 | /** | ||
60 | * sh64_get_wired_dtlb_entry | ||
61 | * | ||
62 | * Allocate a wired (locked-in) entry in the DTLB | ||
63 | */ | ||
64 | unsigned long long sh64_get_wired_dtlb_entry(void) | ||
65 | { | ||
66 | unsigned long long entry = sh64_next_free_dtlb_entry(); | ||
67 | |||
68 | cpu_data->dtlb.first += cpu_data->dtlb.step; | ||
69 | cpu_data->dtlb.next += cpu_data->dtlb.step; | ||
70 | |||
71 | return entry; | ||
72 | } | ||
73 | |||
74 | /** | ||
75 | * sh64_put_wired_dtlb_entry | ||
76 | * | ||
77 | * @entry: Address of TLB slot. | ||
78 | * | ||
79 | * Free a wired (locked-in) entry in the DTLB. | ||
80 | * | ||
81 | * Works like a stack, last one to allocate must be first one to free. | ||
82 | */ | ||
83 | int sh64_put_wired_dtlb_entry(unsigned long long entry) | ||
84 | { | ||
85 | __flush_tlb_slot(entry); | ||
86 | |||
87 | /* | ||
88 | * We don't do any particularly useful tracking of wired entries, | ||
89 | * so this approach works like a stack .. last one to be allocated | ||
90 | * has to be the first one to be freed. | ||
91 | * | ||
92 | * We could potentially load wired entries into a list and work on | ||
93 | * rebalancing the list periodically (which also entails moving the | ||
94 | * contents of a TLB entry) .. though I have a feeling that this is | ||
95 | * more trouble than it's worth. | ||
96 | */ | ||
97 | |||
98 | /* | ||
99 | * Entry must be valid .. we don't want any ITLB addresses! | ||
100 | */ | ||
101 | if (entry <= DTLB_FIXED) | ||
102 | return -EINVAL; | ||
103 | |||
104 | /* | ||
105 | * Next, check if we're within range to be freed. (ie, must be the | ||
106 | * entry beneath the first 'free' entry! | ||
107 | */ | ||
108 | if (entry < (cpu_data->dtlb.first - cpu_data->dtlb.step)) | ||
109 | return -EINVAL; | ||
110 | |||
111 | /* If we are, then bring this entry back into the list */ | ||
112 | cpu_data->dtlb.first -= cpu_data->dtlb.step; | ||
113 | cpu_data->dtlb.next = entry; | ||
114 | |||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | /** | ||
119 | * sh64_setup_tlb_slot | ||
120 | * | ||
121 | * @config_addr: Address of TLB slot. | ||
122 | * @eaddr: Virtual address. | ||
123 | * @asid: Address Space Identifier. | ||
124 | * @paddr: Physical address. | ||
125 | * | ||
126 | * Load up a virtual<->physical translation for @eaddr<->@paddr in the | ||
127 | * pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry). | ||
128 | */ | ||
129 | inline void sh64_setup_tlb_slot(unsigned long long config_addr, | ||
130 | unsigned long eaddr, | ||
131 | unsigned long asid, | ||
132 | unsigned long paddr) | ||
133 | { | ||
134 | unsigned long long pteh, ptel; | ||
135 | |||
136 | /* Sign extension */ | ||
137 | #if (NEFF == 32) | ||
138 | pteh = (unsigned long long)(signed long long)(signed long) eaddr; | ||
139 | #else | ||
140 | #error "Can't sign extend more than 32 bits yet" | ||
141 | #endif | ||
142 | pteh &= PAGE_MASK; | ||
143 | pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID; | ||
144 | #if (NEFF == 32) | ||
145 | ptel = (unsigned long long)(signed long long)(signed long) paddr; | ||
146 | #else | ||
147 | #error "Can't sign extend more than 32 bits yet" | ||
148 | #endif | ||
149 | ptel &= PAGE_MASK; | ||
150 | ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE); | ||
151 | |||
152 | asm volatile("putcfg %0, 1, %1\n\t" | ||
153 | "putcfg %0, 0, %2\n" | ||
154 | : : "r" (config_addr), "r" (ptel), "r" (pteh)); | ||
155 | } | ||
156 | |||
/**
 * sh64_teardown_tlb_slot
 *
 * @config_addr:	Address of TLB slot.
 *
 * Teardown any existing mapping in the TLB slot @config_addr.
 *
 * Declared as an alias of __flush_tlb_slot, so no separate body exists.
 */
inline void sh64_teardown_tlb_slot(unsigned long long config_addr)
	__attribute__ ((alias("__flush_tlb_slot")));
166 | |||