path: root/arch/sh64/mm
author    Linus Torvalds <torvalds@ppc970.osdl.org>    2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>    2005-04-16 18:20:36 -0400
commit    1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree      0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/sh64/mm
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/sh64/mm')
-rw-r--r--    arch/sh64/mm/Makefile          44
-rw-r--r--    arch/sh64/mm/cache.c         1041
-rw-r--r--    arch/sh64/mm/extable.c         81
-rw-r--r--    arch/sh64/mm/fault.c          601
-rw-r--r--    arch/sh64/mm/hugetlbpage.c    264
-rw-r--r--    arch/sh64/mm/init.c           196
-rw-r--r--    arch/sh64/mm/ioremap.c        469
-rw-r--r--    arch/sh64/mm/tlb.c            166
-rw-r--r--    arch/sh64/mm/tlbmiss.c        280
9 files changed, 3142 insertions, 0 deletions
diff --git a/arch/sh64/mm/Makefile b/arch/sh64/mm/Makefile
new file mode 100644
index 000000000000..ff19378ac90a
--- /dev/null
+++ b/arch/sh64/mm/Makefile
@@ -0,0 +1,44 @@
1#
2# This file is subject to the terms and conditions of the GNU General Public
3# License. See the file "COPYING" in the main directory of this archive
4# for more details.
5#
6# Copyright (C) 2000, 2001 Paolo Alberelli
7# Copyright (C) 2003, 2004 Paul Mundt
8#
9# Makefile for the sh64-specific parts of the Linux memory manager.
10#
11# Note! Dependencies are done automagically by 'make dep', which also
12# removes any old dependencies. DON'T put your own dependencies here
13# unless it's something special (i.e. not a .c file).
14#
15
16obj-y := init.o fault.o ioremap.o extable.o cache.o tlbmiss.o tlb.o
17
18obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
19
20# Special flags for tlbmiss.o. This puts restrictions on the number of
21# caller-save registers that the compiler can target when building this file.
22# This is required because the code is called from a context in entry.S where
23# very few registers have been saved in the exception handler (for speed
24# reasons).
25# The caller save registers that have been saved and which can be used are
26# r2,r3,r4,r5 : argument passing
27# r15, r18 : SP and LINK
28# tr0-4 : allow all caller-save TR's. The compiler seems to be able to make
29# use of them, so it's probably beneficial to performance to save them
30# and have them available for it.
31#
32# The resources not listed below are callee save, i.e. the compiler is free to
33# use any of them and will spill them to the stack itself.
34
35CFLAGS_tlbmiss.o += -ffixed-r7 \
36 -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
37 -ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
38 -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
39 -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
40 -ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \
41 -ffixed-r41 -ffixed-r42 -ffixed-r43 \
42 -ffixed-r60 -ffixed-r61 -ffixed-r62 \
43 -fomit-frame-pointer
44
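A minimal sketch of what the -ffixed-<reg> flags above achieve (illustrative only, not part of this commit): GCC is told it may never allocate the named registers, so the generated code in tlbmiss.c touches only r2-r5, r15, r18 and tr0-tr4, which entry.S has already saved. Within a single file, the same reservation can in principle be expressed with a GCC global register variable:

/* Per-file equivalent of -ffixed-r20: reserving r20 for this variable
 * keeps the register allocator away from it for the whole translation
 * unit.  (Sketch only.) */
register unsigned long reserved_r20 asm("r20");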
diff --git a/arch/sh64/mm/cache.c b/arch/sh64/mm/cache.c
new file mode 100644
index 000000000000..3b87e25ea773
--- /dev/null
+++ b/arch/sh64/mm/cache.c
@@ -0,0 +1,1041 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * arch/sh64/mm/cache.c
7 *
8 * Original version Copyright (C) 2000, 2001 Paolo Alberelli
9 * Second version Copyright (C) benedict.gaster@superh.com 2002
10 * Third version Copyright Richard.Curnow@superh.com 2003
11 * Hacks to third version Copyright (C) 2003 Paul Mundt
12 */
13
14/****************************************************************************/
15
16#include <linux/config.h>
17#include <linux/init.h>
18#include <linux/mman.h>
19#include <linux/mm.h>
20#include <linux/threads.h>
21#include <asm/page.h>
22#include <asm/pgtable.h>
23#include <asm/processor.h>
24#include <asm/cache.h>
25#include <asm/tlb.h>
26#include <asm/io.h>
27#include <asm/uaccess.h>
28#include <asm/mmu_context.h>
29#include <asm/pgalloc.h> /* for flush_itlb_range */
30
31#include <linux/proc_fs.h>
32
33/* This function is in entry.S */
34extern unsigned long switch_and_save_asid(unsigned long new_asid);
35
36/* Wired TLB entry for the D-cache */
37static unsigned long long dtlb_cache_slot;
38
39/**
40 * sh64_cache_init()
41 *
42 * This is pretty much just a straightforward clone of the SH
43 * detect_cpu_and_cache_system().
44 *
45 * This function is responsible for setting up all of the cache
46 * info dynamically as well as taking care of CPU probing and
47 * setting up the relevant subtype data.
48 *
49 * FIXME: For the time being, we only really support the SH5-101
50 * out of the box, and don't support dynamic probing for things
51 * like the SH5-103 or even cut2 of the SH5-101. Implement this
52 * later!
53 */
54int __init sh64_cache_init(void)
55{
56 /*
57 * First, setup some sane values for the I-cache.
58 */
59 cpu_data->icache.ways = 4;
60 cpu_data->icache.sets = 256;
61 cpu_data->icache.linesz = L1_CACHE_BYTES;
62
63 /*
64 * FIXME: This can probably be cleaned up a bit as well.. for example,
65 * do we really need the way shift _and_ the way_step_shift ?? Judging
66 * by the existing code, I would guess no.. is there any valid reason
67 * why we need to be tracking this around?
68 */
69 cpu_data->icache.way_shift = 13;
70 cpu_data->icache.entry_shift = 5;
71 cpu_data->icache.set_shift = 4;
72 cpu_data->icache.way_step_shift = 16;
73 cpu_data->icache.asid_shift = 2;
74
75 /*
76 * way offset = cache size / associativity, so just don't factor in
77 * associativity in the first place..
78 */
79 cpu_data->icache.way_ofs = cpu_data->icache.sets *
80 cpu_data->icache.linesz;
81
82 cpu_data->icache.asid_mask = 0x3fc;
83 cpu_data->icache.idx_mask = 0x1fe0;
84 cpu_data->icache.epn_mask = 0xffffe000;
85 cpu_data->icache.flags = 0;
86
87 /*
88 * Next, setup some sane values for the D-cache.
89 *
90 * On the SH5, these are pretty consistent with the I-cache settings,
91 * so we just copy over the existing definitions.. these can be fixed
92 * up later, especially if we add runtime CPU probing.
93 *
94 * Though in the meantime it saves us from having to duplicate all of
95 * the above definitions..
96 */
97 cpu_data->dcache = cpu_data->icache;
98
99 /*
100 * Setup any cache-related flags here
101 */
102#if defined(CONFIG_DCACHE_WRITE_THROUGH)
103 set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
104#elif defined(CONFIG_DCACHE_WRITE_BACK)
105 set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
106#endif
107
108 /*
109 * We also need to reserve a slot for the D-cache in the DTLB, so we
110 * do this now ..
111 */
112 dtlb_cache_slot = sh64_get_wired_dtlb_entry();
113
114 return 0;
115}
116
117#ifdef CONFIG_DCACHE_DISABLED
118#define sh64_dcache_purge_all() do { } while (0)
119#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr) do { } while (0)
120#define sh64_dcache_purge_user_range(mm, start, end) do { } while (0)
121#define sh64_dcache_purge_phy_page(paddr) do { } while (0)
122#define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0)
123#define sh64_dcache_purge_kernel_range(start, end) do { } while (0)
124#define sh64_dcache_wback_current_user_range(start, end) do { } while (0)
125#endif
126
127/*##########################################################################*/
128
129/* From here onwards, a rewrite of the implementation,
130 by Richard.Curnow@superh.com.
131
132   The major changes in this compared to the old version are:
133 1. use more selective purging through OCBP instead of using ALLOCO to purge
134 by natural replacement. This avoids purging out unrelated cache lines
135 that happen to be in the same set.
136 2. exploit the APIs copy_user_page and clear_user_page better
137 3. be more selective about I-cache purging, in particular use invalidate_all
138 more sparingly.
139
140 */
141
142/*##########################################################################
143 SUPPORT FUNCTIONS
144 ##########################################################################*/
145
146/****************************************************************************/
147/* The following group of functions deals with mapping and unmapping a temporary
148   page into the DTLB slot that has been set aside for our exclusive use. */
149/* In order to accomplish this, we use the generic interface for adding and
150 removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
151/****************************************************************************/
152
153static unsigned long slot_own_flags;
154
155static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
156{
157 local_irq_save(slot_own_flags);
158 sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
159}
160
161static inline void sh64_teardown_dtlb_cache_slot(void)
162{
163 sh64_teardown_tlb_slot(dtlb_cache_slot);
164 local_irq_restore(slot_own_flags);
165}
166
167/****************************************************************************/
168
169#ifndef CONFIG_ICACHE_DISABLED
170
171static void __inline__ sh64_icache_inv_all(void)
172{
173 unsigned long long addr, flag, data;
174 unsigned int flags;
175
176 addr=ICCR0;
177 flag=ICCR0_ICI;
178 data=0;
179
180 /* Make this a critical section for safety (probably not strictly necessary.) */
181 local_irq_save(flags);
182
183	/* Without %1 it gets inexplicably wrong */
184 asm volatile("getcfg %3, 0, %0\n\t"
185 "or %0, %2, %0\n\t"
186 "putcfg %3, 0, %0\n\t"
187 "synci"
188 : "=&r" (data)
189 : "0" (data), "r" (flag), "r" (addr));
190
191 local_irq_restore(flags);
192}
193
194static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
195{
196 /* Invalidate range of addresses [start,end] from the I-cache, where
197 * the addresses lie in the kernel superpage. */
198
199 unsigned long long ullend, addr, aligned_start;
200#if (NEFF == 32)
201 aligned_start = (unsigned long long)(signed long long)(signed long) start;
202#else
203#error "NEFF != 32"
204#endif
205 aligned_start &= L1_CACHE_ALIGN_MASK;
206 addr = aligned_start;
207#if (NEFF == 32)
208 ullend = (unsigned long long) (signed long long) (signed long) end;
209#else
210#error "NEFF != 32"
211#endif
212 while (addr <= ullend) {
213 asm __volatile__ ("icbi %0, 0" : : "r" (addr));
214 addr += L1_CACHE_BYTES;
215 }
216}
217
218static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
219{
220 /* If we get called, we know that vma->vm_flags contains VM_EXEC.
221 Also, eaddr is page-aligned. */
222
223 unsigned long long addr, end_addr;
224 unsigned long flags = 0;
225 unsigned long running_asid, vma_asid;
226 addr = eaddr;
227 end_addr = addr + PAGE_SIZE;
228
229 /* Check whether we can use the current ASID for the I-cache
230 invalidation. For example, if we're called via
231 access_process_vm->flush_cache_page->here, (e.g. when reading from
232 /proc), 'running_asid' will be that of the reader, not of the
233 victim.
234
235 Also, note the risk that we might get pre-empted between the ASID
236 compare and blocking IRQs, and before we regain control, the
237 pid->ASID mapping changes. However, the whole cache will get
238 invalidated when the mapping is renewed, so the worst that can
239 happen is that the loop below ends up invalidating somebody else's
240 cache entries.
241 */
242
243 running_asid = get_asid();
244 vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
245 if (running_asid != vma_asid) {
246 local_irq_save(flags);
247 switch_and_save_asid(vma_asid);
248 }
249 while (addr < end_addr) {
250 /* Worth unrolling a little */
251 asm __volatile__("icbi %0, 0" : : "r" (addr));
252 asm __volatile__("icbi %0, 32" : : "r" (addr));
253 asm __volatile__("icbi %0, 64" : : "r" (addr));
254 asm __volatile__("icbi %0, 96" : : "r" (addr));
255 addr += 128;
256 }
257 if (running_asid != vma_asid) {
258 switch_and_save_asid(running_asid);
259 local_irq_restore(flags);
260 }
261}
262
263/****************************************************************************/
264
265static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
266 unsigned long start, unsigned long end)
267{
268 /* Used for invalidating big chunks of I-cache, i.e. assume the range
269 is whole pages. If 'start' or 'end' is not page aligned, the code
270 is conservative and invalidates to the ends of the enclosing pages.
271 This is functionally OK, just a performance loss. */
272
273 /* See the comments below in sh64_dcache_purge_user_range() regarding
274 the choice of algorithm. However, for the I-cache option (2) isn't
275 available because there are no physical tags so aliases can't be
276 resolved. The icbi instruction has to be used through the user
277	   mapping.  Because icbi is cheaper than ocbp on a cache hit, the
278	   selective code remains worthwhile up to a larger range than it does
279	   for the D-cache.  Just assume 64 pages for now as a working
280	   figure.
281 */
282
283 int n_pages;
284
285 if (!mm) return;
286
287 n_pages = ((end - start) >> PAGE_SHIFT);
288 if (n_pages >= 64) {
289 sh64_icache_inv_all();
290 } else {
291 unsigned long aligned_start;
292 unsigned long eaddr;
293 unsigned long after_last_page_start;
294 unsigned long mm_asid, current_asid;
295 unsigned long long flags = 0ULL;
296
297 mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
298 current_asid = get_asid();
299
300 if (mm_asid != current_asid) {
301 /* Switch ASID and run the invalidate loop under cli */
302 local_irq_save(flags);
303 switch_and_save_asid(mm_asid);
304 }
305
306 aligned_start = start & PAGE_MASK;
307 after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
308
309 while (aligned_start < after_last_page_start) {
310 struct vm_area_struct *vma;
311 unsigned long vma_end;
312 vma = find_vma(mm, aligned_start);
313 if (!vma || (aligned_start <= vma->vm_end)) {
314 /* Avoid getting stuck in an error condition */
315 aligned_start += PAGE_SIZE;
316 continue;
317 }
318 vma_end = vma->vm_end;
319 if (vma->vm_flags & VM_EXEC) {
320 /* Executable */
321 eaddr = aligned_start;
322 while (eaddr < vma_end) {
323 sh64_icache_inv_user_page(vma, eaddr);
324 eaddr += PAGE_SIZE;
325 }
326 }
327 aligned_start = vma->vm_end; /* Skip to start of next region */
328 }
329 if (mm_asid != current_asid) {
330 switch_and_save_asid(current_asid);
331 local_irq_restore(flags);
332 }
333 }
334}
335
336static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
337 unsigned long start, int len)
338{
339
340 /* Invalidate a small range of user context I-cache, not necessarily
341 page (or even cache-line) aligned. */
342
343 unsigned long long eaddr = start;
344 unsigned long long eaddr_end = start + len;
345 unsigned long current_asid, mm_asid;
346 unsigned long long flags;
347 unsigned long long epage_start;
348
349 /* Since this is used inside ptrace, the ASID in the mm context
350 typically won't match current_asid. We'll have to switch ASID to do
351 this. For safety, and given that the range will be small, do all
352 this under cli.
353
354 Note, there is a hazard that the ASID in mm->context is no longer
355 actually associated with mm, i.e. if the mm->context has started a
356 new cycle since mm was last active. However, this is just a
357 performance issue: all that happens is that we invalidate lines
358 belonging to another mm, so the owning process has to refill them
359 when that mm goes live again. mm itself can't have any cache
360 entries because there will have been a flush_cache_all when the new
361 mm->context cycle started. */
362
363 /* Align to start of cache line. Otherwise, suppose len==8 and start
364 was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
365 eaddr = start & L1_CACHE_ALIGN_MASK;
366 eaddr_end = start + len;
367
368 local_irq_save(flags);
369 mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
370 current_asid = switch_and_save_asid(mm_asid);
371
372 epage_start = eaddr & PAGE_MASK;
373
374 while (eaddr < eaddr_end)
375 {
376 asm __volatile__("icbi %0, 0" : : "r" (eaddr));
377 eaddr += L1_CACHE_BYTES;
378 }
379 switch_and_save_asid(current_asid);
380 local_irq_restore(flags);
381}
382
383static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
384{
385 /* The icbi instruction never raises ITLBMISS. i.e. if there's not a
386 cache hit on the virtual tag the instruction ends there, without a
387 TLB lookup. */
388
389 unsigned long long aligned_start;
390 unsigned long long ull_end;
391 unsigned long long addr;
392
393 ull_end = end;
394
395 /* Just invalidate over the range using the natural addresses. TLB
396 miss handling will be OK (TBC). Since it's for the current process,
397 either we're already in the right ASID context, or the ASIDs have
398 been recycled since we were last active in which case we might just
399	   invalidate another process's I-cache entries : no worries, just a
400 performance drop for him. */
401 aligned_start = start & L1_CACHE_ALIGN_MASK;
402 addr = aligned_start;
403 while (addr < ull_end) {
404 asm __volatile__ ("icbi %0, 0" : : "r" (addr));
405 asm __volatile__ ("nop");
406 asm __volatile__ ("nop");
407 addr += L1_CACHE_BYTES;
408 }
409}
410
411#endif /* !CONFIG_ICACHE_DISABLED */
412
413/****************************************************************************/
414
415#ifndef CONFIG_DCACHE_DISABLED
416
417/* Buffer used as the target of alloco instructions to purge data from cache
418 sets by natural eviction. -- RPC */
419#define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4)
420static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
421
422/****************************************************************************/
423
424static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
425{
426 /* Purge all ways in a particular block of sets, specified by the base
427 set number and number of sets. Can handle wrap-around, if that's
428 needed. */
429
430 int dummy_buffer_base_set;
431 unsigned long long eaddr, eaddr0, eaddr1;
432 int j;
433 int set_offset;
434
435 dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
436 set_offset = sets_to_purge_base - dummy_buffer_base_set;
437
438 for (j=0; j<n_sets; j++, set_offset++) {
439 set_offset &= (cpu_data->dcache.sets - 1);
440 eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);
441
442 /* Do one alloco which hits the required set per cache way. For
443 write-back mode, this will purge the #ways resident lines. There's
444 little point unrolling this loop because the allocos stall more if
445 they're too close together. */
446 eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
447 for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
448 asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
449 asm __volatile__ ("synco"); /* TAKum03020 */
450 }
451
452 eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
453 for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
454 /* Load from each address. Required because alloco is a NOP if
455 the cache is write-through. Write-through is a config option. */
456 if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
457 *(volatile unsigned char *)(int)eaddr;
458 }
459 }
460
461 /* Don't use OCBI to invalidate the lines. That costs cycles directly.
462 If the dummy block is just left resident, it will naturally get
463 evicted as required. */
464
465 return;
466}
467
468/****************************************************************************/
469
470static void sh64_dcache_purge_all(void)
471{
472 /* Purge the entire contents of the dcache. The most efficient way to
473 achieve this is to use alloco instructions on a region of unused
474 memory equal in size to the cache, thereby causing the current
475 contents to be discarded by natural eviction. The alternative,
476 namely reading every tag, setting up a mapping for the corresponding
477 page and doing an OCBP for the line, would be much more expensive.
478 */
479
480 sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
481
482 return;
483
484}
485
486/****************************************************************************/
487
488static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
489{
490 /* Purge the range of addresses [start,end] from the D-cache. The
491 addresses lie in the superpage mapping. There's no harm if we
492 overpurge at either end - just a small performance loss. */
493 unsigned long long ullend, addr, aligned_start;
494#if (NEFF == 32)
495 aligned_start = (unsigned long long)(signed long long)(signed long) start;
496#else
497#error "NEFF != 32"
498#endif
499 aligned_start &= L1_CACHE_ALIGN_MASK;
500 addr = aligned_start;
501#if (NEFF == 32)
502 ullend = (unsigned long long) (signed long long) (signed long) end;
503#else
504#error "NEFF != 32"
505#endif
506 while (addr <= ullend) {
507 asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
508 addr += L1_CACHE_BYTES;
509 }
510 return;
511}
512
513/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
514 anything else in the kernel */
515#define MAGIC_PAGE0_START 0xffffffffec000000ULL
516
517static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
518{
519 /* Purge the physical page 'paddr' from the cache. It's known that any
520	   cache lines requiring attention have the same page colour as the
521 address 'eaddr'.
522
523 This relies on the fact that the D-cache matches on physical tags
524 when no virtual tag matches. So we create an alias for the original
525 page and purge through that. (Alternatively, we could have done
526 this by switching ASID to match the original mapping and purged
527 through that, but that involves ASID switching cost + probably a
528 TLBMISS + refill anyway.)
529 */
530
531 unsigned long long magic_page_start;
532 unsigned long long magic_eaddr, magic_eaddr_end;
533
534 magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
535
536 /* As long as the kernel is not pre-emptible, this doesn't need to be
537 under cli/sti. */
538
539 sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
540
541 magic_eaddr = magic_page_start;
542 magic_eaddr_end = magic_eaddr + PAGE_SIZE;
543 while (magic_eaddr < magic_eaddr_end) {
544 /* Little point in unrolling this loop - the OCBPs are blocking
545 and won't go any quicker (i.e. the loop overhead is parallel
546 to part of the OCBP execution.) */
547 asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
548 magic_eaddr += L1_CACHE_BYTES;
549 }
550
551 sh64_teardown_dtlb_cache_slot();
552}
553
554/****************************************************************************/
555
556static void sh64_dcache_purge_phy_page(unsigned long paddr)
557{
558	/* Purge a page given its physical start address, by creating a
559 temporary 1 page mapping and purging across that. Even if we know
560 the virtual address (& vma or mm) of the page, the method here is
561 more elegant because it avoids issues of coping with page faults on
562 the purge instructions (i.e. no special-case code required in the
563 critical path in the TLB miss handling). */
564
565 unsigned long long eaddr_start, eaddr, eaddr_end;
566 int i;
567
568 /* As long as the kernel is not pre-emptible, this doesn't need to be
569 under cli/sti. */
570
571 eaddr_start = MAGIC_PAGE0_START;
572 for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
573 sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
574
575 eaddr = eaddr_start;
576 eaddr_end = eaddr + PAGE_SIZE;
577 while (eaddr < eaddr_end) {
578 asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
579 eaddr += L1_CACHE_BYTES;
580 }
581
582 sh64_teardown_dtlb_cache_slot();
583 eaddr_start += PAGE_SIZE;
584 }
585}
586
587static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
588{
589 pgd_t *pgd;
590 pmd_t *pmd;
591 pte_t *pte;
592 pte_t entry;
593 unsigned long paddr;
594
595 /* NOTE : all the callers of this have mm->page_table_lock held, so the
596 following page table traversal is safe even on SMP/pre-emptible. */
597
598 if (!mm) return; /* No way to find physical address of page */
599 pgd = pgd_offset(mm, eaddr);
600 if (pgd_bad(*pgd)) return;
601
602 pmd = pmd_offset(pgd, eaddr);
603 if (pmd_none(*pmd) || pmd_bad(*pmd)) return;
604
605 pte = pte_offset_kernel(pmd, eaddr);
606 entry = *pte;
607 if (pte_none(entry) || !pte_present(entry)) return;
608
609 paddr = pte_val(entry) & PAGE_MASK;
610
611 sh64_dcache_purge_coloured_phy_page(paddr, eaddr);
612
613}
614/****************************************************************************/
615
616static void sh64_dcache_purge_user_range(struct mm_struct *mm,
617 unsigned long start, unsigned long end)
618{
619 /* There are at least 5 choices for the implementation of this, with
620 pros (+), cons(-), comments(*):
621
622 1. ocbp each line in the range through the original user's ASID
623 + no lines spuriously evicted
624 - tlbmiss handling (must either handle faults on demand => extra
625 special-case code in tlbmiss critical path), or map the page in
626 advance (=> flush_tlb_range in advance to avoid multiple hits)
627 - ASID switching
628 - expensive for large ranges
629
630 2. temporarily map each page in the range to a special effective
631 address and ocbp through the temporary mapping; relies on the
632 fact that SH-5 OCB* always do TLB lookup and match on ptags (they
633 never look at the etags)
634 + no spurious evictions
635 - expensive for large ranges
636 * surely cheaper than (1)
637
638 3. walk all the lines in the cache, check the tags, if a match
639 occurs create a page mapping to ocbp the line through
640 + no spurious evictions
641 - tag inspection overhead
642 - (especially for small ranges)
643 - potential cost of setting up/tearing down page mapping for
644 every line that matches the range
645 * cost partly independent of range size
646
647 4. walk all the lines in the cache, check the tags, if a match
648 occurs use 4 * alloco to purge the line (+3 other probably
649 innocent victims) by natural eviction
650 + no tlb mapping overheads
651 - spurious evictions
652 - tag inspection overhead
653
654 5. implement like flush_cache_all
655 + no tag inspection overhead
656 - spurious evictions
657 - bad for small ranges
658
659 (1) can be ruled out as more expensive than (2). (2) appears best
660 for small ranges. The choice between (3), (4) and (5) for large
661 ranges and the range size for the large/small boundary need
662 benchmarking to determine.
663
664 For now use approach (2) for small ranges and (5) for large ones.
665
666 */
667
668 int n_pages;
669
670 n_pages = ((end - start) >> PAGE_SHIFT);
671 if (n_pages >= 64) {
672#if 1
673 sh64_dcache_purge_all();
674#else
675 unsigned long long set, way;
676 unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
677 for (set = 0; set < cpu_data->dcache.sets; set++) {
678 unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
679 for (way = 0; way < cpu_data->dcache.ways; way++) {
680 unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
681 unsigned long long tag0;
682 unsigned long line_valid;
683
684 asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
685 line_valid = tag0 & SH_CACHE_VALID;
686 if (line_valid) {
687 unsigned long cache_asid;
688 unsigned long epn;
689
690 cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
691 /* The next line needs some
692 explanation. The virtual tags
693 encode bits [31:13] of the virtual
694 address, bit [12] of the 'tag' being
695 implied by the cache set index. */
696 epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);
697
698 if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
699 /* TODO : could optimise this
700 call by batching multiple
701 adjacent sets together. */
702 sh64_dcache_purge_sets(set, 1);
703 break; /* Don't waste time inspecting other ways for this set */
704 }
705 }
706 }
707 }
708#endif
709 } else {
710 /* 'Small' range */
711 unsigned long aligned_start;
712 unsigned long eaddr;
713 unsigned long last_page_start;
714
715 aligned_start = start & PAGE_MASK;
716 /* 'end' is 1 byte beyond the end of the range */
717 last_page_start = (end - 1) & PAGE_MASK;
718
719 eaddr = aligned_start;
720 while (eaddr <= last_page_start) {
721 sh64_dcache_purge_user_page(mm, eaddr);
722 eaddr += PAGE_SIZE;
723 }
724 }
725 return;
726}
727
728static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
729{
730 unsigned long long aligned_start;
731 unsigned long long ull_end;
732 unsigned long long addr;
733
734 ull_end = end;
735
736 /* Just wback over the range using the natural addresses. TLB miss
737 handling will be OK (TBC) : the range has just been written to by
738 the signal frame setup code, so the PTEs must exist.
739
740 Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
741 it doesn't matter, even if the pid->ASID mapping changes whilst
742 we're away. In that case the cache will have been flushed when the
743 mapping was renewed. So the writebacks below will be nugatory (and
744 we'll doubtless have to fault the TLB entry/ies in again with the
745 new ASID), but it's a rare case.
746 */
747 aligned_start = start & L1_CACHE_ALIGN_MASK;
748 addr = aligned_start;
749 while (addr < ull_end) {
750 asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
751 addr += L1_CACHE_BYTES;
752 }
753}
754
755/****************************************************************************/
756
757/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
758#define UNIQUE_EADDR_START 0xe0000000UL
759#define UNIQUE_EADDR_END 0xe8000000UL
760
761static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
762{
763 /* Given a physical address paddr, and a user virtual address
764 user_eaddr which will eventually be mapped to it, create a one-off
765 kernel-private eaddr mapped to the same paddr. This is used for
766 creating special destination pages for copy_user_page and
767 clear_user_page */
768
769 static unsigned long current_pointer = UNIQUE_EADDR_START;
770 unsigned long coloured_pointer;
771
772 if (current_pointer == UNIQUE_EADDR_END) {
773 sh64_dcache_purge_all();
774 current_pointer = UNIQUE_EADDR_START;
775 }
776
777 coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
778 sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
779
780 current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
781
782 return coloured_pointer;
783}
784
785/****************************************************************************/
786
787static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
788{
789 void *coloured_to;
790
791 /* Discard any existing cache entries of the wrong colour. These are
792	   often present if the kernel has recently used the page internally,
793	   then given it up, after which it was allocated to the user.
794 */
795 sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
796
797 coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
798 sh64_page_copy(from, coloured_to);
799
800 sh64_teardown_dtlb_cache_slot();
801}
802
803static void sh64_clear_user_page_coloured(void *to, unsigned long address)
804{
805 void *coloured_to;
806
807 /* Discard any existing kernel-originated lines of the wrong colour (as
808 above) */
809 sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
810
811 coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
812 sh64_page_clear(coloured_to);
813
814 sh64_teardown_dtlb_cache_slot();
815}
816
817#endif /* !CONFIG_DCACHE_DISABLED */
818
819/****************************************************************************/
820
821/*##########################################################################
822 EXTERNALLY CALLABLE API.
823 ##########################################################################*/
824
825/* These functions are described in Documentation/cachetlb.txt.
826 Each one of these functions varies in behaviour depending on whether the
827 I-cache and/or D-cache are configured out.
828
829 Note that the Linux term 'flush' corresponds to what is termed 'purge' in
830 the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
831 invalidate the cache lines, and 'invalidate' for the I-cache.
832 */
833
834#undef FLUSH_TRACE
835
836void flush_cache_all(void)
837{
838 /* Invalidate the entire contents of both caches, after writing back to
839 memory any dirty data from the D-cache. */
840 sh64_dcache_purge_all();
841 sh64_icache_inv_all();
842}
843
844/****************************************************************************/
845
846void flush_cache_mm(struct mm_struct *mm)
847{
848 /* Invalidate an entire user-address space from both caches, after
849 writing back dirty data (e.g. for shared mmap etc). */
850
851 /* This could be coded selectively by inspecting all the tags then
852 doing 4*alloco on any set containing a match (as for
853 flush_cache_range), but fork/exit/execve (where this is called from)
854 are expensive anyway. */
855
856 /* Have to do a purge here, despite the comments re I-cache below.
857 There could be odd-coloured dirty data associated with the mm still
858 in the cache - if this gets written out through natural eviction
859 after the kernel has reused the page there will be chaos.
860 */
861
862 sh64_dcache_purge_all();
863
864 /* The mm being torn down won't ever be active again, so any Icache
865 lines tagged with its ASID won't be visible for the rest of the
866 lifetime of this ASID cycle. Before the ASID gets reused, there
867 will be a flush_cache_all. Hence we don't need to touch the
868 I-cache. This is similar to the lack of action needed in
869 flush_tlb_mm - see fault.c. */
870}
871
872/****************************************************************************/
873
874void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
875 unsigned long end)
876{
877 struct mm_struct *mm = vma->vm_mm;
878
879 /* Invalidate (from both caches) the range [start,end) of virtual
880 addresses from the user address space specified by mm, after writing
881 back any dirty data.
882
883 Note(1), 'end' is 1 byte beyond the end of the range to flush.
884
885 Note(2), this is called with mm->page_table_lock held.*/
886
887 sh64_dcache_purge_user_range(mm, start, end);
888 sh64_icache_inv_user_page_range(mm, start, end);
889}
890
891/****************************************************************************/
892
893void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
894{
895 /* Invalidate any entries in either cache for the vma within the user
896 address space vma->vm_mm for the page starting at virtual address
897 'eaddr'. This seems to be used primarily in breaking COW. Note,
898 the I-cache must be searched too in case the page in question is
899 both writable and being executed from (e.g. stack trampolines.)
900
901 Note(1), this is called with mm->page_table_lock held.
902 */
903
904 sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
905
906 if (vma->vm_flags & VM_EXEC) {
907 sh64_icache_inv_user_page(vma, eaddr);
908 }
909}
910
911/****************************************************************************/
912
913#ifndef CONFIG_DCACHE_DISABLED
914
915void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
916{
917 /* 'from' and 'to' are kernel virtual addresses (within the superpage
918 mapping of the physical RAM). 'address' is the user virtual address
919	   where the copy 'to' will be mapped afterwards.  This allows a custom
920 mapping to be used to ensure that the new copy is placed in the
921 right cache sets for the user to see it without having to bounce it
922 out via memory. Note however : the call to flush_page_to_ram in
923 (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
924 very important case!
925
926 TBD : can we guarantee that on every call, any cache entries for
927 'from' are in the same colour sets as 'address' also? i.e. is this
928 always used just to deal with COW? (I suspect not). */
929
930 /* There are two possibilities here for when the page 'from' was last accessed:
931 * by the kernel : this is OK, no purge required.
932 * by the/a user (e.g. for break_COW) : need to purge.
933
934 If the potential user mapping at 'address' is the same colour as
935 'from' there is no need to purge any cache lines from the 'from'
936 page mapped into cache sets of colour 'address'. (The copy will be
937 accessing the page through 'from').
938 */
939
940 if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
941 sh64_dcache_purge_coloured_phy_page(__pa(from), address);
942 }
943
944 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
945 /* No synonym problem on destination */
946 sh64_page_copy(from, to);
947 } else {
948 sh64_copy_user_page_coloured(to, from, address);
949 }
950
951 /* Note, don't need to flush 'from' page from the cache again - it's
952 done anyway by the generic code */
953}
954
955void clear_user_page(void *to, unsigned long address, struct page *page)
956{
957 /* 'to' is a kernel virtual address (within the superpage
958 mapping of the physical RAM). 'address' is the user virtual address
959	   where the 'to' page will be mapped afterwards.  This allows a custom
960 mapping to be used to ensure that the new copy is placed in the
961 right cache sets for the user to see it without having to bounce it
962 out via memory.
963 */
964
965 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
966 /* No synonym problem on destination */
967 sh64_page_clear(to);
968 } else {
969 sh64_clear_user_page_coloured(to, address);
970 }
971}
972
973#endif /* !CONFIG_DCACHE_DISABLED */
974
975/****************************************************************************/
976
977void flush_dcache_page(struct page *page)
978{
979 sh64_dcache_purge_phy_page(page_to_phys(page));
980 wmb();
981}
982
983/****************************************************************************/
984
985void flush_icache_range(unsigned long start, unsigned long end)
986{
987	/* Flush the range [start,end] of kernel virtual address space from
988 the I-cache. The corresponding range must be purged from the
989 D-cache also because the SH-5 doesn't have cache snooping between
990 the caches. The addresses will be visible through the superpage
991	   mapping, therefore it's guaranteed that there are no cache entries for
992 the range in cache sets of the wrong colour.
993
994 Primarily used for cohering the I-cache after a module has
995 been loaded. */
996
997 /* We also make sure to purge the same range from the D-cache since
998 flush_page_to_ram() won't be doing this for us! */
999
1000 sh64_dcache_purge_kernel_range(start, end);
1001 wmb();
1002 sh64_icache_inv_kernel_range(start, end);
1003}
1004
1005/****************************************************************************/
1006
1007void flush_icache_user_range(struct vm_area_struct *vma,
1008 struct page *page, unsigned long addr, int len)
1009{
1010 /* Flush the range of user (defined by vma->vm_mm) address space
1011 starting at 'addr' for 'len' bytes from the cache. The range does
1012	   not straddle a page boundary; the unique physical page containing
1013 the range is 'page'. This seems to be used mainly for invalidating
1014 an address range following a poke into the program text through the
1015 ptrace() call from another process (e.g. for BRK instruction
1016 insertion). */
1017
1018 sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
1019 mb();
1020
1021 if (vma->vm_flags & VM_EXEC) {
1022 sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
1023 }
1024}
1025
1026/*##########################################################################
1027 ARCH/SH64 PRIVATE CALLABLE API.
1028 ##########################################################################*/
1029
1030void flush_cache_sigtramp(unsigned long start, unsigned long end)
1031{
1032 /* For the address range [start,end), write back the data from the
1033 D-cache and invalidate the corresponding region of the I-cache for
1034 the current process. Used to flush signal trampolines on the stack
1035 to make them executable. */
1036
1037 sh64_dcache_wback_current_user_range(start, end);
1038 wmb();
1039 sh64_icache_inv_current_user_range(start, end);
1040}
1041
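The colour ("synonym") test that copy_user_page() and clear_user_page() above rely on can be read in isolation as the following sketch; the helper name is invented here and is not part of the commit:

/* Two virtual mappings of one physical page can only alias in the
 * operand cache when they differ in the index bits above the page
 * offset, which is exactly what CACHE_OC_SYN_MASK selects. */
static inline int sh64_same_cache_colour(unsigned long kernel_eaddr,
                                         unsigned long user_eaddr)
{
	return ((kernel_eaddr ^ user_eaddr) & CACHE_OC_SYN_MASK) == 0;
}

When the colours match, the plain sh64_page_copy()/sh64_page_clear() path is taken; otherwise the coloured variants first map the destination at a matching colour through the wired DTLB slot.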
diff --git a/arch/sh64/mm/extable.c b/arch/sh64/mm/extable.c
new file mode 100644
index 000000000000..9da50e28b3fa
--- /dev/null
+++ b/arch/sh64/mm/extable.c
@@ -0,0 +1,81 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * arch/sh64/mm/extable.c
7 *
8 * Copyright (C) 2003 Richard Curnow
9 * Copyright (C) 2003, 2004 Paul Mundt
10 *
11 * Cloned from the 2.5 SH version..
12 */
13#include <linux/config.h>
14#include <linux/rwsem.h>
15#include <linux/module.h>
16#include <asm/uaccess.h>
17
18extern unsigned long copy_user_memcpy, copy_user_memcpy_end;
19extern void __copy_user_fixup(void);
20
21static const struct exception_table_entry __copy_user_fixup_ex = {
22 .fixup = (unsigned long)&__copy_user_fixup,
23};
24
25/* Some functions that may trap due to a bad user-mode address have too many loads
26 and stores in them to make it at all practical to label each one and put them all in
27 the main exception table.
28
29   In particular, the fast memcpy routine is like this.  Its fix-up is just to fall back
30 to a slow byte-at-a-time copy, which is handled the conventional way. So it's functionally
31 OK to just handle any trap occurring in the fast memcpy with that fixup. */
32static const struct exception_table_entry *check_exception_ranges(unsigned long addr)
33{
34 if ((addr >= (unsigned long)&copy_user_memcpy) &&
35 (addr <= (unsigned long)&copy_user_memcpy_end))
36 return &__copy_user_fixup_ex;
37
38 return NULL;
39}
40
41/* Simple binary search */
42const struct exception_table_entry *
43search_extable(const struct exception_table_entry *first,
44 const struct exception_table_entry *last,
45 unsigned long value)
46{
47 const struct exception_table_entry *mid;
48
49 mid = check_exception_ranges(value);
50 if (mid)
51 return mid;
52
53 while (first <= last) {
54 long diff;
55
56 mid = (last - first) / 2 + first;
57 diff = mid->insn - value;
58 if (diff == 0)
59 return mid;
60 else if (diff < 0)
61 first = mid+1;
62 else
63 last = mid-1;
64 }
65
66 return NULL;
67}
68
69int fixup_exception(struct pt_regs *regs)
70{
71 const struct exception_table_entry *fixup;
72
73 fixup = search_exception_tables(regs->pc);
74 if (fixup) {
75 regs->pc = fixup->fixup;
76 return 1;
77 }
78
79 return 0;
80}
81
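For context, the consumer of this table sits in fault.c below; condensed here as a sketch (the fragment is illustrative, the names are taken from that file):

/* Kernel-mode fault: either resume at the registered fixup or die. */
if (!user_mode(regs)) {
	const struct exception_table_entry *fixup;

	fixup = search_exception_tables(regs->pc);
	if (fixup) {
		regs->pc = fixup->fixup;	/* e.g. __copy_user_fixup */
		return;
	}
	die("Oops", regs, writeaccess);
}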
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c
new file mode 100644
index 000000000000..a24932881dbb
--- /dev/null
+++ b/arch/sh64/mm/fault.c
@@ -0,0 +1,601 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * arch/sh64/mm/fault.c
7 *
8 * Copyright (C) 2000, 2001 Paolo Alberelli
9 * Copyright (C) 2003 Richard Curnow (/proc/tlb, bug fixes)
10 * Copyright (C) 2003 Paul Mundt
11 *
12 */
13
14#include <linux/signal.h>
15#include <linux/rwsem.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/errno.h>
19#include <linux/string.h>
20#include <linux/types.h>
21#include <linux/ptrace.h>
22#include <linux/mman.h>
23#include <linux/mm.h>
24#include <linux/smp.h>
25#include <linux/smp_lock.h>
26#include <linux/interrupt.h>
27
28#include <asm/system.h>
29#include <asm/io.h>
30#include <asm/tlb.h>
31#include <asm/uaccess.h>
32#include <asm/pgalloc.h>
33#include <asm/mmu_context.h>
34#include <asm/registers.h> /* required by inline asm statements */
35
36#if defined(CONFIG_SH64_PROC_TLB)
37#include <linux/init.h>
38#include <linux/proc_fs.h>
39/* Count numbers of tlb refills in each region */
40static unsigned long long calls_to_update_mmu_cache = 0ULL;
41static unsigned long long calls_to_flush_tlb_page = 0ULL;
42static unsigned long long calls_to_flush_tlb_range = 0ULL;
43static unsigned long long calls_to_flush_tlb_mm = 0ULL;
44static unsigned long long calls_to_flush_tlb_all = 0ULL;
45unsigned long long calls_to_do_slow_page_fault = 0ULL;
46unsigned long long calls_to_do_fast_page_fault = 0ULL;
47
48/* Count size of ranges for flush_tlb_range */
49static unsigned long long flush_tlb_range_1 = 0ULL;
50static unsigned long long flush_tlb_range_2 = 0ULL;
51static unsigned long long flush_tlb_range_3_4 = 0ULL;
52static unsigned long long flush_tlb_range_5_7 = 0ULL;
53static unsigned long long flush_tlb_range_8_11 = 0ULL;
54static unsigned long long flush_tlb_range_12_15 = 0ULL;
55static unsigned long long flush_tlb_range_16_up = 0ULL;
56
57static unsigned long long page_not_present = 0ULL;
58
59#endif
60
61extern void die(const char *,struct pt_regs *,long);
62
63#define PFLAG(val,flag) (( (val) & (flag) ) ? #flag : "" )
64#define PPROT(flag) PFLAG(pgprot_val(prot),flag)
65
66static inline void print_prots(pgprot_t prot)
67{
68 printk("prot is 0x%08lx\n",pgprot_val(prot));
69
70 printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED),PPROT(_PAGE_READ),
71 PPROT(_PAGE_EXECUTE),PPROT(_PAGE_WRITE),PPROT(_PAGE_USER));
72}
73
74static inline void print_vma(struct vm_area_struct *vma)
75{
76 printk("vma start 0x%08lx\n", vma->vm_start);
77 printk("vma end 0x%08lx\n", vma->vm_end);
78
79 print_prots(vma->vm_page_prot);
80 printk("vm_flags 0x%08lx\n", vma->vm_flags);
81}
82
83static inline void print_task(struct task_struct *tsk)
84{
85 printk("Task pid %d\n", tsk->pid);
86}
87
88static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
89{
90 pgd_t *dir;
91 pmd_t *pmd;
92 pte_t *pte;
93 pte_t entry;
94
95 dir = pgd_offset(mm, address);
96 if (pgd_none(*dir)) {
97 return NULL;
98 }
99
100 pmd = pmd_offset(dir, address);
101 if (pmd_none(*pmd)) {
102 return NULL;
103 }
104
105 pte = pte_offset_kernel(pmd, address);
106 entry = *pte;
107
108 if (pte_none(entry)) {
109 return NULL;
110 }
111 if (!pte_present(entry)) {
112 return NULL;
113 }
114
115 return pte;
116}
117
118/*
119 * This routine handles page faults. It determines the address,
120 * and the problem, and then passes it off to one of the appropriate
121 * routines.
122 */
123asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
124 unsigned long textaccess, unsigned long address)
125{
126 struct task_struct *tsk;
127 struct mm_struct *mm;
128 struct vm_area_struct * vma;
129 const struct exception_table_entry *fixup;
130 pte_t *pte;
131
132#if defined(CONFIG_SH64_PROC_TLB)
133 ++calls_to_do_slow_page_fault;
134#endif
135
136 /* SIM
137 * Note this is now called with interrupts still disabled
138 * This is to cope with being called for a missing IO port
139	 * address with interrupts disabled. This should be fixed as
140 * soon as we have a better 'fast path' miss handler.
141 *
142 * Plus take care how you try and debug this stuff.
143 * For example, writing debug data to a port which you
144 * have just faulted on is not going to work.
145 */
146
147 tsk = current;
148 mm = tsk->mm;
149
150 /* Not an IO address, so reenable interrupts */
151 local_irq_enable();
152
153 /*
154 * If we're in an interrupt or have no user
155 * context, we must not take the fault..
156 */
157 if (in_interrupt() || !mm)
158 goto no_context;
159
160 /* TLB misses upon some cache flushes get done under cli() */
161 down_read(&mm->mmap_sem);
162
163 vma = find_vma(mm, address);
164
165 if (!vma) {
166#ifdef DEBUG_FAULT
167 print_task(tsk);
168 printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
169 __FUNCTION__,__LINE__,
170 address,regs->pc,textaccess,writeaccess);
171 show_regs(regs);
172#endif
173 goto bad_area;
174 }
175 if (vma->vm_start <= address) {
176 goto good_area;
177 }
178
179 if (!(vma->vm_flags & VM_GROWSDOWN)) {
180#ifdef DEBUG_FAULT
181 print_task(tsk);
182 printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
183 __FUNCTION__,__LINE__,
184 address,regs->pc,textaccess,writeaccess);
185 show_regs(regs);
186
187 print_vma(vma);
188#endif
189 goto bad_area;
190 }
191 if (expand_stack(vma, address)) {
192#ifdef DEBUG_FAULT
193 print_task(tsk);
194 printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
195 __FUNCTION__,__LINE__,
196 address,regs->pc,textaccess,writeaccess);
197 show_regs(regs);
198#endif
199 goto bad_area;
200 }
201/*
202 * Ok, we have a good vm_area for this memory access, so
203 * we can handle it..
204 */
205good_area:
206 if (textaccess) {
207 if (!(vma->vm_flags & VM_EXEC))
208 goto bad_area;
209 } else {
210 if (writeaccess) {
211 if (!(vma->vm_flags & VM_WRITE))
212 goto bad_area;
213 } else {
214 if (!(vma->vm_flags & VM_READ))
215 goto bad_area;
216 }
217 }
218
219 /*
220 * If for any reason at all we couldn't handle the fault,
221 * make sure we exit gracefully rather than endlessly redo
222 * the fault.
223 */
224survive:
225 switch (handle_mm_fault(mm, vma, address, writeaccess)) {
226 case 1:
227 tsk->min_flt++;
228 break;
229 case 2:
230 tsk->maj_flt++;
231 break;
232 case 0:
233 goto do_sigbus;
234 default:
235 goto out_of_memory;
236 }
237 /* If we get here, the page fault has been handled. Do the TLB refill
238 now from the newly-setup PTE, to avoid having to fault again right
239 away on the same instruction. */
240 pte = lookup_pte (mm, address);
241 if (!pte) {
242 /* From empirical evidence, we can get here, due to
243 !pte_present(pte). (e.g. if a swap-in occurs, and the page
244 is swapped back out again before the process that wanted it
245 gets rescheduled?) */
246 goto no_pte;
247 }
248
249 __do_tlb_refill(address, textaccess, pte);
250
251no_pte:
252
253 up_read(&mm->mmap_sem);
254 return;
255
256/*
257 * Something tried to access memory that isn't in our memory map..
258 * Fix it, but check if it's kernel or user first..
259 */
260bad_area:
261#ifdef DEBUG_FAULT
262 printk("fault:bad area\n");
263#endif
264 up_read(&mm->mmap_sem);
265
266 if (user_mode(regs)) {
267 static int count=0;
268 siginfo_t info;
269 if (count < 4) {
270 /* This is really to help debug faults when starting
271 * usermode, so only need a few */
272 count++;
273 printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
274 address, current->pid, current->comm,
275 (unsigned long) regs->pc);
276#if 0
277 show_regs(regs);
278#endif
279 }
280 if (tsk->pid == 1) {
281 panic("INIT had user mode bad_area\n");
282 }
283 tsk->thread.address = address;
284 tsk->thread.error_code = writeaccess;
285 info.si_signo = SIGSEGV;
286 info.si_errno = 0;
287 info.si_addr = (void *) address;
288 force_sig_info(SIGSEGV, &info, tsk);
289 return;
290 }
291
292no_context:
293#ifdef DEBUG_FAULT
294 printk("fault:No context\n");
295#endif
296 /* Are we prepared to handle this kernel fault? */
297 fixup = search_exception_tables(regs->pc);
298 if (fixup) {
299 regs->pc = fixup->fixup;
300 return;
301 }
302
303/*
304 * Oops. The kernel tried to access some bad page. We'll have to
305 * terminate things with extreme prejudice.
306 *
307 */
308 if (address < PAGE_SIZE)
309 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
310 else
311 printk(KERN_ALERT "Unable to handle kernel paging request");
312 printk(" at virtual address %08lx\n", address);
313 printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff);
314 die("Oops", regs, writeaccess);
315 do_exit(SIGKILL);
316
317/*
318 * We ran out of memory, or some other thing happened to us that made
319 * us unable to handle the page fault gracefully.
320 */
321out_of_memory:
322 if (current->pid == 1) {
323 panic("INIT out of memory\n");
324 yield();
325 goto survive;
326 }
327 printk("fault:Out of memory\n");
328 up_read(&mm->mmap_sem);
329 if (current->pid == 1) {
330 yield();
331 down_read(&mm->mmap_sem);
332 goto survive;
333 }
334 printk("VM: killing process %s\n", tsk->comm);
335 if (user_mode(regs))
336 do_exit(SIGKILL);
337 goto no_context;
338
339do_sigbus:
340 printk("fault:Do sigbus\n");
341 up_read(&mm->mmap_sem);
342
343 /*
344 * Send a sigbus, regardless of whether we were in kernel
345 * or user mode.
346 */
347 tsk->thread.address = address;
348 tsk->thread.error_code = writeaccess;
349 tsk->thread.trap_no = 14;
350 force_sig(SIGBUS, tsk);
351
352 /* Kernel mode? Handle exceptions or die */
353 if (!user_mode(regs))
354 goto no_context;
355}
356
357
358void flush_tlb_all(void);
359
360void update_mmu_cache(struct vm_area_struct * vma,
361 unsigned long address, pte_t pte)
362{
363#if defined(CONFIG_SH64_PROC_TLB)
364 ++calls_to_update_mmu_cache;
365#endif
366
367 /*
368 * This appears to get called once for every pte entry that gets
369 * established => I don't think it's efficient to try refilling the
370 * TLBs with the pages - some may not get accessed even. Also, for
371 * executable pages, it is impossible to determine reliably here which
372 * TLB they should be mapped into (or both even).
373 *
374 * So, just do nothing here and handle faults on demand. In the
375 * TLBMISS handling case, the refill is now done anyway after the pte
376 * has been fixed up, so that deals with most useful cases.
377 */
378}
379
380static void __flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
381{
382 unsigned long long match, pteh=0, lpage;
383 unsigned long tlb;
384 struct mm_struct *mm;
385
386 mm = vma->vm_mm;
387
388 if (mm->context == NO_CONTEXT)
389 return;
390
391 /*
392 * Sign-extend based on neff.
393 */
394 lpage = (page & NEFF_SIGN) ? (page | NEFF_MASK) : page;
395 match = ((mm->context & MMU_CONTEXT_ASID_MASK) << PTEH_ASID_SHIFT) | PTEH_VALID;
396 match |= lpage;
397
398	/* Do ITLB : don't bother for pages in non-executable VMAs */
399 if (vma->vm_flags & VM_EXEC) {
400 for_each_itlb_entry(tlb) {
401 asm volatile ("getcfg %1, 0, %0"
402 : "=r" (pteh)
403 : "r" (tlb) );
404
405 if (pteh == match) {
406 __flush_tlb_slot(tlb);
407 break;
408 }
409
410 }
411 }
412
413 /* Do DTLB : any page could potentially be in here. */
414 for_each_dtlb_entry(tlb) {
415 asm volatile ("getcfg %1, 0, %0"
416 : "=r" (pteh)
417 : "r" (tlb) );
418
419 if (pteh == match) {
420 __flush_tlb_slot(tlb);
421 break;
422 }
423
424 }
425}
426
427void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
428{
429 unsigned long flags;
430
431#if defined(CONFIG_SH64_PROC_TLB)
432 ++calls_to_flush_tlb_page;
433#endif
434
435 if (vma->vm_mm) {
436 page &= PAGE_MASK;
437 local_irq_save(flags);
438 __flush_tlb_page(vma, page);
439 local_irq_restore(flags);
440 }
441}
442
443void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
444 unsigned long end)
445{
446 unsigned long flags;
447 unsigned long long match, pteh=0, pteh_epn, pteh_low;
448 unsigned long tlb;
449 struct mm_struct *mm;
450
451 mm = vma->vm_mm;
452
453#if defined(CONFIG_SH64_PROC_TLB)
454 ++calls_to_flush_tlb_range;
455
456 {
457 unsigned long size = (end - 1) - start;
458 size >>= 12; /* divide by PAGE_SIZE */
459 size++; /* end=start+4096 => 1 page */
460 switch (size) {
461 case 1 : flush_tlb_range_1++; break;
462 case 2 : flush_tlb_range_2++; break;
463 case 3 ... 4 : flush_tlb_range_3_4++; break;
464 case 5 ... 7 : flush_tlb_range_5_7++; break;
465 case 8 ... 11 : flush_tlb_range_8_11++; break;
466 case 12 ... 15 : flush_tlb_range_12_15++; break;
467 default : flush_tlb_range_16_up++; break;
468 }
469 }
470#endif
471
472 if (mm->context == NO_CONTEXT)
473 return;
474
475 local_irq_save(flags);
476
477 start &= PAGE_MASK;
478 end &= PAGE_MASK;
479
480 match = ((mm->context & MMU_CONTEXT_ASID_MASK) << PTEH_ASID_SHIFT) | PTEH_VALID;
481
482 /* Flush ITLB */
483 for_each_itlb_entry(tlb) {
484 asm volatile ("getcfg %1, 0, %0"
485 : "=r" (pteh)
486 : "r" (tlb) );
487
488 pteh_epn = pteh & PAGE_MASK;
489 pteh_low = pteh & ~PAGE_MASK;
490
491 if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
492 __flush_tlb_slot(tlb);
493 }
494
495 /* Flush DTLB */
496 for_each_dtlb_entry(tlb) {
497 asm volatile ("getcfg %1, 0, %0"
498 : "=r" (pteh)
499 : "r" (tlb) );
500
501 pteh_epn = pteh & PAGE_MASK;
502 pteh_low = pteh & ~PAGE_MASK;
503
504 if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
505 __flush_tlb_slot(tlb);
506 }
507
508 local_irq_restore(flags);
509}
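
For reference, the bucketed page count gathered under CONFIG_SH64_PROC_TLB above boils down to the helper sketched below. This is an illustrative sketch only, assuming the 4 kB PAGE_SIZE implied by the ">> 12" in the original; tlb_range_pages() is a made-up name and is not part of this file.

/* Sketch only: page count for a [start, end) range, assuming 4 kB pages. */
static unsigned long tlb_range_pages(unsigned long start, unsigned long end)
{
	unsigned long size = (end - 1) - start;

	size >>= 12;	/* divide by PAGE_SIZE (4096) */
	size++;		/* end = start + 4096 counts as one page */

	return size;	/* e.g. start 0x400000, end 0x404000 -> 4 pages */
}
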
510
511void flush_tlb_mm(struct mm_struct *mm)
512{
513 unsigned long flags;
514
515#if defined(CONFIG_SH64_PROC_TLB)
516 ++calls_to_flush_tlb_mm;
517#endif
518
519 if (mm->context == NO_CONTEXT)
520 return;
521
522 local_irq_save(flags);
523
 524	mm->context = NO_CONTEXT;
 525	if (mm == current->mm)
526 activate_context(mm);
527
528 local_irq_restore(flags);
529
530}
531
532void flush_tlb_all(void)
533{
534 /* Invalidate all, including shared pages, excluding fixed TLBs */
535
536 unsigned long flags, tlb;
537
538#if defined(CONFIG_SH64_PROC_TLB)
539 ++calls_to_flush_tlb_all;
540#endif
541
542 local_irq_save(flags);
543
544 /* Flush each ITLB entry */
545 for_each_itlb_entry(tlb) {
546 __flush_tlb_slot(tlb);
547 }
548
549 /* Flush each DTLB entry */
550 for_each_dtlb_entry(tlb) {
551 __flush_tlb_slot(tlb);
552 }
553
554 local_irq_restore(flags);
555}
556
557void flush_tlb_kernel_range(unsigned long start, unsigned long end)
558{
559 /* FIXME: Optimize this later.. */
560 flush_tlb_all();
561}
562
563#if defined(CONFIG_SH64_PROC_TLB)
564/* Procfs interface to read the performance information */
565
566static int
567tlb_proc_info(char *buf, char **start, off_t fpos, int length, int *eof, void *data)
568{
569 int len=0;
570 len += sprintf(buf+len, "do_fast_page_fault called %12lld times\n", calls_to_do_fast_page_fault);
571 len += sprintf(buf+len, "do_slow_page_fault called %12lld times\n", calls_to_do_slow_page_fault);
572 len += sprintf(buf+len, "update_mmu_cache called %12lld times\n", calls_to_update_mmu_cache);
573 len += sprintf(buf+len, "flush_tlb_page called %12lld times\n", calls_to_flush_tlb_page);
574 len += sprintf(buf+len, "flush_tlb_range called %12lld times\n", calls_to_flush_tlb_range);
575 len += sprintf(buf+len, "flush_tlb_mm called %12lld times\n", calls_to_flush_tlb_mm);
576 len += sprintf(buf+len, "flush_tlb_all called %12lld times\n", calls_to_flush_tlb_all);
577 len += sprintf(buf+len, "flush_tlb_range_sizes\n"
578 " 1 : %12lld\n"
579 " 2 : %12lld\n"
580 " 3 - 4 : %12lld\n"
581 " 5 - 7 : %12lld\n"
582 " 8 - 11 : %12lld\n"
583 "12 - 15 : %12lld\n"
584 "16+ : %12lld\n",
585 flush_tlb_range_1, flush_tlb_range_2, flush_tlb_range_3_4,
586 flush_tlb_range_5_7, flush_tlb_range_8_11, flush_tlb_range_12_15,
587 flush_tlb_range_16_up);
588 len += sprintf(buf+len, "page not present %12lld times\n", page_not_present);
589 *eof = 1;
590 return len;
591}
592
593static int __init register_proc_tlb(void)
594{
595 create_proc_read_entry("tlb", 0, NULL, tlb_proc_info, NULL);
596 return 0;
597}
598
599__initcall(register_proc_tlb);
600
601#endif
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
new file mode 100644
index 000000000000..bcad2aefa4ee
--- /dev/null
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -0,0 +1,264 @@
1/*
2 * arch/sh64/mm/hugetlbpage.c
3 *
4 * SuperH HugeTLB page support.
5 *
6 * Cloned from sparc64 by Paul Mundt.
7 *
8 * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
9 */
10
11#include <linux/config.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/mm.h>
15#include <linux/hugetlb.h>
16#include <linux/pagemap.h>
17#include <linux/smp_lock.h>
18#include <linux/slab.h>
19#include <linux/sysctl.h>
20
21#include <asm/mman.h>
22#include <asm/pgalloc.h>
23#include <asm/tlb.h>
24#include <asm/tlbflush.h>
25#include <asm/cacheflush.h>
26
27static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
28{
29 pgd_t *pgd;
30 pmd_t *pmd;
31 pte_t *pte = NULL;
32
33 pgd = pgd_offset(mm, addr);
34 if (pgd) {
35 pmd = pmd_alloc(mm, pgd, addr);
36 if (pmd)
37 pte = pte_alloc_map(mm, pmd, addr);
38 }
39 return pte;
40}
41
42static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
43{
44 pgd_t *pgd;
45 pmd_t *pmd;
46 pte_t *pte = NULL;
47
48 pgd = pgd_offset(mm, addr);
49 if (pgd) {
50 pmd = pmd_offset(pgd, addr);
51 if (pmd)
52 pte = pte_offset_map(pmd, addr);
53 }
54 return pte;
55}
56
57#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
58
59static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
60 struct page *page, pte_t * page_table, int write_access)
61{
62 unsigned long i;
63 pte_t entry;
64
65 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
66
67 if (write_access)
68 entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
69 vma->vm_page_prot)));
70 else
71 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
72 entry = pte_mkyoung(entry);
73 mk_pte_huge(entry);
74
75 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
76 set_pte(page_table, entry);
77 page_table++;
78
79 pte_val(entry) += PAGE_SIZE;
80 }
81}
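
To make the replication in set_huge_pte() above concrete: one base-page PTE is written for every 4 kB the huge page covers, with the physical address advanced by PAGE_SIZE each time. The sketch below models that pattern with made-up names; the order value of 4 (a 64 kB huge page over 4 kB base pages) is only an assumption, since the real HUGETLB_PAGE_ORDER depends on the configured huge page size.

/* Model only: fill (1 << order) slots with consecutive physical addresses. */
#define EXAMPLE_HPAGE_ORDER	4	/* assumed: 16 x 4 kB = 64 kB */
#define EXAMPLE_PAGE_SIZE	4096UL

static void example_fill_huge(unsigned long *slots, unsigned long first_pa)
{
	unsigned long i;

	for (i = 0; i < (1UL << EXAMPLE_HPAGE_ORDER); i++)
		slots[i] = first_pa + i * EXAMPLE_PAGE_SIZE;	/* frame N, N+1, ... */
}
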
82
83/*
84 * This function checks for proper alignment of input addr and len parameters.
85 */
86int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
87{
88 if (len & ~HPAGE_MASK)
89 return -EINVAL;
90 if (addr & ~HPAGE_MASK)
91 return -EINVAL;
92 return 0;
93}
94
95int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
96 struct vm_area_struct *vma)
97{
98 pte_t *src_pte, *dst_pte, entry;
99 struct page *ptepage;
100 unsigned long addr = vma->vm_start;
101 unsigned long end = vma->vm_end;
102 int i;
103
104 while (addr < end) {
105 dst_pte = huge_pte_alloc(dst, addr);
106 if (!dst_pte)
107 goto nomem;
108 src_pte = huge_pte_offset(src, addr);
109 BUG_ON(!src_pte || pte_none(*src_pte));
110 entry = *src_pte;
111 ptepage = pte_page(entry);
112 get_page(ptepage);
113 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
114 set_pte(dst_pte, entry);
115 pte_val(entry) += PAGE_SIZE;
116 dst_pte++;
117 }
118 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
119 addr += HPAGE_SIZE;
120 }
121 return 0;
122
123nomem:
124 return -ENOMEM;
125}
126
127int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
128 struct page **pages, struct vm_area_struct **vmas,
129 unsigned long *position, int *length, int i)
130{
131 unsigned long vaddr = *position;
132 int remainder = *length;
133
134 WARN_ON(!is_vm_hugetlb_page(vma));
135
136 while (vaddr < vma->vm_end && remainder) {
137 if (pages) {
138 pte_t *pte;
139 struct page *page;
140
141 pte = huge_pte_offset(mm, vaddr);
142
143 /* hugetlb should be locked, and hence, prefaulted */
144 BUG_ON(!pte || pte_none(*pte));
145
146 page = pte_page(*pte);
147
148 WARN_ON(!PageCompound(page));
149
150 get_page(page);
151 pages[i] = page;
152 }
153
154 if (vmas)
155 vmas[i] = vma;
156
157 vaddr += PAGE_SIZE;
158 --remainder;
159 ++i;
160 }
161
162 *length = remainder;
163 *position = vaddr;
164
165 return i;
166}
167
168struct page *follow_huge_addr(struct mm_struct *mm,
169 unsigned long address, int write)
170{
171 return ERR_PTR(-EINVAL);
172}
173
174int pmd_huge(pmd_t pmd)
175{
176 return 0;
177}
178
179struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
180 pmd_t *pmd, int write)
181{
182 return NULL;
183}
184
185void unmap_hugepage_range(struct vm_area_struct *vma,
186 unsigned long start, unsigned long end)
187{
188 struct mm_struct *mm = vma->vm_mm;
189 unsigned long address;
190 pte_t *pte;
191 struct page *page;
192 int i;
193
194 BUG_ON(start & (HPAGE_SIZE - 1));
195 BUG_ON(end & (HPAGE_SIZE - 1));
196
197 for (address = start; address < end; address += HPAGE_SIZE) {
198 pte = huge_pte_offset(mm, address);
199 BUG_ON(!pte);
200 if (pte_none(*pte))
201 continue;
202 page = pte_page(*pte);
203 put_page(page);
204 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
205 pte_clear(mm, address+(i*PAGE_SIZE), pte);
206 pte++;
207 }
208 }
209 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
210 flush_tlb_range(vma, start, end);
211}
212
213int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
214{
215 struct mm_struct *mm = current->mm;
216 unsigned long addr;
217 int ret = 0;
218
219 BUG_ON(vma->vm_start & ~HPAGE_MASK);
220 BUG_ON(vma->vm_end & ~HPAGE_MASK);
221
222 spin_lock(&mm->page_table_lock);
223 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
224 unsigned long idx;
225 pte_t *pte = huge_pte_alloc(mm, addr);
226 struct page *page;
227
228 if (!pte) {
229 ret = -ENOMEM;
230 goto out;
231 }
232 if (!pte_none(*pte))
233 continue;
234
235 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
236 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
237 page = find_get_page(mapping, idx);
238 if (!page) {
239 /* charge the fs quota first */
240 if (hugetlb_get_quota(mapping)) {
241 ret = -ENOMEM;
242 goto out;
243 }
244 page = alloc_huge_page();
245 if (!page) {
246 hugetlb_put_quota(mapping);
247 ret = -ENOMEM;
248 goto out;
249 }
250 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
 251			if (!ret) {
252 unlock_page(page);
253 } else {
254 hugetlb_put_quota(mapping);
255 free_huge_page(page);
256 goto out;
257 }
258 }
259 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
260 }
261out:
262 spin_unlock(&mm->page_table_lock);
263 return ret;
264}
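
The page-cache index computed in hugetlb_prefault() is in huge-page units, so vma->vm_pgoff (kept in base-page units) is scaled down by (HPAGE_SHIFT - PAGE_SHIFT). A worked example follows, assuming 4 kB base pages (PAGE_SHIFT 12) and 64 kB huge pages (HPAGE_SHIFT 16); the addresses and the helper name are made up for illustration.

/* Illustration only: the idx arithmetic from hugetlb_prefault() above. */
static unsigned long example_hugetlb_idx(void)
{
	unsigned long vm_start = 0x60000000UL;
	unsigned long vm_pgoff = 32;		/* 32 base pages = 2 huge pages */
	unsigned long addr     = 0x60020000UL;	/* 2 huge pages into the vma    */

	return ((addr - vm_start) >> 16) + (vm_pgoff >> (16 - 12));	/* 2 + 2 = 4 */
}
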
diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c
new file mode 100644
index 000000000000..a65e8bb2c3cc
--- /dev/null
+++ b/arch/sh64/mm/init.c
@@ -0,0 +1,196 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * arch/sh64/mm/init.c
7 *
8 * Copyright (C) 2000, 2001 Paolo Alberelli
9 * Copyright (C) 2003, 2004 Paul Mundt
10 *
11 */
12
13#include <linux/init.h>
14#include <linux/rwsem.h>
15#include <linux/mm.h>
16#include <linux/swap.h>
17#include <linux/bootmem.h>
18
19#include <asm/mmu_context.h>
20#include <asm/page.h>
21#include <asm/pgalloc.h>
22#include <asm/pgtable.h>
23#include <asm/tlb.h>
24
25#ifdef CONFIG_BLK_DEV_INITRD
26#include <linux/blk.h>
27#endif
28
29DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
30
31/*
32 * Cache of MMU context last used.
33 */
34unsigned long mmu_context_cache;
35pgd_t * mmu_pdtp_cache;
36int after_bootmem = 0;
37
38/*
39 * BAD_PAGE is the page that is used for page faults when linux
40 * is out-of-memory. Older versions of linux just did a
 41 * do_exit(), but using this instead means there is less risk
 42 * of a process dying in kernel mode, possibly leaving an inode
 43 * unused, etc.
44 *
45 * BAD_PAGETABLE is the accompanying page-table: it is initialized
46 * to point to BAD_PAGE entries.
47 *
48 * ZERO_PAGE is a special page that is used for zero-initialized
49 * data and COW.
50 */
51
52extern unsigned char empty_zero_page[PAGE_SIZE];
53extern unsigned char empty_bad_page[PAGE_SIZE];
54extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
55extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
56
57extern char _text, _etext, _edata, __bss_start, _end;
58extern char __init_begin, __init_end;
59
60/* It'd be good if these lines were in the standard header file. */
61#define START_PFN (NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT)
62#define MAX_LOW_PFN (NODE_DATA(0)->bdata->node_low_pfn)
63
64
65void show_mem(void)
66{
67 int i, total = 0, reserved = 0;
68 int shared = 0, cached = 0;
69
70 printk("Mem-info:\n");
71 show_free_areas();
72 printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
73 i = max_mapnr;
74 while (i-- > 0) {
75 total++;
76 if (PageReserved(mem_map+i))
77 reserved++;
78 else if (PageSwapCache(mem_map+i))
79 cached++;
80 else if (page_count(mem_map+i))
81 shared += page_count(mem_map+i) - 1;
82 }
83 printk("%d pages of RAM\n",total);
84 printk("%d reserved pages\n",reserved);
85 printk("%d pages shared\n",shared);
86 printk("%d pages swap cached\n",cached);
87 printk("%ld pages in page table cache\n",pgtable_cache_size);
88}
89
90/*
91 * paging_init() sets up the page tables.
92 *
93 * head.S already did a lot to set up address translation for the kernel.
 94 * Here we come in with:
95 * . MMU enabled
96 * . ASID set (SR)
97 * . some 512MB regions being mapped of which the most relevant here is:
98 * . CACHED segment (ASID 0 [irrelevant], shared AND NOT user)
99 * . possible variable length regions being mapped as:
100 * . UNCACHED segment (ASID 0 [irrelevant], shared AND NOT user)
 101 * . All of the memory regions are placed, independently of the platform,
 102 *   at high addresses above 0x80000000.
 103 * . swapper_pg_dir is already cleared out by the .space directive;
 104 *   in any case swapper does not require a real page directory since
 105 *   it is entirely kernel-contained.
106 *
107 * Those pesky NULL-reference errors in the kernel are then
108 * dealt with by not mapping address 0x00000000 at all.
109 *
110 */
111void __init paging_init(void)
112{
113 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
114
115 pgd_init((unsigned long)swapper_pg_dir);
116 pgd_init((unsigned long)swapper_pg_dir +
117 sizeof(pgd_t) * USER_PTRS_PER_PGD);
118
119 mmu_context_cache = MMU_CONTEXT_FIRST_VERSION;
120
121 /*
122 * All memory is good as ZONE_NORMAL (fall-through) and ZONE_DMA.
123 */
124 zones_size[ZONE_DMA] = MAX_LOW_PFN - START_PFN;
125 NODE_DATA(0)->node_mem_map = NULL;
126 free_area_init_node(0, NODE_DATA(0), zones_size, __MEMORY_START >> PAGE_SHIFT, 0);
127}
128
129void __init mem_init(void)
130{
131 int codesize, reservedpages, datasize, initsize;
132 int tmp;
133
134 max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN;
135 high_memory = (void *)__va(MAX_LOW_PFN * PAGE_SIZE);
136
137 /*
138 * Clear the zero-page.
139 * This is not required but we might want to re-use
140 * this very page to pass boot parameters, one day.
141 */
142 memset(empty_zero_page, 0, PAGE_SIZE);
143
144 /* this will put all low memory onto the freelists */
145 totalram_pages += free_all_bootmem_node(NODE_DATA(0));
146 reservedpages = 0;
147 for (tmp = 0; tmp < num_physpages; tmp++)
148 /*
149 * Only count reserved RAM pages
150 */
151 if (PageReserved(mem_map+tmp))
152 reservedpages++;
153
154 after_bootmem = 1;
155
156 codesize = (unsigned long) &_etext - (unsigned long) &_text;
157 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
158 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
159
160 printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
161 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
162 max_mapnr << (PAGE_SHIFT-10),
163 codesize >> 10,
164 reservedpages << (PAGE_SHIFT-10),
165 datasize >> 10,
166 initsize >> 10);
167}
168
169void free_initmem(void)
170{
171 unsigned long addr;
172
173 addr = (unsigned long)(&__init_begin);
174 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
175 ClearPageReserved(virt_to_page(addr));
176 set_page_count(virt_to_page(addr), 1);
177 free_page(addr);
178 totalram_pages++;
179 }
180 printk ("Freeing unused kernel memory: %ldk freed\n", (&__init_end - &__init_begin) >> 10);
181}
182
183#ifdef CONFIG_BLK_DEV_INITRD
184void free_initrd_mem(unsigned long start, unsigned long end)
185{
186 unsigned long p;
187 for (p = start; p < end; p += PAGE_SIZE) {
188 ClearPageReserved(virt_to_page(p));
189 set_page_count(virt_to_page(p), 1);
190 free_page(p);
191 totalram_pages++;
192 }
193 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
194}
195#endif
196
diff --git a/arch/sh64/mm/ioremap.c b/arch/sh64/mm/ioremap.c
new file mode 100644
index 000000000000..f4003da556bc
--- /dev/null
+++ b/arch/sh64/mm/ioremap.c
@@ -0,0 +1,469 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * arch/sh64/mm/ioremap.c
7 *
8 * Copyright (C) 2000, 2001 Paolo Alberelli
9 * Copyright (C) 2003, 2004 Paul Mundt
10 *
11 * Mostly derived from arch/sh/mm/ioremap.c which, in turn is mostly
12 * derived from arch/i386/mm/ioremap.c .
13 *
14 * (C) Copyright 1995 1996 Linus Torvalds
15 */
16#include <linux/kernel.h>
17#include <linux/slab.h>
18#include <linux/vmalloc.h>
19#include <linux/sched.h>
20#include <linux/string.h>
21#include <asm/io.h>
22#include <asm/pgalloc.h>
23#include <asm/tlbflush.h>
24#include <linux/ioport.h>
25#include <linux/bootmem.h>
26#include <linux/proc_fs.h>
27
28static void shmedia_mapioaddr(unsigned long, unsigned long);
29static unsigned long shmedia_ioremap(struct resource *, u32, int);
30
31static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
32 unsigned long phys_addr, unsigned long flags)
33{
34 unsigned long end;
35 unsigned long pfn;
36 pgprot_t pgprot = __pgprot(_PAGE_PRESENT | _PAGE_READ |
37 _PAGE_WRITE | _PAGE_DIRTY |
38 _PAGE_ACCESSED | _PAGE_SHARED | flags);
39
40 address &= ~PMD_MASK;
41 end = address + size;
42 if (end > PMD_SIZE)
43 end = PMD_SIZE;
44 if (address >= end)
45 BUG();
46
47 pfn = phys_addr >> PAGE_SHIFT;
48
49 pr_debug(" %s: pte %p address %lx size %lx phys_addr %lx\n",
50 __FUNCTION__,pte,address,size,phys_addr);
51
52 do {
53 if (!pte_none(*pte)) {
54 printk("remap_area_pte: page already exists\n");
55 BUG();
56 }
57
58 set_pte(pte, pfn_pte(pfn, pgprot));
59 address += PAGE_SIZE;
60 pfn++;
61 pte++;
62 } while (address && (address < end));
63}
64
65static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
66 unsigned long phys_addr, unsigned long flags)
67{
68 unsigned long end;
69
70 address &= ~PGDIR_MASK;
71 end = address + size;
72
73 if (end > PGDIR_SIZE)
74 end = PGDIR_SIZE;
75
76 phys_addr -= address;
77
78 if (address >= end)
79 BUG();
80
81 do {
82 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
83 if (!pte)
84 return -ENOMEM;
85 remap_area_pte(pte, address, end - address, address + phys_addr, flags);
86 address = (address + PMD_SIZE) & PMD_MASK;
87 pmd++;
88 } while (address && (address < end));
89 return 0;
90}
91
92static int remap_area_pages(unsigned long address, unsigned long phys_addr,
93 unsigned long size, unsigned long flags)
94{
95 int error;
96 pgd_t * dir;
97 unsigned long end = address + size;
98
99 phys_addr -= address;
100 dir = pgd_offset_k(address);
101 flush_cache_all();
102 if (address >= end)
103 BUG();
104 spin_lock(&init_mm.page_table_lock);
105 do {
106 pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
107 error = -ENOMEM;
108 if (!pmd)
109 break;
110 if (remap_area_pmd(pmd, address, end - address,
111 phys_addr + address, flags)) {
112 break;
113 }
114 error = 0;
115 address = (address + PGDIR_SIZE) & PGDIR_MASK;
116 dir++;
117 } while (address && (address < end));
118 spin_unlock(&init_mm.page_table_lock);
119 flush_tlb_all();
 120	return error;
121}
122
123/*
124 * Generic mapping function (not visible outside):
125 */
126
127/*
128 * Remap an arbitrary physical address space into the kernel virtual
129 * address space. Needed when the kernel wants to access high addresses
130 * directly.
131 *
132 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
133 * have to convert them into an offset in a page-aligned mapping, but the
134 * caller shouldn't need to know that small detail.
135 */
136void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
137{
138 void * addr;
139 struct vm_struct * area;
140 unsigned long offset, last_addr;
141
142 /* Don't allow wraparound or zero size */
143 last_addr = phys_addr + size - 1;
144 if (!size || last_addr < phys_addr)
145 return NULL;
146
147 /*
148 * Mappings have to be page-aligned
149 */
150 offset = phys_addr & ~PAGE_MASK;
151 phys_addr &= PAGE_MASK;
152 size = PAGE_ALIGN(last_addr + 1) - phys_addr;
153
154 /*
155 * Ok, go for it..
156 */
 157	area = get_vm_area(size, VM_IOREMAP);
 158	if (!area)
 159		return NULL;
 160	pr_debug("Get vm_area returns %p addr %p\n", area, area->addr);
161 area->phys_addr = phys_addr;
162 addr = area->addr;
163 if (remap_area_pages((unsigned long)addr, phys_addr, size, flags)) {
164 vunmap(addr);
165 return NULL;
166 }
167 return (void *) (offset + (char *)addr);
168}
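
To illustrate the alignment handling in __ioremap() above (made-up addresses, a sketch rather than kernel code): a request for 0x100 bytes at physical 0x12345678 becomes a one-page mapping of 0x12345000, and the caller gets back the mapped base plus the 0x678 intra-page offset.

/* Illustration only: the alignment arithmetic from __ioremap(). */
static void ioremap_alignment_example(void)
{
	unsigned long phys_addr = 0x12345678UL, size = 0x100UL;
	unsigned long last_addr = phys_addr + size - 1;		/* 0x12345777 */
	unsigned long offset    = phys_addr & ~PAGE_MASK;	/* 0x00000678 */

	phys_addr &= PAGE_MASK;					/* 0x12345000 */
	size = PAGE_ALIGN(last_addr + 1) - phys_addr;		/* 0x00001000 */
	/* __ioremap() returns area->addr + offset for such a request. */
}
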
169
170void iounmap(void *addr)
171{
172 struct vm_struct *area;
173
174 vfree((void *) (PAGE_MASK & (unsigned long) addr));
175 area = remove_vm_area((void *) (PAGE_MASK & (unsigned long) addr));
176 if (!area) {
177 printk(KERN_ERR "iounmap: bad address %p\n", addr);
178 return;
179 }
180
181 kfree(area);
182}
183
184static struct resource shmedia_iomap = {
185 .name = "shmedia_iomap",
186 .start = IOBASE_VADDR + PAGE_SIZE,
187 .end = IOBASE_END - 1,
188};
189
190static void shmedia_mapioaddr(unsigned long pa, unsigned long va);
191static void shmedia_unmapioaddr(unsigned long vaddr);
192static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz);
193
194/*
 195 * We have the same problem as the SPARC, so let's have the same comment:
196 * Our mini-allocator...
197 * Boy this is gross! We need it because we must map I/O for
198 * timers and interrupt controller before the kmalloc is available.
199 */
200
201#define XNMLN 15
202#define XNRES 10
203
204struct xresource {
205 struct resource xres; /* Must be first */
206 int xflag; /* 1 == used */
207 char xname[XNMLN+1];
208};
209
210static struct xresource xresv[XNRES];
211
212static struct xresource *xres_alloc(void)
213{
214 struct xresource *xrp;
215 int n;
216
217 xrp = xresv;
218 for (n = 0; n < XNRES; n++) {
219 if (xrp->xflag == 0) {
220 xrp->xflag = 1;
221 return xrp;
222 }
223 xrp++;
224 }
225 return NULL;
226}
227
228static void xres_free(struct xresource *xrp)
229{
230 xrp->xflag = 0;
231}
232
233static struct resource *shmedia_find_resource(struct resource *root,
234 unsigned long vaddr)
235{
236 struct resource *res;
237
238 for (res = root->child; res; res = res->sibling)
239 if (res->start <= vaddr && res->end >= vaddr)
240 return res;
241
242 return NULL;
243}
244
245static unsigned long shmedia_alloc_io(unsigned long phys, unsigned long size,
246 const char *name)
247{
248 static int printed_full = 0;
249 struct xresource *xres;
250 struct resource *res;
251 char *tack;
252 int tlen;
253
254 if (name == NULL) name = "???";
255
256 if ((xres = xres_alloc()) != 0) {
257 tack = xres->xname;
258 res = &xres->xres;
259 } else {
260 if (!printed_full) {
261 printk("%s: done with statics, switching to kmalloc\n",
262 __FUNCTION__);
263 printed_full = 1;
264 }
265 tlen = strlen(name);
266 tack = kmalloc(sizeof (struct resource) + tlen + 1, GFP_KERNEL);
267 if (!tack)
268 return -ENOMEM;
269 memset(tack, 0, sizeof(struct resource));
270 res = (struct resource *) tack;
271 tack += sizeof (struct resource);
272 }
273
274 strncpy(tack, name, XNMLN);
275 tack[XNMLN] = 0;
276 res->name = tack;
277
278 return shmedia_ioremap(res, phys, size);
279}
280
281static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz)
282{
283 unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
284 unsigned long round_sz = (offset + sz + PAGE_SIZE-1) & PAGE_MASK;
285 unsigned long va;
286 unsigned int psz;
287
288 if (allocate_resource(&shmedia_iomap, res, round_sz,
289 shmedia_iomap.start, shmedia_iomap.end,
290 PAGE_SIZE, NULL, NULL) != 0) {
291 panic("alloc_io_res(%s): cannot occupy\n",
292 (res->name != NULL)? res->name: "???");
293 }
294
295 va = res->start;
296 pa &= PAGE_MASK;
297
298 psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
299
300 /* log at boot time ... */
301 printk("mapioaddr: %6s [%2d page%s] va 0x%08lx pa 0x%08x\n",
302 ((res->name != NULL) ? res->name : "???"),
303 psz, psz == 1 ? " " : "s", va, pa);
304
305 for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
306 shmedia_mapioaddr(pa, va);
307 va += PAGE_SIZE;
308 pa += PAGE_SIZE;
309 }
310
311 res->start += offset;
312 res->end = res->start + sz - 1; /* not strictly necessary.. */
313
314 return res->start;
315}
316
317static void shmedia_free_io(struct resource *res)
318{
319 unsigned long len = res->end - res->start + 1;
320
321 BUG_ON((len & (PAGE_SIZE - 1)) != 0);
322
323 while (len) {
324 len -= PAGE_SIZE;
325 shmedia_unmapioaddr(res->start + len);
326 }
327
328 release_resource(res);
329}
330
331static void *sh64_get_page(void)
332{
333 extern int after_bootmem;
334 void *page;
335
336 if (after_bootmem) {
337 page = (void *)get_zeroed_page(GFP_ATOMIC);
338 } else {
339 page = alloc_bootmem_pages(PAGE_SIZE);
340 }
341
342 if (!page || ((unsigned long)page & ~PAGE_MASK))
343 panic("sh64_get_page: Out of memory already?\n");
344
345 return page;
346}
347
348static void shmedia_mapioaddr(unsigned long pa, unsigned long va)
349{
350 pgd_t *pgdp;
351 pmd_t *pmdp;
352 pte_t *ptep, pte;
353 pgprot_t prot;
354 unsigned long flags = 1; /* 1 = CB0-1 device */
355
356 pr_debug("shmedia_mapiopage pa %08lx va %08lx\n", pa, va);
357
358 pgdp = pgd_offset_k(va);
359 if (pgd_none(*pgdp) || !pgd_present(*pgdp)) {
360 pmdp = (pmd_t *)sh64_get_page();
361 set_pgd(pgdp, __pgd((unsigned long)pmdp | _KERNPG_TABLE));
362 }
363
364 pmdp = pmd_offset(pgdp, va);
 365	if (pmd_none(*pmdp) || !pmd_present(*pmdp)) {
366 ptep = (pte_t *)sh64_get_page();
367 set_pmd(pmdp, __pmd((unsigned long)ptep + _PAGE_TABLE));
368 }
369
370 prot = __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |
371 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SHARED | flags);
372
373 pte = pfn_pte(pa >> PAGE_SHIFT, prot);
374 ptep = pte_offset_kernel(pmdp, va);
375
376 if (!pte_none(*ptep) &&
377 pte_val(*ptep) != pte_val(pte))
378 pte_ERROR(*ptep);
379
380 set_pte(ptep, pte);
381
 382	flush_tlb_kernel_range(va, va + PAGE_SIZE);
383}
384
385static void shmedia_unmapioaddr(unsigned long vaddr)
386{
387 pgd_t *pgdp;
388 pmd_t *pmdp;
389 pte_t *ptep;
390
391 pgdp = pgd_offset_k(vaddr);
392 pmdp = pmd_offset(pgdp, vaddr);
393
394 if (pmd_none(*pmdp) || pmd_bad(*pmdp))
395 return;
396
397 ptep = pte_offset_kernel(pmdp, vaddr);
398
399 if (pte_none(*ptep) || !pte_present(*ptep))
400 return;
401
402 clear_page((void *)ptep);
403 pte_clear(&init_mm, vaddr, ptep);
404}
405
406unsigned long onchip_remap(unsigned long phys, unsigned long size, const char *name)
407{
408 if (size < PAGE_SIZE)
409 size = PAGE_SIZE;
410
411 return shmedia_alloc_io(phys, size, name);
412}
413
414void onchip_unmap(unsigned long vaddr)
415{
416 struct resource *res;
417 unsigned int psz;
418
419 res = shmedia_find_resource(&shmedia_iomap, vaddr);
420 if (!res) {
421 printk(KERN_ERR "%s: Failed to free 0x%08lx\n",
422 __FUNCTION__, vaddr);
423 return;
424 }
425
426 psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
427
428 printk(KERN_DEBUG "unmapioaddr: %6s [%2d page%s] freed\n",
429 res->name, psz, psz == 1 ? " " : "s");
430
431 shmedia_free_io(res);
432
433 if ((char *)res >= (char *)xresv &&
434 (char *)res < (char *)&xresv[XNRES]) {
435 xres_free((struct xresource *)res);
436 } else {
437 kfree(res);
438 }
439}
440
441#ifdef CONFIG_PROC_FS
442static int
443ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
444 void *data)
445{
446 char *p = buf, *e = buf + length;
447 struct resource *r;
448 const char *nm;
449
450 for (r = ((struct resource *)data)->child; r != NULL; r = r->sibling) {
451 if (p + 32 >= e) /* Better than nothing */
452 break;
453 if ((nm = r->name) == 0) nm = "???";
454 p += sprintf(p, "%08lx-%08lx: %s\n", r->start, r->end, nm);
455 }
456
457 return p-buf;
458}
459#endif /* CONFIG_PROC_FS */
460
461static int __init register_proc_onchip(void)
462{
463#ifdef CONFIG_PROC_FS
464 create_proc_read_entry("io_map",0,0, ioremap_proc_info, &shmedia_iomap);
465#endif
466 return 0;
467}
468
469__initcall(register_proc_onchip);
diff --git a/arch/sh64/mm/tlb.c b/arch/sh64/mm/tlb.c
new file mode 100644
index 000000000000..d517e7d70340
--- /dev/null
+++ b/arch/sh64/mm/tlb.c
@@ -0,0 +1,166 @@
1/*
2 * arch/sh64/mm/tlb.c
3 *
4 * Copyright (C) 2003 Paul Mundt <lethal@linux-sh.org>
5 * Copyright (C) 2003 Richard Curnow <richard.curnow@superh.com>
6 *
7 * This file is subject to the terms and conditions of the GNU General Public
8 * License. See the file "COPYING" in the main directory of this archive
9 * for more details.
10 *
11 */
12#include <linux/mm.h>
13#include <linux/init.h>
14#include <asm/page.h>
15#include <asm/tlb.h>
16#include <asm/mmu_context.h>
17
18/**
19 * sh64_tlb_init
20 *
21 * Perform initial setup for the DTLB and ITLB.
22 */
23int __init sh64_tlb_init(void)
24{
25 /* Assign some sane DTLB defaults */
26 cpu_data->dtlb.entries = 64;
27 cpu_data->dtlb.step = 0x10;
28
29 cpu_data->dtlb.first = DTLB_FIXED | cpu_data->dtlb.step;
30 cpu_data->dtlb.next = cpu_data->dtlb.first;
31
32 cpu_data->dtlb.last = DTLB_FIXED |
33 ((cpu_data->dtlb.entries - 1) *
34 cpu_data->dtlb.step);
35
36 /* And again for the ITLB */
37 cpu_data->itlb.entries = 64;
38 cpu_data->itlb.step = 0x10;
39
40 cpu_data->itlb.first = ITLB_FIXED | cpu_data->itlb.step;
41 cpu_data->itlb.next = cpu_data->itlb.first;
42 cpu_data->itlb.last = ITLB_FIXED |
43 ((cpu_data->itlb.entries - 1) *
44 cpu_data->itlb.step);
45
46 return 0;
47}
48
49/**
50 * sh64_next_free_dtlb_entry
51 *
52 * Find the next available DTLB entry
53 */
54unsigned long long sh64_next_free_dtlb_entry(void)
55{
56 return cpu_data->dtlb.next;
57}
58
59/**
60 * sh64_get_wired_dtlb_entry
61 *
62 * Allocate a wired (locked-in) entry in the DTLB
63 */
64unsigned long long sh64_get_wired_dtlb_entry(void)
65{
66 unsigned long long entry = sh64_next_free_dtlb_entry();
67
68 cpu_data->dtlb.first += cpu_data->dtlb.step;
69 cpu_data->dtlb.next += cpu_data->dtlb.step;
70
71 return entry;
72}
73
74/**
75 * sh64_put_wired_dtlb_entry
76 *
77 * @entry: Address of TLB slot.
78 *
79 * Free a wired (locked-in) entry in the DTLB.
80 *
81 * Works like a stack, last one to allocate must be first one to free.
82 */
83int sh64_put_wired_dtlb_entry(unsigned long long entry)
84{
85 __flush_tlb_slot(entry);
86
87 /*
88 * We don't do any particularly useful tracking of wired entries,
89 * so this approach works like a stack .. last one to be allocated
90 * has to be the first one to be freed.
91 *
92 * We could potentially load wired entries into a list and work on
93 * rebalancing the list periodically (which also entails moving the
94 * contents of a TLB entry) .. though I have a feeling that this is
95 * more trouble than it's worth.
96 */
97
98 /*
99 * Entry must be valid .. we don't want any ITLB addresses!
100 */
101 if (entry <= DTLB_FIXED)
102 return -EINVAL;
103
104 /*
 105	 * Next, check if we're within range to be freed (i.e., it must be the
 106	 * entry immediately beneath the first 'free' entry!)
107 */
108 if (entry < (cpu_data->dtlb.first - cpu_data->dtlb.step))
109 return -EINVAL;
110
111 /* If we are, then bring this entry back into the list */
112 cpu_data->dtlb.first -= cpu_data->dtlb.step;
113 cpu_data->dtlb.next = entry;
114
115 return 0;
116}
117
118/**
119 * sh64_setup_tlb_slot
120 *
121 * @config_addr: Address of TLB slot.
122 * @eaddr: Virtual address.
123 * @asid: Address Space Identifier.
124 * @paddr: Physical address.
125 *
126 * Load up a virtual<->physical translation for @eaddr<->@paddr in the
127 * pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry).
128 */
129inline void sh64_setup_tlb_slot(unsigned long long config_addr,
130 unsigned long eaddr,
131 unsigned long asid,
132 unsigned long paddr)
133{
134 unsigned long long pteh, ptel;
135
136 /* Sign extension */
137#if (NEFF == 32)
138 pteh = (unsigned long long)(signed long long)(signed long) eaddr;
139#else
140#error "Can't sign extend more than 32 bits yet"
141#endif
142 pteh &= PAGE_MASK;
143 pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
144#if (NEFF == 32)
145 ptel = (unsigned long long)(signed long long)(signed long) paddr;
146#else
147#error "Can't sign extend more than 32 bits yet"
148#endif
149 ptel &= PAGE_MASK;
150 ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE);
151
152 asm volatile("putcfg %0, 1, %1\n\t"
153 "putcfg %0, 0, %2\n"
154 : : "r" (config_addr), "r" (ptel), "r" (pteh));
155}
156
157/**
158 * sh64_teardown_tlb_slot
159 *
160 * @config_addr: Address of TLB slot.
161 *
162 * Teardown any existing mapping in the TLB slot @config_addr.
163 */
164inline void sh64_teardown_tlb_slot(unsigned long long config_addr)
165 __attribute__ ((alias("__flush_tlb_slot")));
166
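
The wired-entry helpers above are meant to be used in strict LIFO order, as the comment in sh64_put_wired_dtlb_entry() notes. A hedged usage sketch follows; the virtual address, ASID and physical address are made-up values and wired_dtlb_example() is not part of this file.

/* Usage sketch only: wire one DTLB translation and release it again. */
static void wired_dtlb_example(void)
{
	unsigned long long slot = sh64_get_wired_dtlb_entry();

	/* Made-up mapping: virtual 0xfa000000 -> physical 0x08000000, ASID 0. */
	sh64_setup_tlb_slot(slot, 0xfa000000UL, 0, 0x08000000UL);

	/* ... use the mapping ... */

	sh64_teardown_tlb_slot(slot);
	sh64_put_wired_dtlb_entry(slot);	/* must be freed LIFO */
}
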
diff --git a/arch/sh64/mm/tlbmiss.c b/arch/sh64/mm/tlbmiss.c
new file mode 100644
index 000000000000..c8615954aaa9
--- /dev/null
+++ b/arch/sh64/mm/tlbmiss.c
@@ -0,0 +1,280 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * arch/sh64/mm/tlbmiss.c
7 *
8 * Original code from fault.c
9 * Copyright (C) 2000, 2001 Paolo Alberelli
10 *
11 * Fast PTE->TLB refill path
12 * Copyright (C) 2003 Richard.Curnow@superh.com
13 *
14 * IMPORTANT NOTES :
15 * The do_fast_page_fault function is called from a context in entry.S where very few registers
16 * have been saved. In particular, the code in this file must be compiled not to use ANY
 17 * caller-save registers that are not part of the restricted save set. Also, it means that
18 * code in this file must not make calls to functions elsewhere in the kernel, or else the
19 * excepting context will see corruption in its caller-save registers. Plus, the entry.S save
20 * area is non-reentrant, so this code has to run with SR.BL==1, i.e. no interrupts taken inside
 21 * it, and it must panic on any exception.
22 *
23 */
24
25#include <linux/signal.h>
26#include <linux/sched.h>
27#include <linux/kernel.h>
28#include <linux/errno.h>
29#include <linux/string.h>
30#include <linux/types.h>
31#include <linux/ptrace.h>
32#include <linux/mman.h>
33#include <linux/mm.h>
34#include <linux/smp.h>
35#include <linux/smp_lock.h>
36#include <linux/interrupt.h>
37
38#include <asm/system.h>
39#include <asm/tlb.h>
40#include <asm/io.h>
41#include <asm/uaccess.h>
42#include <asm/pgalloc.h>
43#include <asm/mmu_context.h>
44#include <asm/registers.h> /* required by inline asm statements */
45
46/* Callable from fault.c, so not static */
47inline void __do_tlb_refill(unsigned long address,
48 unsigned long long is_text_not_data, pte_t *pte)
49{
50 unsigned long long ptel;
51 unsigned long long pteh=0;
52 struct tlb_info *tlbp;
53 unsigned long long next;
54
55 /* Get PTEL first */
56 ptel = pte_val(*pte);
57
58 /*
59 * Set PTEH register
60 */
61 pteh = address & MMU_VPN_MASK;
62
63 /* Sign extend based on neff. */
64#if (NEFF == 32)
65 /* Faster sign extension */
66 pteh = (unsigned long long)(signed long long)(signed long)pteh;
67#else
68 /* General case */
69 pteh = (pteh & NEFF_SIGN) ? (pteh | NEFF_MASK) : pteh;
70#endif
71
72 /* Set the ASID. */
73 pteh |= get_asid() << PTEH_ASID_SHIFT;
74 pteh |= PTEH_VALID;
75
76 /* Set PTEL register, set_pte has performed the sign extension */
77 ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
78
79 tlbp = is_text_not_data ? &(cpu_data->itlb) : &(cpu_data->dtlb);
80 next = tlbp->next;
81 __flush_tlb_slot(next);
82 asm volatile ("putcfg %0,1,%2\n\n\t"
83 "putcfg %0,0,%1\n"
84 : : "r" (next), "r" (pteh), "r" (ptel) );
85
86 next += TLB_STEP;
87 if (next > tlbp->last) next = tlbp->first;
88 tlbp->next = next;
89
90}
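
For the NEFF == 32 case above, the chained casts do the same job as the general (pteh & NEFF_SIGN) ? (pteh | NEFF_MASK) : pteh expression: they replicate bit 31 of the effective address into bits 63..32. A worked sketch, assuming a 32-bit long as on this port and a made-up address:

/* Illustration only: the NEFF == 32 sign-extension trick from above. */
static unsigned long long sign_extend_example(void)
{
	unsigned long va = 0x80400000UL;	/* bit 31 set (made-up address) */

	/* Result is 0xffffffff80400000: bits 63..32 copy bit 31. */
	return (unsigned long long)(signed long long)(signed long)va;
}
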
91
92static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long protection_flags,
93 unsigned long long textaccess,
94 unsigned long address)
95{
96 pgd_t *dir;
97 pmd_t *pmd;
 98	pte_t *pte;
99 pte_t entry;
100
101 dir = pgd_offset_k(address);
102 pmd = pmd_offset(dir, address);
103
104 if (pmd_none(*pmd)) {
105 return 0;
106 }
107
108 if (pmd_bad(*pmd)) {
109 pmd_clear(pmd);
110 return 0;
111 }
112
113 pte = pte_offset_kernel(pmd, address);
114 entry = *pte;
115
116 if (pte_none(entry) || !pte_present(entry)) {
117 return 0;
118 }
119
120 if ((pte_val(entry) & protection_flags) != protection_flags) {
121 return 0;
122 }
123
124 __do_tlb_refill(address, textaccess, pte);
125
126 return 1;
127}
128
129static int handle_tlbmiss(struct mm_struct *mm, unsigned long long protection_flags,
130 unsigned long long textaccess,
131 unsigned long address)
132{
133 pgd_t *dir;
134 pmd_t *pmd;
135 pte_t *pte;
136 pte_t entry;
137
138 /* NB. The PGD currently only contains a single entry - there is no
139 page table tree stored for the top half of the address space since
140 virtual pages in that region should never be mapped in user mode.
141 (In kernel mode, the only things in that region are the 512Mb super
142 page (locked in), and vmalloc (modules) + I/O device pages (handled
143 by handle_vmalloc_fault), so no PGD for the upper half is required
144 by kernel mode either).
145
146 See how mm->pgd is allocated and initialised in pgd_alloc to see why
147 the next test is necessary. - RPC */
148 if (address >= (unsigned long) TASK_SIZE) {
149 /* upper half - never has page table entries. */
150 return 0;
151 }
152 dir = pgd_offset(mm, address);
153 if (pgd_none(*dir)) {
154 return 0;
155 }
156 if (!pgd_present(*dir)) {
157 return 0;
158 }
159
160 pmd = pmd_offset(dir, address);
161 if (pmd_none(*pmd)) {
162 return 0;
163 }
164 if (!pmd_present(*pmd)) {
165 return 0;
166 }
167 pte = pte_offset_kernel(pmd, address);
168 entry = *pte;
169 if (pte_none(entry)) {
170 return 0;
171 }
172 if (!pte_present(entry)) {
173 return 0;
174 }
175
176 /* If the page doesn't have sufficient protection bits set to service the
177 kind of fault being handled, there's not much point doing the TLB refill.
178 Punt the fault to the general handler. */
179 if ((pte_val(entry) & protection_flags) != protection_flags) {
180 return 0;
181 }
182
183 __do_tlb_refill(address, textaccess, pte);
184
185 return 1;
186}
187
188/* Put all this information into one structure so that everything is just arithmetic
189 relative to a single base address. This reduces the number of movi/shori pairs needed
190 just to load addresses of static data. */
191struct expevt_lookup {
192 unsigned short protection_flags[8];
193 unsigned char is_text_access[8];
194 unsigned char is_write_access[8];
195};
196
197#define PRU (1<<9)
198#define PRW (1<<8)
199#define PRX (1<<7)
200#define PRR (1<<6)
201
202#define DIRTY (_PAGE_DIRTY | _PAGE_ACCESSED)
203#define YOUNG (_PAGE_ACCESSED)
204
205/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether
206 the fault happened in user mode or privileged mode. */
207static struct expevt_lookup expevt_lookup_table = {
208 .protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW},
209 .is_text_access = {1, 1, 0, 0, 0, 0, 0, 0}
210};
211
212/*
213 This routine handles page faults that can be serviced just by refilling a
214 TLB entry from an existing page table entry. (This case represents a very
215 large majority of page faults.) Return 1 if the fault was successfully
216 handled. Return 0 if the fault could not be handled. (This leads into the
217 general fault handling in fault.c which deals with mapping file-backed
218 pages, stack growth, segmentation faults, swapping etc etc)
219 */
220asmlinkage int do_fast_page_fault(unsigned long long ssr_md, unsigned long long expevt,
221 unsigned long address)
222{
223 struct task_struct *tsk;
224 struct mm_struct *mm;
225 unsigned long long textaccess;
226 unsigned long long protection_flags;
227 unsigned long long index;
228 unsigned long long expevt4;
229
230 /* The next few lines implement a way of hashing EXPEVT into a small array index
 231	   which can be used to look up parameters specific to the type of TLBMISS being
232 handled. Note:
233 ITLBMISS has EXPEVT==0xa40
234 RTLBMISS has EXPEVT==0x040
235 WTLBMISS has EXPEVT==0x060
236 */
237
238 expevt4 = (expevt >> 4);
239 /* TODO : xor ssr_md into this expression too. Then we can check that PRU is set
240 when it needs to be. */
241 index = expevt4 ^ (expevt4 >> 5);
242 index &= 7;
243 protection_flags = expevt_lookup_table.protection_flags[index];
244 textaccess = expevt_lookup_table.is_text_access[index];
245
246#ifdef CONFIG_SH64_PROC_TLB
247 ++calls_to_do_fast_page_fault;
248#endif
249
250 /* SIM
251 * Note this is now called with interrupts still disabled
252 * This is to cope with being called for a missing IO port
 253	 * address with interrupts disabled. This should be fixed as
254 * soon as we have a better 'fast path' miss handler.
255 *
256 * Plus take care how you try and debug this stuff.
257 * For example, writing debug data to a port which you
258 * have just faulted on is not going to work.
259 */
260
261 tsk = current;
262 mm = tsk->mm;
263
264 if ((address >= VMALLOC_START && address < VMALLOC_END) ||
265 (address >= IOBASE_VADDR && address < IOBASE_END)) {
266 if (ssr_md) {
267 /* Process-contexts can never have this address range mapped */
268 if (handle_vmalloc_fault(mm, protection_flags, textaccess, address)) {
269 return 1;
270 }
271 }
272 } else if (!in_interrupt() && mm) {
273 if (handle_tlbmiss(mm, protection_flags, textaccess, address)) {
274 return 1;
275 }
276 }
277
278 return 0;
279}
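
The EXPEVT hash in do_fast_page_fault() can be checked by hand against expevt_lookup_table: 0xa40 >> 4 = 0xa4 and 0xa4 ^ 0x05 = 0xa1, which masks to index 1 (PRX, text access); 0x040 and 0x060 likewise map to indices 4 (PRR) and 6 (PRW). The standalone sketch below repeats that arithmetic; expevt_to_index() is a made-up name used only for illustration.

/* Illustration only: EXPEVT -> lookup-table index, as computed above. */
static unsigned int expevt_to_index(unsigned long long expevt)
{
	unsigned long long expevt4 = expevt >> 4;

	/* 0xa40 (ITLBMISS) -> 1, 0x040 (RTLBMISS) -> 4, 0x060 (WTLBMISS) -> 6 */
	return (expevt4 ^ (expevt4 >> 5)) & 7;
}
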
280