about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Stuart Menefy <stuart.menefy@st.com> 2006-11-23 21:42:24 -0500
committer: Paul Mundt <lethal@linux-sh.org> 2006-12-05 20:45:38 -0500
commit: 9b3a53ab76771e3669e50086c131e1574fe25847 (patch)
tree: 07dab1cd3972c7b82ddd5b7ad1e28628d7756dbb
parent: 9daa0c257d6c200b58092e0bfc32b77c4618a8af (diff)
sh: TLB miss fast-path optimizations.
Handle simple TLB miss faults which can be resolved completely from the page table in assembler.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
-rw-r--r--  arch/sh/Kconfig                   3
-rw-r--r--  arch/sh/kernel/cpu/sh3/entry.S  206
-rw-r--r--  arch/sh/kernel/cpu/sh4/probe.c   19
-rw-r--r--  arch/sh/mm/Kconfig                1
-rw-r--r--  arch/sh/mm/fault.c               86
-rw-r--r--  include/asm-sh/pgtable.h          6
6 files changed, 204 insertions, 117 deletions
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index a03f155571c8..48308dc86e33 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -379,6 +379,9 @@ config CPU_HAS_SR_RB
379 See <file:Documentation/sh/register-banks.txt> for further 379 See <file:Documentation/sh/register-banks.txt> for further
380 information on SR.RB and register banking in the kernel in general. 380 information on SR.RB and register banking in the kernel in general.
381 381
382config CPU_HAS_PTEA
383 bool
384
382endmenu 385endmenu
383 386
384menu "Timer support" 387menu "Timer support"
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index 869d56fb7d63..5de99b498738 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -13,8 +13,10 @@
13#include <linux/linkage.h> 13#include <linux/linkage.h>
14#include <asm/asm-offsets.h> 14#include <asm/asm-offsets.h>
15#include <asm/thread_info.h> 15#include <asm/thread_info.h>
16#include <asm/cpu/mmu_context.h>
17#include <asm/unistd.h> 16#include <asm/unistd.h>
17#include <asm/cpu/mmu_context.h>
18#include <asm/pgtable.h>
19#include <asm/page.h>
18 20
19! NOTE: 21! NOTE:
20! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address 22! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address
@@ -136,29 +138,14 @@ ENTRY(tlb_protection_violation_store)
136 138
137call_dpf: 139call_dpf:
138 mov.l 1f, r0 140 mov.l 1f, r0
139 mov r5, r8 141 mov.l @r0, r6 ! address
140 mov.l @r0, r6
141 mov r6, r9
142 mov.l 2f, r0
143 sts pr, r10
144 jsr @r0
145 mov r15, r4
146 !
147 tst r0, r0
148 bf/s 0f
149 lds r10, pr
150 rts
151 nop
1520: sti
153 mov.l 3f, r0 142 mov.l 3f, r0
154 mov r9, r6 143 sti
155 mov r8, r5
156 jmp @r0 144 jmp @r0
157 mov r15, r4 145 mov r15, r4 ! regs
158 146
159 .align 2 147 .align 2
1601: .long MMU_TEA 1481: .long MMU_TEA
1612: .long __do_page_fault
1623: .long do_page_fault 1493: .long do_page_fault
163 150
164 .align 2 151 .align 2
@@ -344,9 +331,176 @@ general_exception:
3442: .long ret_from_exception 3312: .long ret_from_exception
345! 332!
346! 333!
334
335/* This code makes some assumptions to improve performance.
336 * Make sure they are stil true. */
337#if PTRS_PER_PGD != PTRS_PER_PTE
338#error PDG and PTE sizes don't match
339#endif
340
341/* gas doesn't flag impossible values for mov #immediate as an error */
342#if (_PAGE_PRESENT >> 2) > 0x7f
343#error cannot load PAGE_PRESENT as an immediate
344#endif
345#if _PAGE_DIRTY > 0x7f
346#error cannot load PAGE_DIRTY as an immediate
347#endif
348#if (_PAGE_PRESENT << 2) != _PAGE_ACCESSED
349#error cannot derive PAGE_ACCESSED from PAGE_PRESENT
350#endif
351
352#if defined(CONFIG_CPU_SH4)
353#define ldmmupteh(r) mov.l 8f, r
354#else
355#define ldmmupteh(r) mov #MMU_PTEH, r
356#endif
357
347 .balign 1024,0,1024 358 .balign 1024,0,1024
348tlb_miss: 359tlb_miss:
349 mov.l 1f, k2 360#ifdef COUNT_EXCEPTIONS
361 ! Increment the counts
362 mov.l 9f, k1
363 mov.l @k1, k2
364 add #1, k2
365 mov.l k2, @k1
366#endif
367
368 ! k0 scratch
369 ! k1 pgd and pte pointers
370 ! k2 faulting address
371 ! k3 pgd and pte index masks
372 ! k4 shift
373
374 ! Load up the pgd entry (k1)
375
376 ldmmupteh(k0) ! 9 LS (latency=2) MMU_PTEH
377
378 mov.w 4f, k3 ! 8 LS (latency=2) (PTRS_PER_PGD-1) << 2
379 mov #-(PGDIR_SHIFT-2), k4 ! 6 EX
380
381 mov.l @(MMU_TEA-MMU_PTEH,k0), k2 ! 18 LS (latency=2)
382
383 mov.l @(MMU_TTB-MMU_PTEH,k0), k1 ! 18 LS (latency=2)
384
385 mov k2, k0 ! 5 MT (latency=0)
386 shld k4, k0 ! 99 EX
387
388 and k3, k0 ! 78 EX
389
390 mov.l @(k0, k1), k1 ! 21 LS (latency=2)
391 mov #-(PAGE_SHIFT-2), k4 ! 6 EX
392
393 ! Load up the pte entry (k2)
394
395 mov k2, k0 ! 5 MT (latency=0)
396 shld k4, k0 ! 99 EX
397
398 tst k1, k1 ! 86 MT
399
400 bt 20f ! 110 BR
401
402 and k3, k0 ! 78 EX
403 mov.w 5f, k4 ! 8 LS (latency=2) _PAGE_PRESENT
404
405 mov.l @(k0, k1), k2 ! 21 LS (latency=2)
406 add k0, k1 ! 49 EX
407
408#ifdef CONFIG_CPU_HAS_PTEA
409 ! Test the entry for present and _PAGE_ACCESSED
410
411 mov #-28, k3 ! 6 EX
412 mov k2, k0 ! 5 MT (latency=0)
413
414 tst k4, k2 ! 68 MT
415 shld k3, k0 ! 99 EX
416
417 bt 20f ! 110 BR
418
419 ! Set PTEA register
420 ! MMU_PTEA = ((pteval >> 28) & 0xe) | (pteval & 0x1)
421 !
422 ! k0=pte>>28, k1=pte*, k2=pte, k3=<unused>, k4=_PAGE_PRESENT
423
424 and #0xe, k0 ! 79 EX
425
426 mov k0, k3 ! 5 MT (latency=0)
427 mov k2, k0 ! 5 MT (latency=0)
428
429 and #1, k0 ! 79 EX
430
431 or k0, k3 ! 82 EX
432
433 ldmmupteh(k0) ! 9 LS (latency=2)
434 shll2 k4 ! 101 EX _PAGE_ACCESSED
435
436 tst k4, k2 ! 68 MT
437
438 mov.l k3, @(MMU_PTEA-MMU_PTEH,k0) ! 27 LS
439
440 mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK
441
442 ! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
443#else
444
445 ! Test the entry for present and _PAGE_ACCESSED
446
447 mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK
448 tst k4, k2 ! 68 MT
449
450 shll2 k4 ! 101 EX _PAGE_ACCESSED
451 ldmmupteh(k0) ! 9 LS (latency=2)
452
453 bt 20f ! 110 BR
454 tst k4, k2 ! 68 MT
455
456 ! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
457
458#endif
459
460 ! Set up the entry
461
462 and k2, k3 ! 78 EX
463 bt/s 10f ! 108 BR
464
465 mov.l k3, @(MMU_PTEL-MMU_PTEH,k0) ! 27 LS
466
467 ldtlb ! 128 CO
468
469 ! At least one instruction between ldtlb and rte
470 nop ! 119 NOP
471
472 rte ! 126 CO
473
474 nop ! 119 NOP
475
476
47710: or k4, k2 ! 82 EX
478
479 ldtlb ! 128 CO
480
481 ! At least one instruction between ldtlb and rte
482 mov.l k2, @k1 ! 27 LS
483
484 rte ! 126 CO
485
486 ! Note we cannot execute mov here, because it is executed after
487 ! restoring SSR, so would be executed in user space.
488 nop ! 119 NOP
489
490
491 .align 5
492 ! Once cache line if possible...
4931: .long swapper_pg_dir
4944: .short (PTRS_PER_PGD-1) << 2
4955: .short _PAGE_PRESENT
4967: .long _PAGE_FLAGS_HARDWARE_MASK
4978: .long MMU_PTEH
498#ifdef COUNT_EXCEPTIONS
4999: .long exception_count_miss
500#endif
501
502 ! Either pgd or pte not present
50320: mov.l 1f, k2
350 mov.l 4f, k3 504 mov.l 4f, k3
351 bra handle_exception 505 bra handle_exception
352 mov.l @k2, k2 506 mov.l @k2, k2
@@ -496,6 +650,15 @@ skip_save:
496 bf interrupt_exception 650 bf interrupt_exception
497 shlr2 r8 651 shlr2 r8
498 shlr r8 652 shlr r8
653
654#ifdef COUNT_EXCEPTIONS
655 mov.l 5f, r9
656 add r8, r9
657 mov.l @r9, r10
658 add #1, r10
659 mov.l r10, @r9
660#endif
661
499 mov.l 4f, r9 662 mov.l 4f, r9
500 add r8, r9 663 add r8, r9
501 mov.l @r9, r9 664 mov.l @r9, r9
@@ -509,6 +672,9 @@ skip_save:
5092: .long 0x000080f0 ! FD=1, IMASK=15 6722: .long 0x000080f0 ! FD=1, IMASK=15
5103: .long 0xcfffffff ! RB=0, BL=0 6733: .long 0xcfffffff ! RB=0, BL=0
5114: .long exception_handling_table 6744: .long exception_handling_table
675#ifdef COUNT_EXCEPTIONS
6765: .long exception_count_table
677#endif
512 678
513interrupt_exception: 679interrupt_exception:
514 mov.l 1f, r9 680 mov.l 1f, r9
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index c294de1e14a3..afe0f1b1c030 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -79,16 +79,16 @@ int __init detect_cpu_and_cache_system(void)
79 case 0x205: 79 case 0x205:
80 cpu_data->type = CPU_SH7750; 80 cpu_data->type = CPU_SH7750;
81 cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU | 81 cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
82 CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA; 82 CPU_HAS_PERF_COUNTER;
83 break; 83 break;
84 case 0x206: 84 case 0x206:
85 cpu_data->type = CPU_SH7750S; 85 cpu_data->type = CPU_SH7750S;
86 cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU | 86 cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
87 CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA; 87 CPU_HAS_PERF_COUNTER;
88 break; 88 break;
89 case 0x1100: 89 case 0x1100:
90 cpu_data->type = CPU_SH7751; 90 cpu_data->type = CPU_SH7751;
91 cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; 91 cpu_data->flags |= CPU_HAS_FPU;
92 break; 92 break;
93 case 0x2000: 93 case 0x2000:
94 cpu_data->type = CPU_SH73180; 94 cpu_data->type = CPU_SH73180;
@@ -126,23 +126,22 @@ int __init detect_cpu_and_cache_system(void)
126 break; 126 break;
127 case 0x8000: 127 case 0x8000:
128 cpu_data->type = CPU_ST40RA; 128 cpu_data->type = CPU_ST40RA;
129 cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; 129 cpu_data->flags |= CPU_HAS_FPU;
130 break; 130 break;
131 case 0x8100: 131 case 0x8100:
132 cpu_data->type = CPU_ST40GX1; 132 cpu_data->type = CPU_ST40GX1;
133 cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; 133 cpu_data->flags |= CPU_HAS_FPU;
134 break; 134 break;
135 case 0x700: 135 case 0x700:
136 cpu_data->type = CPU_SH4_501; 136 cpu_data->type = CPU_SH4_501;
137 cpu_data->icache.ways = 2; 137 cpu_data->icache.ways = 2;
138 cpu_data->dcache.ways = 2; 138 cpu_data->dcache.ways = 2;
139 cpu_data->flags |= CPU_HAS_PTEA;
140 break; 139 break;
141 case 0x600: 140 case 0x600:
142 cpu_data->type = CPU_SH4_202; 141 cpu_data->type = CPU_SH4_202;
143 cpu_data->icache.ways = 2; 142 cpu_data->icache.ways = 2;
144 cpu_data->dcache.ways = 2; 143 cpu_data->dcache.ways = 2;
145 cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; 144 cpu_data->flags |= CPU_HAS_FPU;
146 break; 145 break;
147 case 0x500 ... 0x501: 146 case 0x500 ... 0x501:
148 switch (prr) { 147 switch (prr) {
@@ -160,7 +159,7 @@ int __init detect_cpu_and_cache_system(void)
160 cpu_data->icache.ways = 2; 159 cpu_data->icache.ways = 2;
161 cpu_data->dcache.ways = 2; 160 cpu_data->dcache.ways = 2;
162 161
163 cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; 162 cpu_data->flags |= CPU_HAS_FPU;
164 163
165 break; 164 break;
166 default: 165 default:
@@ -173,6 +172,10 @@ int __init detect_cpu_and_cache_system(void)
173 cpu_data->dcache.ways = 1; 172 cpu_data->dcache.ways = 1;
174#endif 173#endif
175 174
175#ifdef CONFIG_CPU_HAS_PTEA
176 cpu_data->flags |= CPU_HAS_PTEA;
177#endif
178
176 /* 179 /*
177 * On anything that's not a direct-mapped cache, look to the CVR 180 * On anything that's not a direct-mapped cache, look to the CVR
178 * for I/D-cache specifics. 181 * for I/D-cache specifics.
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 88e9663fc9fc..6cd6d0045d16 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -20,6 +20,7 @@ config CPU_SH4
20 bool 20 bool
21 select CPU_HAS_INTEVT 21 select CPU_HAS_INTEVT
22 select CPU_HAS_SR_RB 22 select CPU_HAS_SR_RB
23 select CPU_HAS_PTEA if !CPU_SUBTYPE_ST40
23 24
24config CPU_SH4A 25config CPU_SH4A
25 bool 26 bool
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 128907ef7fcd..123fb80c859d 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -223,89 +223,3 @@ do_sigbus:
223 if (!user_mode(regs)) 223 if (!user_mode(regs))
224 goto no_context; 224 goto no_context;
225} 225}
226
227#ifdef CONFIG_SH_STORE_QUEUES
228/*
229 * This is a special case for the SH-4 store queues, as pages for this
230 * space still need to be faulted in before it's possible to flush the
231 * store queue cache for writeout to the remapped region.
232 */
233#define P3_ADDR_MAX (P4SEG_STORE_QUE + 0x04000000)
234#else
235#define P3_ADDR_MAX P4SEG
236#endif
237
238/*
239 * Called with interrupts disabled.
240 */
241asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
242 unsigned long writeaccess,
243 unsigned long address)
244{
245 pgd_t *pgd;
246 pud_t *pud;
247 pmd_t *pmd;
248 pte_t *pte;
249 pte_t entry;
250 struct mm_struct *mm = current->mm;
251 spinlock_t *ptl;
252 int ret = 1;
253
254#ifdef CONFIG_SH_KGDB
255 if (kgdb_nofault && kgdb_bus_err_hook)
256 kgdb_bus_err_hook();
257#endif
258
259 /*
260 * We don't take page faults for P1, P2, and parts of P4, these
261 * are always mapped, whether it be due to legacy behaviour in
262 * 29-bit mode, or due to PMB configuration in 32-bit mode.
263 */
264 if (address >= P3SEG && address < P3_ADDR_MAX) {
265 pgd = pgd_offset_k(address);
266 mm = NULL;
267 } else {
268 if (unlikely(address >= TASK_SIZE || !mm))
269 return 1;
270
271 pgd = pgd_offset(mm, address);
272 }
273
274 pud = pud_offset(pgd, address);
275 if (pud_none_or_clear_bad(pud))
276 return 1;
277 pmd = pmd_offset(pud, address);
278 if (pmd_none_or_clear_bad(pmd))
279 return 1;
280
281 if (mm)
282 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
283 else
284 pte = pte_offset_kernel(pmd, address);
285
286 entry = *pte;
287 if (unlikely(pte_none(entry) || pte_not_present(entry)))
288 goto unlock;
289 if (unlikely(writeaccess && !pte_write(entry)))
290 goto unlock;
291
292 if (writeaccess)
293 entry = pte_mkdirty(entry);
294 entry = pte_mkyoung(entry);
295
296#ifdef CONFIG_CPU_SH4
297 /*
298 * ITLB is not affected by "ldtlb" instruction.
299 * So, we need to flush the entry by ourselves.
300 */
301 __flush_tlb_page(get_asid(), address & PAGE_MASK);
302#endif
303
304 set_pte(pte, entry);
305 update_mmu_cache(NULL, address, entry);
306 ret = 0;
307unlock:
308 if (mm)
309 pte_unmap_unlock(pte, ptl);
310 return ret;
311}
diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h
index b1f21e765640..fa625245051d 100644
--- a/include/asm-sh/pgtable.h
+++ b/include/asm-sh/pgtable.h
@@ -43,12 +43,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
43/* PGD bits */ 43/* PGD bits */
44#define PGDIR_SHIFT (PTE_SHIFT + PTE_BITS) 44#define PGDIR_SHIFT (PTE_SHIFT + PTE_BITS)
45#define PGDIR_BITS (32 - PGDIR_SHIFT) 45#define PGDIR_BITS (32 - PGDIR_SHIFT)
46#define PGDIR_SIZE (1UL << PGDIR_SHIFT) 46#define PGDIR_SIZE (1 << PGDIR_SHIFT)
47#define PGDIR_MASK (~(PGDIR_SIZE-1)) 47#define PGDIR_MASK (~(PGDIR_SIZE-1))
48 48
49/* Entries per level */ 49/* Entries per level */
50#define PTRS_PER_PTE (1UL << PTE_BITS) 50#define PTRS_PER_PTE (1 << PTE_BITS)
51#define PTRS_PER_PGD (1UL << PGDIR_BITS) 51#define PTRS_PER_PGD (1 << PGDIR_BITS)
52 52
53#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) 53#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
54#define FIRST_USER_ADDRESS 0 54#define FIRST_USER_ADDRESS 0