diff options
author | Stuart Menefy <stuart.menefy@st.com> | 2006-11-23 21:42:24 -0500 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2006-12-05 20:45:38 -0500 |
commit | 9b3a53ab76771e3669e50086c131e1574fe25847 (patch) | |
tree | 07dab1cd3972c7b82ddd5b7ad1e28628d7756dbb /arch/sh | |
parent | 9daa0c257d6c200b58092e0bfc32b77c4618a8af (diff) |
sh: TLB miss fast-path optimizations.
Handle simple TLB miss faults which can be resolved completely
from the page table in assembler.
Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh')
-rw-r--r-- | arch/sh/Kconfig | 3 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh3/entry.S | 206 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/probe.c | 19 | ||||
-rw-r--r-- | arch/sh/mm/Kconfig | 1 | ||||
-rw-r--r-- | arch/sh/mm/fault.c | 86 |
5 files changed, 201 insertions, 114 deletions
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index a03f155571c8..48308dc86e33 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig | |||
@@ -379,6 +379,9 @@ config CPU_HAS_SR_RB | |||
379 | See <file:Documentation/sh/register-banks.txt> for further | 379 | See <file:Documentation/sh/register-banks.txt> for further |
380 | information on SR.RB and register banking in the kernel in general. | 380 | information on SR.RB and register banking in the kernel in general. |
381 | 381 | ||
382 | config CPU_HAS_PTEA | ||
383 | bool | ||
384 | |||
382 | endmenu | 385 | endmenu |
383 | 386 | ||
384 | menu "Timer support" | 387 | menu "Timer support" |
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S index 869d56fb7d63..5de99b498738 100644 --- a/arch/sh/kernel/cpu/sh3/entry.S +++ b/arch/sh/kernel/cpu/sh3/entry.S | |||
@@ -13,8 +13,10 @@ | |||
13 | #include <linux/linkage.h> | 13 | #include <linux/linkage.h> |
14 | #include <asm/asm-offsets.h> | 14 | #include <asm/asm-offsets.h> |
15 | #include <asm/thread_info.h> | 15 | #include <asm/thread_info.h> |
16 | #include <asm/cpu/mmu_context.h> | ||
17 | #include <asm/unistd.h> | 16 | #include <asm/unistd.h> |
17 | #include <asm/cpu/mmu_context.h> | ||
18 | #include <asm/pgtable.h> | ||
19 | #include <asm/page.h> | ||
18 | 20 | ||
19 | ! NOTE: | 21 | ! NOTE: |
20 | ! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address | 22 | ! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address |
@@ -136,29 +138,14 @@ ENTRY(tlb_protection_violation_store) | |||
136 | 138 | ||
137 | call_dpf: | 139 | call_dpf: |
138 | mov.l 1f, r0 | 140 | mov.l 1f, r0 |
139 | mov r5, r8 | 141 | mov.l @r0, r6 ! address |
140 | mov.l @r0, r6 | ||
141 | mov r6, r9 | ||
142 | mov.l 2f, r0 | ||
143 | sts pr, r10 | ||
144 | jsr @r0 | ||
145 | mov r15, r4 | ||
146 | ! | ||
147 | tst r0, r0 | ||
148 | bf/s 0f | ||
149 | lds r10, pr | ||
150 | rts | ||
151 | nop | ||
152 | 0: sti | ||
153 | mov.l 3f, r0 | 142 | mov.l 3f, r0 |
154 | mov r9, r6 | 143 | sti |
155 | mov r8, r5 | ||
156 | jmp @r0 | 144 | jmp @r0 |
157 | mov r15, r4 | 145 | mov r15, r4 ! regs |
158 | 146 | ||
159 | .align 2 | 147 | .align 2 |
160 | 1: .long MMU_TEA | 148 | 1: .long MMU_TEA |
161 | 2: .long __do_page_fault | ||
162 | 3: .long do_page_fault | 149 | 3: .long do_page_fault |
163 | 150 | ||
164 | .align 2 | 151 | .align 2 |
@@ -344,9 +331,176 @@ general_exception: | |||
344 | 2: .long ret_from_exception | 331 | 2: .long ret_from_exception |
345 | ! | 332 | ! |
346 | ! | 333 | ! |
334 | |||
335 | /* This code makes some assumptions to improve performance. | ||
336 | * Make sure they are still true. */ | ||
337 | #if PTRS_PER_PGD != PTRS_PER_PTE | ||
338 | #error PGD and PTE sizes don't match | ||
339 | #endif | ||
340 | |||
341 | /* gas doesn't flag impossible values for mov #immediate as an error */ | ||
342 | #if (_PAGE_PRESENT >> 2) > 0x7f | ||
343 | #error cannot load PAGE_PRESENT as an immediate | ||
344 | #endif | ||
345 | #if _PAGE_DIRTY > 0x7f | ||
346 | #error cannot load PAGE_DIRTY as an immediate | ||
347 | #endif | ||
348 | #if (_PAGE_PRESENT << 2) != _PAGE_ACCESSED | ||
349 | #error cannot derive PAGE_ACCESSED from PAGE_PRESENT | ||
350 | #endif | ||
351 | |||
352 | #if defined(CONFIG_CPU_SH4) | ||
353 | #define ldmmupteh(r) mov.l 8f, r | ||
354 | #else | ||
355 | #define ldmmupteh(r) mov #MMU_PTEH, r | ||
356 | #endif | ||
357 | |||
347 | .balign 1024,0,1024 | 358 | .balign 1024,0,1024 |
348 | tlb_miss: | 359 | tlb_miss: |
349 | mov.l 1f, k2 | 360 | #ifdef COUNT_EXCEPTIONS |
361 | ! Increment the counts | ||
362 | mov.l 9f, k1 | ||
363 | mov.l @k1, k2 | ||
364 | add #1, k2 | ||
365 | mov.l k2, @k1 | ||
366 | #endif | ||
367 | |||
368 | ! k0 scratch | ||
369 | ! k1 pgd and pte pointers | ||
370 | ! k2 faulting address | ||
371 | ! k3 pgd and pte index masks | ||
372 | ! k4 shift | ||
373 | |||
374 | ! Load up the pgd entry (k1) | ||
375 | |||
376 | ldmmupteh(k0) ! 9 LS (latency=2) MMU_PTEH | ||
377 | |||
378 | mov.w 4f, k3 ! 8 LS (latency=2) (PTRS_PER_PGD-1) << 2 | ||
379 | mov #-(PGDIR_SHIFT-2), k4 ! 6 EX | ||
380 | |||
381 | mov.l @(MMU_TEA-MMU_PTEH,k0), k2 ! 18 LS (latency=2) | ||
382 | |||
383 | mov.l @(MMU_TTB-MMU_PTEH,k0), k1 ! 18 LS (latency=2) | ||
384 | |||
385 | mov k2, k0 ! 5 MT (latency=0) | ||
386 | shld k4, k0 ! 99 EX | ||
387 | |||
388 | and k3, k0 ! 78 EX | ||
389 | |||
390 | mov.l @(k0, k1), k1 ! 21 LS (latency=2) | ||
391 | mov #-(PAGE_SHIFT-2), k4 ! 6 EX | ||
392 | |||
393 | ! Load up the pte entry (k2) | ||
394 | |||
395 | mov k2, k0 ! 5 MT (latency=0) | ||
396 | shld k4, k0 ! 99 EX | ||
397 | |||
398 | tst k1, k1 ! 86 MT | ||
399 | |||
400 | bt 20f ! 110 BR | ||
401 | |||
402 | and k3, k0 ! 78 EX | ||
403 | mov.w 5f, k4 ! 8 LS (latency=2) _PAGE_PRESENT | ||
404 | |||
405 | mov.l @(k0, k1), k2 ! 21 LS (latency=2) | ||
406 | add k0, k1 ! 49 EX | ||
407 | |||
408 | #ifdef CONFIG_CPU_HAS_PTEA | ||
409 | ! Test the entry for present and _PAGE_ACCESSED | ||
410 | |||
411 | mov #-28, k3 ! 6 EX | ||
412 | mov k2, k0 ! 5 MT (latency=0) | ||
413 | |||
414 | tst k4, k2 ! 68 MT | ||
415 | shld k3, k0 ! 99 EX | ||
416 | |||
417 | bt 20f ! 110 BR | ||
418 | |||
419 | ! Set PTEA register | ||
420 | ! MMU_PTEA = ((pteval >> 28) & 0xe) | (pteval & 0x1) | ||
421 | ! | ||
422 | ! k0=pte>>28, k1=pte*, k2=pte, k3=<unused>, k4=_PAGE_PRESENT | ||
423 | |||
424 | and #0xe, k0 ! 79 EX | ||
425 | |||
426 | mov k0, k3 ! 5 MT (latency=0) | ||
427 | mov k2, k0 ! 5 MT (latency=0) | ||
428 | |||
429 | and #1, k0 ! 79 EX | ||
430 | |||
431 | or k0, k3 ! 82 EX | ||
432 | |||
433 | ldmmupteh(k0) ! 9 LS (latency=2) | ||
434 | shll2 k4 ! 101 EX _PAGE_ACCESSED | ||
435 | |||
436 | tst k4, k2 ! 68 MT | ||
437 | |||
438 | mov.l k3, @(MMU_PTEA-MMU_PTEH,k0) ! 27 LS | ||
439 | |||
440 | mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK | ||
441 | |||
442 | ! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED | ||
443 | #else | ||
444 | |||
445 | ! Test the entry for present and _PAGE_ACCESSED | ||
446 | |||
447 | mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK | ||
448 | tst k4, k2 ! 68 MT | ||
449 | |||
450 | shll2 k4 ! 101 EX _PAGE_ACCESSED | ||
451 | ldmmupteh(k0) ! 9 LS (latency=2) | ||
452 | |||
453 | bt 20f ! 110 BR | ||
454 | tst k4, k2 ! 68 MT | ||
455 | |||
456 | ! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED | ||
457 | |||
458 | #endif | ||
459 | |||
460 | ! Set up the entry | ||
461 | |||
462 | and k2, k3 ! 78 EX | ||
463 | bt/s 10f ! 108 BR | ||
464 | |||
465 | mov.l k3, @(MMU_PTEL-MMU_PTEH,k0) ! 27 LS | ||
466 | |||
467 | ldtlb ! 128 CO | ||
468 | |||
469 | ! At least one instruction between ldtlb and rte | ||
470 | nop ! 119 NOP | ||
471 | |||
472 | rte ! 126 CO | ||
473 | |||
474 | nop ! 119 NOP | ||
475 | |||
476 | |||
477 | 10: or k4, k2 ! 82 EX | ||
478 | |||
479 | ldtlb ! 128 CO | ||
480 | |||
481 | ! At least one instruction between ldtlb and rte | ||
482 | mov.l k2, @k1 ! 27 LS | ||
483 | |||
484 | rte ! 126 CO | ||
485 | |||
486 | ! Note we cannot execute mov here, because it is executed after | ||
487 | ! restoring SSR, so would be executed in user space. | ||
488 | nop ! 119 NOP | ||
489 | |||
490 | |||
491 | .align 5 | ||
492 | ! One cache line if possible... | ||
493 | 1: .long swapper_pg_dir | ||
494 | 4: .short (PTRS_PER_PGD-1) << 2 | ||
495 | 5: .short _PAGE_PRESENT | ||
496 | 7: .long _PAGE_FLAGS_HARDWARE_MASK | ||
497 | 8: .long MMU_PTEH | ||
498 | #ifdef COUNT_EXCEPTIONS | ||
499 | 9: .long exception_count_miss | ||
500 | #endif | ||
501 | |||
502 | ! Either pgd or pte not present | ||
503 | 20: mov.l 1f, k2 | ||
350 | mov.l 4f, k3 | 504 | mov.l 4f, k3 |
351 | bra handle_exception | 505 | bra handle_exception |
352 | mov.l @k2, k2 | 506 | mov.l @k2, k2 |
@@ -496,6 +650,15 @@ skip_save: | |||
496 | bf interrupt_exception | 650 | bf interrupt_exception |
497 | shlr2 r8 | 651 | shlr2 r8 |
498 | shlr r8 | 652 | shlr r8 |
653 | |||
654 | #ifdef COUNT_EXCEPTIONS | ||
655 | mov.l 5f, r9 | ||
656 | add r8, r9 | ||
657 | mov.l @r9, r10 | ||
658 | add #1, r10 | ||
659 | mov.l r10, @r9 | ||
660 | #endif | ||
661 | |||
499 | mov.l 4f, r9 | 662 | mov.l 4f, r9 |
500 | add r8, r9 | 663 | add r8, r9 |
501 | mov.l @r9, r9 | 664 | mov.l @r9, r9 |
@@ -509,6 +672,9 @@ skip_save: | |||
509 | 2: .long 0x000080f0 ! FD=1, IMASK=15 | 672 | 2: .long 0x000080f0 ! FD=1, IMASK=15 |
510 | 3: .long 0xcfffffff ! RB=0, BL=0 | 673 | 3: .long 0xcfffffff ! RB=0, BL=0 |
511 | 4: .long exception_handling_table | 674 | 4: .long exception_handling_table |
675 | #ifdef COUNT_EXCEPTIONS | ||
676 | 5: .long exception_count_table | ||
677 | #endif | ||
512 | 678 | ||
513 | interrupt_exception: | 679 | interrupt_exception: |
514 | mov.l 1f, r9 | 680 | mov.l 1f, r9 |
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c index c294de1e14a3..afe0f1b1c030 100644 --- a/arch/sh/kernel/cpu/sh4/probe.c +++ b/arch/sh/kernel/cpu/sh4/probe.c | |||
@@ -79,16 +79,16 @@ int __init detect_cpu_and_cache_system(void) | |||
79 | case 0x205: | 79 | case 0x205: |
80 | cpu_data->type = CPU_SH7750; | 80 | cpu_data->type = CPU_SH7750; |
81 | cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU | | 81 | cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU | |
82 | CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA; | 82 | CPU_HAS_PERF_COUNTER; |
83 | break; | 83 | break; |
84 | case 0x206: | 84 | case 0x206: |
85 | cpu_data->type = CPU_SH7750S; | 85 | cpu_data->type = CPU_SH7750S; |
86 | cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU | | 86 | cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU | |
87 | CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA; | 87 | CPU_HAS_PERF_COUNTER; |
88 | break; | 88 | break; |
89 | case 0x1100: | 89 | case 0x1100: |
90 | cpu_data->type = CPU_SH7751; | 90 | cpu_data->type = CPU_SH7751; |
91 | cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; | 91 | cpu_data->flags |= CPU_HAS_FPU; |
92 | break; | 92 | break; |
93 | case 0x2000: | 93 | case 0x2000: |
94 | cpu_data->type = CPU_SH73180; | 94 | cpu_data->type = CPU_SH73180; |
@@ -126,23 +126,22 @@ int __init detect_cpu_and_cache_system(void) | |||
126 | break; | 126 | break; |
127 | case 0x8000: | 127 | case 0x8000: |
128 | cpu_data->type = CPU_ST40RA; | 128 | cpu_data->type = CPU_ST40RA; |
129 | cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; | 129 | cpu_data->flags |= CPU_HAS_FPU; |
130 | break; | 130 | break; |
131 | case 0x8100: | 131 | case 0x8100: |
132 | cpu_data->type = CPU_ST40GX1; | 132 | cpu_data->type = CPU_ST40GX1; |
133 | cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; | 133 | cpu_data->flags |= CPU_HAS_FPU; |
134 | break; | 134 | break; |
135 | case 0x700: | 135 | case 0x700: |
136 | cpu_data->type = CPU_SH4_501; | 136 | cpu_data->type = CPU_SH4_501; |
137 | cpu_data->icache.ways = 2; | 137 | cpu_data->icache.ways = 2; |
138 | cpu_data->dcache.ways = 2; | 138 | cpu_data->dcache.ways = 2; |
139 | cpu_data->flags |= CPU_HAS_PTEA; | ||
140 | break; | 139 | break; |
141 | case 0x600: | 140 | case 0x600: |
142 | cpu_data->type = CPU_SH4_202; | 141 | cpu_data->type = CPU_SH4_202; |
143 | cpu_data->icache.ways = 2; | 142 | cpu_data->icache.ways = 2; |
144 | cpu_data->dcache.ways = 2; | 143 | cpu_data->dcache.ways = 2; |
145 | cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; | 144 | cpu_data->flags |= CPU_HAS_FPU; |
146 | break; | 145 | break; |
147 | case 0x500 ... 0x501: | 146 | case 0x500 ... 0x501: |
148 | switch (prr) { | 147 | switch (prr) { |
@@ -160,7 +159,7 @@ int __init detect_cpu_and_cache_system(void) | |||
160 | cpu_data->icache.ways = 2; | 159 | cpu_data->icache.ways = 2; |
161 | cpu_data->dcache.ways = 2; | 160 | cpu_data->dcache.ways = 2; |
162 | 161 | ||
163 | cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA; | 162 | cpu_data->flags |= CPU_HAS_FPU; |
164 | 163 | ||
165 | break; | 164 | break; |
166 | default: | 165 | default: |
@@ -173,6 +172,10 @@ int __init detect_cpu_and_cache_system(void) | |||
173 | cpu_data->dcache.ways = 1; | 172 | cpu_data->dcache.ways = 1; |
174 | #endif | 173 | #endif |
175 | 174 | ||
175 | #ifdef CONFIG_CPU_HAS_PTEA | ||
176 | cpu_data->flags |= CPU_HAS_PTEA; | ||
177 | #endif | ||
178 | |||
176 | /* | 179 | /* |
177 | * On anything that's not a direct-mapped cache, look to the CVR | 180 | * On anything that's not a direct-mapped cache, look to the CVR |
178 | * for I/D-cache specifics. | 181 | * for I/D-cache specifics. |
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index 88e9663fc9fc..6cd6d0045d16 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig | |||
@@ -20,6 +20,7 @@ config CPU_SH4 | |||
20 | bool | 20 | bool |
21 | select CPU_HAS_INTEVT | 21 | select CPU_HAS_INTEVT |
22 | select CPU_HAS_SR_RB | 22 | select CPU_HAS_SR_RB |
23 | select CPU_HAS_PTEA if !CPU_SUBTYPE_ST40 | ||
23 | 24 | ||
24 | config CPU_SH4A | 25 | config CPU_SH4A |
25 | bool | 26 | bool |
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 128907ef7fcd..123fb80c859d 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c | |||
@@ -223,89 +223,3 @@ do_sigbus: | |||
223 | if (!user_mode(regs)) | 223 | if (!user_mode(regs)) |
224 | goto no_context; | 224 | goto no_context; |
225 | } | 225 | } |
226 | |||
227 | #ifdef CONFIG_SH_STORE_QUEUES | ||
228 | /* | ||
229 | * This is a special case for the SH-4 store queues, as pages for this | ||
230 | * space still need to be faulted in before it's possible to flush the | ||
231 | * store queue cache for writeout to the remapped region. | ||
232 | */ | ||
233 | #define P3_ADDR_MAX (P4SEG_STORE_QUE + 0x04000000) | ||
234 | #else | ||
235 | #define P3_ADDR_MAX P4SEG | ||
236 | #endif | ||
237 | |||
238 | /* | ||
239 | * Called with interrupts disabled. | ||
240 | */ | ||
241 | asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs, | ||
242 | unsigned long writeaccess, | ||
243 | unsigned long address) | ||
244 | { | ||
245 | pgd_t *pgd; | ||
246 | pud_t *pud; | ||
247 | pmd_t *pmd; | ||
248 | pte_t *pte; | ||
249 | pte_t entry; | ||
250 | struct mm_struct *mm = current->mm; | ||
251 | spinlock_t *ptl; | ||
252 | int ret = 1; | ||
253 | |||
254 | #ifdef CONFIG_SH_KGDB | ||
255 | if (kgdb_nofault && kgdb_bus_err_hook) | ||
256 | kgdb_bus_err_hook(); | ||
257 | #endif | ||
258 | |||
259 | /* | ||
260 | * We don't take page faults for P1, P2, and parts of P4, these | ||
261 | * are always mapped, whether it be due to legacy behaviour in | ||
262 | * 29-bit mode, or due to PMB configuration in 32-bit mode. | ||
263 | */ | ||
264 | if (address >= P3SEG && address < P3_ADDR_MAX) { | ||
265 | pgd = pgd_offset_k(address); | ||
266 | mm = NULL; | ||
267 | } else { | ||
268 | if (unlikely(address >= TASK_SIZE || !mm)) | ||
269 | return 1; | ||
270 | |||
271 | pgd = pgd_offset(mm, address); | ||
272 | } | ||
273 | |||
274 | pud = pud_offset(pgd, address); | ||
275 | if (pud_none_or_clear_bad(pud)) | ||
276 | return 1; | ||
277 | pmd = pmd_offset(pud, address); | ||
278 | if (pmd_none_or_clear_bad(pmd)) | ||
279 | return 1; | ||
280 | |||
281 | if (mm) | ||
282 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | ||
283 | else | ||
284 | pte = pte_offset_kernel(pmd, address); | ||
285 | |||
286 | entry = *pte; | ||
287 | if (unlikely(pte_none(entry) || pte_not_present(entry))) | ||
288 | goto unlock; | ||
289 | if (unlikely(writeaccess && !pte_write(entry))) | ||
290 | goto unlock; | ||
291 | |||
292 | if (writeaccess) | ||
293 | entry = pte_mkdirty(entry); | ||
294 | entry = pte_mkyoung(entry); | ||
295 | |||
296 | #ifdef CONFIG_CPU_SH4 | ||
297 | /* | ||
298 | * ITLB is not affected by "ldtlb" instruction. | ||
299 | * So, we need to flush the entry by ourselves. | ||
300 | */ | ||
301 | __flush_tlb_page(get_asid(), address & PAGE_MASK); | ||
302 | #endif | ||
303 | |||
304 | set_pte(pte, entry); | ||
305 | update_mmu_cache(NULL, address, entry); | ||
306 | ret = 0; | ||
307 | unlock: | ||
308 | if (mm) | ||
309 | pte_unmap_unlock(pte, ptl); | ||
310 | return ret; | ||
311 | } | ||