author     David S. Miller <davem@davemloft.net>           2006-01-31 21:29:18 -0500
committer  David S. Miller <davem@sunset.davemloft.net>    2006-03-20 04:11:13 -0500
commit     74bf4312fff083ab25c3f357cc653ada7995e5f6 (patch)
tree       c23dea461e32485f4cd7ca4b8c33c632655eb906 /arch/sparc64/mm
parent     30d4d1ffed7098afe2641536d67eef150499da02 (diff)
[SPARC64]: Move away from virtual page tables, part 1.
We now use the TSB hardware assist features of the UltraSPARC
MMUs.
SMP is currently knowingly broken; we need to find another place
to store the per-cpu base pointers. We hid them away in the TSB
base register, and that obviously will not work any more :-)
Another known broken case is non-8KB base page size.
Also noticed that flush_tlb_all() is not referenced anywhere; only
the internal __flush_tlb_all() (local cpu only) is used by the
sparc64 port, so we can get rid of flush_tlb_all().
The kernel gets its own 8KB TSB (swapper_tsb) and each address space
gets its own private 8KB TSB. Later we can add code to dynamically
increase the size of the per-process TSB as the RSS grows. An 8KB TSB
is good enough for up to about a 4MB RSS, after which the TSB starts
to incur many capacity and conflict misses.
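As a back-of-the-envelope check of that 4MB figure (a standalone sketch, not
part of this patch; the constants simply mirror the 16-byte struct tsb entries
in the new tsb.c below and the 8KB base page size):

#include <stdio.h>

int main(void)
{
	const unsigned long tsb_bytes   = 8192;	/* per-process TSB size */
	const unsigned long entry_bytes = 16;	/* struct tsb: 8-byte tag + 8-byte pte */
	const unsigned long page_bytes  = 8192;	/* 8KB base page size */

	unsigned long nentries = tsb_bytes / entry_bytes;	/* 512 entries */
	unsigned long coverage = nentries * page_bytes;		/* 4MB mapped before
								 * capacity misses bite */

	printf("%lu entries cover %lu KB of RSS\n", nentries, coverage >> 10);
	return 0;
}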
We even accumulate OBP translations into the kernel TSB.
Another area for refinement is large page size support. We could use
a secondary address space TSB to handle those.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/mm')
-rw-r--r--  arch/sparc64/mm/Makefile    2
-rw-r--r--  arch/sparc64/mm/init.c     91
-rw-r--r--  arch/sparc64/mm/tlb.c      61
-rw-r--r--  arch/sparc64/mm/tsb.c      84
-rw-r--r--  arch/sparc64/mm/ultra.S    58
5 files changed, 92 insertions, 204 deletions
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index 9d0960e69f48..e415bf942bcd 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
 EXTRA_AFLAGS := -ansi
 EXTRA_CFLAGS := -Werror
 
-obj-y := ultra.o tlb.o fault.o init.o generic.o
+obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o
 
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 1e44ee26cee8..da068f6b2595 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -408,8 +408,7 @@ unsigned long prom_virt_to_phys(unsigned long promva, int *error)
 
 /* The obp translations are saved based on 8k pagesize, since obp can
  * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
- * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte
- * scheme (also, see rant in inherit_locked_prom_mappings()).
+ * HI_OBP_ADDRESS range are handled in ktlb.S.
  */
 static inline int in_obp_range(unsigned long vaddr)
 {
@@ -539,75 +538,6 @@ static void __init inherit_prom_mappings(void)
 	prom_printf("done.\n");
 }
 
-/* The OBP specifications for sun4u mark 0xfffffffc00000000 and
- * upwards as reserved for use by the firmware (I wonder if this
- * will be the same on Cheetah...).  We use this virtual address
- * range for the VPTE table mappings of the nucleus so we need
- * to zap them when we enter the PROM.  -DaveM
- */
-static void __flush_nucleus_vptes(void)
-{
-	unsigned long prom_reserved_base = 0xfffffffc00000000UL;
-	int i;
-
-	/* Only DTLB must be checked for VPTE entries. */
-	if (tlb_type == spitfire) {
-		for (i = 0; i < 63; i++) {
-			unsigned long tag;
-
-			/* Spitfire Errata #32 workaround */
-			/* NOTE: Always runs on spitfire, so no cheetah+
-			 *       page size encodings.
-			 */
-			__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
-					     "flush	%%g6"
-					     : /* No outputs */
-					     : "r" (0),
-					       "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
-			tag = spitfire_get_dtlb_tag(i);
-			if (((tag & ~(PAGE_MASK)) == 0) &&
-			    ((tag & (PAGE_MASK)) >= prom_reserved_base)) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				spitfire_put_dtlb_data(i, 0x0UL);
-			}
-		}
-	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-		for (i = 0; i < 512; i++) {
-			unsigned long tag = cheetah_get_dtlb_tag(i, 2);
-
-			if ((tag & ~PAGE_MASK) == 0 &&
-			    (tag & PAGE_MASK) >= prom_reserved_base) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				cheetah_put_dtlb_data(i, 0x0UL, 2);
-			}
-
-			if (tlb_type != cheetah_plus)
-				continue;
-
-			tag = cheetah_get_dtlb_tag(i, 3);
-
-			if ((tag & ~PAGE_MASK) == 0 &&
-			    (tag & PAGE_MASK) >= prom_reserved_base) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				cheetah_put_dtlb_data(i, 0x0UL, 3);
-			}
-		}
-	} else {
-		/* Implement me :-) */
-		BUG();
-	}
-}
-
 static int prom_ditlb_set;
 struct prom_tlb_entry {
 	int tlb_ent;
@@ -635,9 +565,6 @@ void prom_world(int enter)
 			       : "i" (PSTATE_IE));
 
 	if (enter) {
-		/* Kick out nucleus VPTEs. */
-		__flush_nucleus_vptes();
-
 		/* Install PROM world. */
 		for (i = 0; i < 16; i++) {
 			if (prom_dtlb[i].tlb_ent != -1) {
@@ -1039,18 +966,7 @@ out:
 struct pgtable_cache_struct pgt_quicklists;
 #endif
 
-/* OK, we have to color these pages. The page tables are accessed
- * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S
- * code, as well as by PAGE_OFFSET range direct-mapped addresses by
- * other parts of the kernel. By coloring, we make sure that the tlbmiss
- * fast handlers do not get data from old/garbage dcache lines that
- * correspond to an old/stale virtual address (user/kernel) that
- * previously mapped the pagetable page while accessing vpte range
- * addresses. The idea is that if the vpte color and PAGE_OFFSET range
- * color is the same, then when the kernel initializes the pagetable
- * using the later address range, accesses with the first address
- * range will see the newly initialized data rather than the garbage.
- */
+/* XXX We don't need to color these things in the D-cache any longer. */
 #ifdef DCACHE_ALIASING_POSSIBLE
 #define DC_ALIAS_SHIFT	1
 #else
@@ -1419,6 +1335,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	kernel_map_range(phys_start, phys_end,
 			 (enable ? PAGE_KERNEL : __pgprot(0)));
 
+	flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
+			       PAGE_OFFSET + phys_end);
+
 	/* we should perform an IPI and flush all tlbs,
 	 * but that can deadlock->flush only current cpu.
 	 */
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 8b104be4662b..78357cc2a0b7 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -25,6 +25,8 @@ void flush_tlb_pending(void)
 	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
 
 	if (mp->tlb_nr) {
+		flush_tsb_user(mp);
+
 		if (CTX_VALID(mp->mm->context)) {
 #ifdef CONFIG_SMP
 			smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
@@ -89,62 +91,3 @@ no_cache_flush:
 	if (nr >= TLB_BATCH_NR)
 		flush_tlb_pending();
 }
-
-void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
-	unsigned long nr = mp->tlb_nr;
-	long s = start, e = end, vpte_base;
-
-	if (mp->fullmm)
-		return;
-
-	/* If start is greater than end, that is a real problem. */
-	BUG_ON(start > end);
-
-	/* However, straddling the VA space hole is quite normal. */
-	s &= PMD_MASK;
-	e = (e + PMD_SIZE - 1) & PMD_MASK;
-
-	vpte_base = (tlb_type == spitfire ?
-		     VPTE_BASE_SPITFIRE :
-		     VPTE_BASE_CHEETAH);
-
-	if (unlikely(nr != 0 && mm != mp->mm)) {
-		flush_tlb_pending();
-		nr = 0;
-	}
-
-	if (nr == 0)
-		mp->mm = mm;
-
-	start = vpte_base + (s >> (PAGE_SHIFT - 3));
-	end = vpte_base + (e >> (PAGE_SHIFT - 3));
-
-	/* If the request straddles the VA space hole, we
-	 * need to swap start and end.  The reason this
-	 * occurs is that "vpte_base" is the center of
-	 * the linear page table mapping area.  Thus,
-	 * high addresses with the sign bit set map to
-	 * addresses below vpte_base and non-sign bit
-	 * addresses map to addresses above vpte_base.
-	 */
-	if (end < start) {
-		unsigned long tmp = start;
-
-		start = end;
-		end = tmp;
-	}
-
-	while (start < end) {
-		mp->vaddrs[nr] = start;
-		mp->tlb_nr = ++nr;
-		if (nr >= TLB_BATCH_NR) {
-			flush_tlb_pending();
-			nr = 0;
-		}
-		start += PAGE_SIZE;
-	}
-	if (nr)
-		flush_tlb_pending();
-}
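A minimal user-space model of the ordering that the flush_tlb_pending() hunk
above establishes (illustrative only; the toy_* names are invented, not kernel
API): the batched virtual addresses have their TSB entries invalidated first,
and only then is the TLB flush issued, plausibly so that a TLB miss taken right
after the demap cannot be refilled from a stale TSB entry.

#define TOY_PAGE_SHIFT	13			/* 8KB base pages */
#define TOY_NENTRIES	512			/* entries in one 8KB TSB */
#define TOY_BATCH_NR	64			/* stand-in for TLB_BATCH_NR */

static unsigned long toy_tsb_tags[TOY_NENTRIES];	/* stand-in for the TSB tags */

struct toy_gather {
	unsigned long vaddrs[TOY_BATCH_NR];
	unsigned long nr;
};

/* Mirrors what flush_tsb_user() does: clear the tag of the entry each
 * pending address hashes to, so that entry can no longer match.
 */
static void toy_flush_tsb(struct toy_gather *g)
{
	unsigned long i;

	for (i = 0; i < g->nr; i++) {
		unsigned long idx = (g->vaddrs[i] >> TOY_PAGE_SHIFT) &
				    (TOY_NENTRIES - 1);
		toy_tsb_tags[idx] = 0;
	}
}

/* In the kernel this is the (smp_)flush_tlb_pending() demap; there is
 * nothing to model in user space, so it is a no-op here.
 */
static void toy_flush_tlb(struct toy_gather *g)
{
	(void) g;
}

static void toy_flush_pending(struct toy_gather *g)
{
	if (g->nr) {
		toy_flush_tsb(g);	/* TSB entries first ... */
		toy_flush_tlb(g);	/* ... then the TLB demaps */
		g->nr = 0;
	}
}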
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 000000000000..15e8af58b1d2
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,84 @@
+/* arch/sparc64/mm/tsb.c
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#define TSB_ENTRY_ALIGNMENT	16
+
+struct tsb {
+	unsigned long tag;
+	unsigned long pte;
+} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+
+/* We use an 8K TSB for the whole kernel, this allows to
+ * handle about 4MB of modules and vmalloc mappings without
+ * incurring many hash conflicts.
+ */
+#define KERNEL_TSB_SIZE_BYTES	8192
+#define KERNEL_TSB_NENTRIES \
+	(KERNEL_TSB_SIZE_BYTES / sizeof(struct tsb))
+
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+static inline unsigned long tsb_hash(unsigned long vaddr)
+{
+	vaddr >>= PAGE_SHIFT;
+	return vaddr & (KERNEL_TSB_NENTRIES - 1);
+}
+
+static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context)
+{
+	if (context == ~0UL)
+		return 1;
+
+	return (entry->tag == ((vaddr >> 22) | (context << 48)));
+}
+
+/* TSB flushes need only occur on the processor initiating the address
+ * space modification, not on each cpu the address space has run on.
+ * Only the TLB flush needs that treatment.
+ */
+
+void flush_tsb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long v;
+
+	for (v = start; v < end; v += PAGE_SIZE) {
+		struct tsb *ent = &swapper_tsb[tsb_hash(v)];
+
+		if (tag_compare(ent, v, 0)) {
+			ent->tag = 0UL;
+			membar_storeload_storestore();
+		}
+	}
+}
+
+void flush_tsb_user(struct mmu_gather *mp)
+{
+	struct mm_struct *mm = mp->mm;
+	struct tsb *tsb = (struct tsb *) mm->context.sparc64_tsb;
+	unsigned long ctx = ~0UL;
+	int i;
+
+	if (CTX_VALID(mm->context))
+		ctx = CTX_HWBITS(mm->context);
+
+	for (i = 0; i < mp->tlb_nr; i++) {
+		unsigned long v = mp->vaddrs[i];
+		struct tsb *ent;
+
+		v &= ~0x1UL;
+
+		ent = &tsb[tsb_hash(v)];
+		if (tag_compare(ent, v, ctx)) {
+			ent->tag = 0UL;
+			membar_storeload_storestore();
+		}
+	}
+}
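For reference, a small user-space sketch of how the direct-mapped hash and the
tag encoding matched by tag_compare() in the new tsb.c fit together
(assumptions: 8KB base pages and the 16-byte tag/pte entry layout above; the
demo_* names are made up for illustration):

#include <stdio.h>

#define DEMO_PAGE_SHIFT	13			/* 8KB base pages */
#define DEMO_NENTRIES	(8192 / 16)		/* 512 entries in an 8KB TSB */

/* Direct-mapped index: virtual address bits just above the page offset. */
static unsigned long demo_tsb_hash(unsigned long vaddr)
{
	return (vaddr >> DEMO_PAGE_SHIFT) & (DEMO_NENTRIES - 1);
}

/* Tag encoding matched by tag_compare() above: the virtual address
 * shifted down by 22 bits, OR'd with the hardware context number
 * shifted up into the high bits.
 */
static unsigned long demo_tsb_tag(unsigned long vaddr, unsigned long ctx)
{
	return (vaddr >> 22) | (ctx << 48);
}

int main(void)
{
	unsigned long va  = 0x0000000000402000UL;	/* arbitrary user address */
	unsigned long ctx = 5;				/* arbitrary context number */

	printf("index %lu, tag %#lx\n", demo_tsb_hash(va), demo_tsb_tag(va, ctx));
	return 0;
}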
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index e4c9151fa116..22791f29552e 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -453,64 +453,6 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
 	 nop
 	nop
 
-	.data
-
-errata32_hwbug:
-	.xword	0
-
-	.text
-
-	/* These two are not performance critical... */
-	.globl		xcall_flush_tlb_all_spitfire
-xcall_flush_tlb_all_spitfire:
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-	clr		%g2
-	clr		%g3
-1:	ldxa		[%g3] ASI_DTLB_DATA_ACCESS, %g4
-	and		%g4, _PAGE_L, %g5
-	brnz,pn		%g5, 2f
-	 mov		TLB_TAG_ACCESS, %g7
-
-	stxa		%g0, [%g7] ASI_DMMU
-	membar		#Sync
-	stxa		%g0, [%g3] ASI_DTLB_DATA_ACCESS
-	membar		#Sync
-
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-2:	ldxa		[%g3] ASI_ITLB_DATA_ACCESS, %g4
-	and		%g4, _PAGE_L, %g5
-	brnz,pn		%g5, 2f
-	 mov		TLB_TAG_ACCESS, %g7
-
-	stxa		%g0, [%g7] ASI_IMMU
-	membar		#Sync
-	stxa		%g0, [%g3] ASI_ITLB_DATA_ACCESS
-	membar		#Sync
-
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-2:	add		%g2, 1, %g2
-	cmp		%g2, SPITFIRE_HIGHEST_LOCKED_TLBENT
-	ble,pt		%icc, 1b
-	 sll		%g2, 3, %g3
-	flush		%g6
-	retry
-
-	.globl		xcall_flush_tlb_all_cheetah
-xcall_flush_tlb_all_cheetah:
-	mov		0x80, %g2
-	stxa		%g0, [%g2] ASI_DMMU_DEMAP
-	stxa		%g0, [%g2] ASI_IMMU_DEMAP
-	retry
-
 /* These just get rescheduled to PIL vectors. */
 	.globl		xcall_call_function
 xcall_call_function: