author     David S. Miller <davem@davemloft.net>           2006-01-31 21:29:18 -0500
committer  David S. Miller <davem@sunset.davemloft.net>    2006-03-20 04:11:13 -0500
commit     74bf4312fff083ab25c3f357cc653ada7995e5f6 (patch)
tree       c23dea461e32485f4cd7ca4b8c33c632655eb906 /arch/sparc64/mm
parent     30d4d1ffed7098afe2641536d67eef150499da02 (diff)
[SPARC64]: Move away from virtual page tables, part 1.
We now use the TSB hardware assist features of the UltraSPARC
MMUs.
SMP is currently knowingly broken; we need to find another place
to store the per-cpu base pointers. We hid them away in the TSB
base register, and that obviously will not work any more :-)
Another known broken case is non-8KB base page size.
Also noticed that flush_tlb_all() is not referenced anywhere; only
the internal __flush_tlb_all() (local cpu only) is used by the
sparc64 port, so we can get rid of flush_tlb_all().
The kernel gets its own 8KB TSB (swapper_tsb) and each address space
gets its own private 8KB TSB. Later we can add code to dynamically
increase the size of the per-process TSB as the RSS grows. An 8KB TSB
is good enough for up to about a 4MB RSS, after which the TSB starts
to incur many capacity and conflict misses.
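As a back-of-the-envelope check of that 4MB figure (a standalone sketch, not
part of this patch; the constants simply mirror the 16-byte struct tsb entries
in the new tsb.c below and the 8KB base page size):

#include <stdio.h>

int main(void)
{
	const unsigned long tsb_bytes   = 8192;	/* per-process TSB size */
	const unsigned long entry_bytes = 16;	/* struct tsb: 8-byte tag + 8-byte pte */
	const unsigned long page_bytes  = 8192;	/* 8KB base page size */

	unsigned long nentries = tsb_bytes / entry_bytes;	/* 512 entries */
	unsigned long coverage = nentries * page_bytes;		/* 4MB mapped before
								 * capacity misses bite */

	printf("%lu entries cover %lu KB of RSS\n", nentries, coverage >> 10);
	return 0;
}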
We even accumulate OBP translations into the kernel TSB.
Another area for refinement is large page size support. We could use
a secondary address space TSB to handle those.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/mm')
-rw-r--r--  arch/sparc64/mm/Makefile    2
-rw-r--r--  arch/sparc64/mm/init.c     91
-rw-r--r--  arch/sparc64/mm/tlb.c      61
-rw-r--r--  arch/sparc64/mm/tsb.c      84
-rw-r--r--  arch/sparc64/mm/ultra.S    58
5 files changed, 92 insertions, 204 deletions
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index 9d0960e69f48..e415bf942bcd 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
 EXTRA_AFLAGS := -ansi
 EXTRA_CFLAGS := -Werror
 
-obj-y := ultra.o tlb.o fault.o init.o generic.o
+obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o
 
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 1e44ee26cee8..da068f6b2595 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -408,8 +408,7 @@ unsigned long prom_virt_to_phys(unsigned long promva, int *error)
 
 /* The obp translations are saved based on 8k pagesize, since obp can
  * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
- * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte
- * scheme (also, see rant in inherit_locked_prom_mappings()).
+ * HI_OBP_ADDRESS range are handled in ktlb.S.
  */
 static inline int in_obp_range(unsigned long vaddr)
 {
@@ -539,75 +538,6 @@ static void __init inherit_prom_mappings(void)
 	prom_printf("done.\n");
 }
 
-/* The OBP specifications for sun4u mark 0xfffffffc00000000 and
- * upwards as reserved for use by the firmware (I wonder if this
- * will be the same on Cheetah...).  We use this virtual address
- * range for the VPTE table mappings of the nucleus so we need
- * to zap them when we enter the PROM.  -DaveM
- */
-static void __flush_nucleus_vptes(void)
-{
-	unsigned long prom_reserved_base = 0xfffffffc00000000UL;
-	int i;
-
-	/* Only DTLB must be checked for VPTE entries. */
-	if (tlb_type == spitfire) {
-		for (i = 0; i < 63; i++) {
-			unsigned long tag;
-
-			/* Spitfire Errata #32 workaround */
-			/* NOTE: Always runs on spitfire, so no cheetah+
-			 *       page size encodings.
-			 */
-			__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
-					     "flush	%%g6"
-					     : /* No outputs */
-					     : "r" (0),
-					       "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
-			tag = spitfire_get_dtlb_tag(i);
-			if (((tag & ~(PAGE_MASK)) == 0) &&
-			    ((tag & (PAGE_MASK)) >= prom_reserved_base)) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				spitfire_put_dtlb_data(i, 0x0UL);
-			}
-		}
-	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-		for (i = 0; i < 512; i++) {
-			unsigned long tag = cheetah_get_dtlb_tag(i, 2);
-
-			if ((tag & ~PAGE_MASK) == 0 &&
-			    (tag & PAGE_MASK) >= prom_reserved_base) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				cheetah_put_dtlb_data(i, 0x0UL, 2);
-			}
-
-			if (tlb_type != cheetah_plus)
-				continue;
-
-			tag = cheetah_get_dtlb_tag(i, 3);
-
-			if ((tag & ~PAGE_MASK) == 0 &&
-			    (tag & PAGE_MASK) >= prom_reserved_base) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				cheetah_put_dtlb_data(i, 0x0UL, 3);
-			}
-		}
-	} else {
-		/* Implement me :-) */
-		BUG();
-	}
-}
-
 static int prom_ditlb_set;
 struct prom_tlb_entry {
 	int tlb_ent;
@@ -635,9 +565,6 @@ void prom_world(int enter)
 			       : "i" (PSTATE_IE));
 
 	if (enter) {
-		/* Kick out nucleus VPTEs. */
-		__flush_nucleus_vptes();
-
 		/* Install PROM world. */
 		for (i = 0; i < 16; i++) {
 			if (prom_dtlb[i].tlb_ent != -1) {
@@ -1039,18 +966,7 @@ out:
 struct pgtable_cache_struct pgt_quicklists;
 #endif
 
-/* OK, we have to color these pages. The page tables are accessed
- * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S
- * code, as well as by PAGE_OFFSET range direct-mapped addresses by
- * other parts of the kernel. By coloring, we make sure that the tlbmiss
- * fast handlers do not get data from old/garbage dcache lines that
- * correspond to an old/stale virtual address (user/kernel) that
- * previously mapped the pagetable page while accessing vpte range
- * addresses. The idea is that if the vpte color and PAGE_OFFSET range
- * color is the same, then when the kernel initializes the pagetable
- * using the later address range, accesses with the first address
- * range will see the newly initialized data rather than the garbage.
- */
+/* XXX We don't need to color these things in the D-cache any longer. */
 #ifdef DCACHE_ALIASING_POSSIBLE
 #define DC_ALIAS_SHIFT	1
 #else
@@ -1419,6 +1335,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	kernel_map_range(phys_start, phys_end,
 			 (enable ? PAGE_KERNEL : __pgprot(0)));
 
+	flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
+			       PAGE_OFFSET + phys_end);
+
 	/* we should perform an IPI and flush all tlbs,
 	 * but that can deadlock->flush only current cpu.
 	 */
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 8b104be4662b..78357cc2a0b7 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -25,6 +25,8 @@ void flush_tlb_pending(void)
 	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
 
 	if (mp->tlb_nr) {
+		flush_tsb_user(mp);
+
 		if (CTX_VALID(mp->mm->context)) {
 #ifdef CONFIG_SMP
 			smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
@@ -89,62 +91,3 @@ no_cache_flush:
 	if (nr >= TLB_BATCH_NR)
 		flush_tlb_pending();
 }
-
-void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
-	unsigned long nr = mp->tlb_nr;
-	long s = start, e = end, vpte_base;
-
-	if (mp->fullmm)
-		return;
-
-	/* If start is greater than end, that is a real problem. */
-	BUG_ON(start > end);
-
-	/* However, straddling the VA space hole is quite normal. */
-	s &= PMD_MASK;
-	e = (e + PMD_SIZE - 1) & PMD_MASK;
-
-	vpte_base = (tlb_type == spitfire ?
-		     VPTE_BASE_SPITFIRE :
-		     VPTE_BASE_CHEETAH);
-
-	if (unlikely(nr != 0 && mm != mp->mm)) {
-		flush_tlb_pending();
-		nr = 0;
-	}
-
-	if (nr == 0)
-		mp->mm = mm;
-
-	start = vpte_base + (s >> (PAGE_SHIFT - 3));
-	end = vpte_base + (e >> (PAGE_SHIFT - 3));
-
-	/* If the request straddles the VA space hole, we
-	 * need to swap start and end.  The reason this
-	 * occurs is that "vpte_base" is the center of
-	 * the linear page table mapping area.  Thus,
-	 * high addresses with the sign bit set map to
-	 * addresses below vpte_base and non-sign bit
-	 * addresses map to addresses above vpte_base.
-	 */
-	if (end < start) {
-		unsigned long tmp = start;
-
-		start = end;
-		end = tmp;
-	}
-
-	while (start < end) {
-		mp->vaddrs[nr] = start;
-		mp->tlb_nr = ++nr;
-		if (nr >= TLB_BATCH_NR) {
-			flush_tlb_pending();
-			nr = 0;
-		}
-		start += PAGE_SIZE;
-	}
-	if (nr)
-		flush_tlb_pending();
-}
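A minimal user-space model of the ordering that the flush_tlb_pending() hunk
above establishes (illustrative only; the toy_* names are invented, not kernel
API): the batched virtual addresses have their TSB entries invalidated first,
and only then is the TLB flush issued, plausibly so that a TLB miss taken right
after the demap cannot be refilled from a stale TSB entry.

#define TOY_PAGE_SHIFT	13			/* 8KB base pages */
#define TOY_NENTRIES	512			/* entries in one 8KB TSB */
#define TOY_BATCH_NR	64			/* stand-in for TLB_BATCH_NR */

static unsigned long toy_tsb_tags[TOY_NENTRIES];	/* stand-in for the TSB tags */

struct toy_gather {
	unsigned long vaddrs[TOY_BATCH_NR];
	unsigned long nr;
};

/* Mirrors what flush_tsb_user() does: clear the tag of the entry each
 * pending address hashes to, so that entry can no longer match.
 */
static void toy_flush_tsb(struct toy_gather *g)
{
	unsigned long i;

	for (i = 0; i < g->nr; i++) {
		unsigned long idx = (g->vaddrs[i] >> TOY_PAGE_SHIFT) &
				    (TOY_NENTRIES - 1);
		toy_tsb_tags[idx] = 0;
	}
}

/* In the kernel this is the (smp_)flush_tlb_pending() demap; there is
 * nothing to model in user space, so it is a no-op here.
 */
static void toy_flush_tlb(struct toy_gather *g)
{
	(void) g;
}

static void toy_flush_pending(struct toy_gather *g)
{
	if (g->nr) {
		toy_flush_tsb(g);	/* TSB entries first ... */
		toy_flush_tlb(g);	/* ... then the TLB demaps */
		g->nr = 0;
	}
}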
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 000000000000..15e8af58b1d2
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,84 @@
+/* arch/sparc64/mm/tsb.c
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#define TSB_ENTRY_ALIGNMENT	16
+
+struct tsb {
+	unsigned long tag;
+	unsigned long pte;
+} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+
+/* We use an 8K TSB for the whole kernel, this allows to
+ * handle about 4MB of modules and vmalloc mappings without
+ * incurring many hash conflicts.
+ */
+#define KERNEL_TSB_SIZE_BYTES	8192
+#define KERNEL_TSB_NENTRIES \
+	(KERNEL_TSB_SIZE_BYTES / sizeof(struct tsb))
+
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+static inline unsigned long tsb_hash(unsigned long vaddr)
+{
+	vaddr >>= PAGE_SHIFT;
+	return vaddr & (KERNEL_TSB_NENTRIES - 1);
+}
+
+static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context)
+{
+	if (context == ~0UL)
+		return 1;
+
+	return (entry->tag == ((vaddr >> 22) | (context << 48)));
+}
+
+/* TSB flushes need only occur on the processor initiating the address
+ * space modification, not on each cpu the address space has run on.
+ * Only the TLB flush needs that treatment.
+ */
+
+void flush_tsb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long v;
+
+	for (v = start; v < end; v += PAGE_SIZE) {
+		struct tsb *ent = &swapper_tsb[tsb_hash(v)];
+
+		if (tag_compare(ent, v, 0)) {
+			ent->tag = 0UL;
+			membar_storeload_storestore();
+		}
+	}
+}
+
+void flush_tsb_user(struct mmu_gather *mp)
+{
+	struct mm_struct *mm = mp->mm;
+	struct tsb *tsb = (struct tsb *) mm->context.sparc64_tsb;
+	unsigned long ctx = ~0UL;
+	int i;
+
+	if (CTX_VALID(mm->context))
+		ctx = CTX_HWBITS(mm->context);
+
+	for (i = 0; i < mp->tlb_nr; i++) {
+		unsigned long v = mp->vaddrs[i];
+		struct tsb *ent;
+
+		v &= ~0x1UL;
+
+		ent = &tsb[tsb_hash(v)];
+		if (tag_compare(ent, v, ctx)) {
+			ent->tag = 0UL;
+			membar_storeload_storestore();
+		}
+	}
+}
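For reference, a small user-space sketch of how the direct-mapped hash and the
tag encoding matched by tag_compare() in the new tsb.c fit together
(assumptions: 8KB base pages and the 16-byte tag/pte entry layout above; the
demo_* names are made up for illustration):

#include <stdio.h>

#define DEMO_PAGE_SHIFT	13			/* 8KB base pages */
#define DEMO_NENTRIES	(8192 / 16)		/* 512 entries in an 8KB TSB */

/* Direct-mapped index: virtual address bits just above the page offset. */
static unsigned long demo_tsb_hash(unsigned long vaddr)
{
	return (vaddr >> DEMO_PAGE_SHIFT) & (DEMO_NENTRIES - 1);
}

/* Tag encoding matched by tag_compare() above: the virtual address
 * shifted down by 22 bits, OR'd with the hardware context number
 * shifted up into the high bits.
 */
static unsigned long demo_tsb_tag(unsigned long vaddr, unsigned long ctx)
{
	return (vaddr >> 22) | (ctx << 48);
}

int main(void)
{
	unsigned long va  = 0x0000000000402000UL;	/* arbitrary user address */
	unsigned long ctx = 5;				/* arbitrary context number */

	printf("index %lu, tag %#lx\n", demo_tsb_hash(va), demo_tsb_tag(va, ctx));
	return 0;
}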
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index e4c9151fa116..22791f29552e 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -453,64 +453,6 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
 	 nop
 	nop
 
-	.data
-
-errata32_hwbug:
-	.xword	0
-
-	.text
-
-	/* These two are not performance critical... */
-	.globl		xcall_flush_tlb_all_spitfire
-xcall_flush_tlb_all_spitfire:
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-	clr		%g2
-	clr		%g3
-1:	ldxa		[%g3] ASI_DTLB_DATA_ACCESS, %g4
-	and		%g4, _PAGE_L, %g5
-	brnz,pn		%g5, 2f
-	 mov		TLB_TAG_ACCESS, %g7
-
-	stxa		%g0, [%g7] ASI_DMMU
-	membar		#Sync
-	stxa		%g0, [%g3] ASI_DTLB_DATA_ACCESS
-	membar		#Sync
-
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-2:	ldxa		[%g3] ASI_ITLB_DATA_ACCESS, %g4
-	and		%g4, _PAGE_L, %g5
-	brnz,pn		%g5, 2f
-	 mov		TLB_TAG_ACCESS, %g7
-
-	stxa		%g0, [%g7] ASI_IMMU
-	membar		#Sync
-	stxa		%g0, [%g3] ASI_ITLB_DATA_ACCESS
-	membar		#Sync
-
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-2:	add		%g2, 1, %g2
-	cmp		%g2, SPITFIRE_HIGHEST_LOCKED_TLBENT
-	ble,pt		%icc, 1b
-	 sll		%g2, 3, %g3
-	flush		%g6
-	retry
-
-	.globl		xcall_flush_tlb_all_cheetah
-xcall_flush_tlb_all_cheetah:
-	mov		0x80, %g2
-	stxa		%g0, [%g2] ASI_DMMU_DEMAP
-	stxa		%g0, [%g2] ASI_IMMU_DEMAP
-	retry
-
 /* These just get rescheduled to PIL vectors. */
 	.globl		xcall_call_function
 xcall_call_function: