Diffstat (limited to 'include')
-rw-r--r--  include/asm-sparc64/mmu.h          |   1
-rw-r--r--  include/asm-sparc64/mmu_context.h  |  46
-rw-r--r--  include/asm-sparc64/pgalloc.h      |   1
-rw-r--r--  include/asm-sparc64/pgtable.h      |   9
-rw-r--r--  include/asm-sparc64/processor.h    |  14
-rw-r--r--  include/asm-sparc64/tlbflush.h     |  25
-rw-r--r--  include/asm-sparc64/tsb.h          | 165
7 files changed, 204 insertions, 57 deletions
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 8627eed6e83d..36384cf7faa6 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -92,6 +92,7 @@
 
 typedef struct {
 	unsigned long	sparc64_ctx_val;
+	unsigned long	*sparc64_tsb;
 } mm_context_t;
 
 #endif /* !__ASSEMBLY__ */
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 57ee7b306189..34640a370ab4 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -25,7 +25,13 @@ extern void get_new_mmu_context(struct mm_struct *mm);
  * This just needs to set mm->context to an invalid context.
  */
 #define init_new_context(__tsk, __mm)	\
-	(((__mm)->context.sparc64_ctx_val = 0UL), 0)
+({	unsigned long __pg = get_zeroed_page(GFP_KERNEL); \
+	(__mm)->context.sparc64_ctx_val = 0UL; \
+	(__mm)->context.sparc64_tsb = \
+	  (unsigned long *) __pg; \
+	(__pg ? 0 : -ENOMEM); \
+})
+
 
 /* Destroy a dead context.  This occurs when mmput drops the
  * mm_users count to zero, the mmaps have been released, and
@@ -35,7 +41,8 @@ extern void get_new_mmu_context(struct mm_struct *mm);
  * this task if valid.
  */
 #define destroy_context(__mm)					\
-do {	spin_lock(&ctx_alloc_lock);				\
+do {	free_page((unsigned long)(__mm)->context.sparc64_tsb);	\
+	spin_lock(&ctx_alloc_lock);				\
 	if (CTX_VALID((__mm)->context)) {			\
 		unsigned long nr = CTX_NRBITS((__mm)->context);	\
 		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));	\
@@ -43,35 +50,7 @@ do { spin_lock(&ctx_alloc_lock); \
 	spin_unlock(&ctx_alloc_lock);				\
 } while(0)
 
-/* Reload the two core values used by TLB miss handler
- * processing on sparc64.  They are:
- * 1) The physical address of mm->pgd, when full page
- *    table walks are necessary, this is where the
- *    search begins.
- * 2) A "PGD cache".  For 32-bit tasks only pgd[0] is
- *    ever used since that maps the entire low 4GB
- *    completely.  To speed up TLB miss processing we
- *    make this value available to the handlers.  This
- *    decreases the amount of memory traffic incurred.
- */
-#define reload_tlbmiss_state(__tsk, __mm) \
-do { \
-	register unsigned long paddr asm("o5"); \
-	register unsigned long pgd_cache asm("o4"); \
-	paddr = __pa((__mm)->pgd); \
-	pgd_cache = 0UL; \
-	if (task_thread_info(__tsk)->flags & _TIF_32BIT) \
-		pgd_cache = get_pgd_cache((__mm)->pgd); \
-	__asm__ __volatile__("wrpr	%%g0, 0x494, %%pstate\n\t" \
-			     "mov	%3, %%g4\n\t" \
-			     "mov	%0, %%g7\n\t" \
-			     "stxa	%1, [%%g4] %2\n\t" \
-			     "membar	#Sync\n\t" \
-			     "wrpr	%%g0, 0x096, %%pstate" \
-			     : /* no outputs */ \
-			     : "r" (paddr), "r" (pgd_cache),\
-			       "i" (ASI_DMMU), "i" (TSB_REG)); \
-} while(0)
+extern unsigned long tsb_context_switch(unsigned long pgd_pa, unsigned long *tsb);
 
 /* Set MMU context in the actual hardware. */
 #define load_secondary_context(__mm) \
@@ -101,7 +80,8 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 
 	if (!ctx_valid || (old_mm != mm)) {
 		load_secondary_context(mm);
-		reload_tlbmiss_state(tsk, mm);
+		tsb_context_switch(__pa(mm->pgd),
+				   mm->context.sparc64_tsb);
 	}
 
 	/* Even if (mm == old_mm) we _must_ check
@@ -139,7 +119,7 @@ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm
 
 	load_secondary_context(mm);
 	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
-	reload_tlbmiss_state(current, mm);
+	tsb_context_switch(__pa(mm->pgd), mm->context.sparc64_tsb);
 }
 
 #endif /* !(__ASSEMBLY__) */
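
The mmu_context.h changes above give each mm a private one-page (8K) TSB: allocated zeroed in init_new_context(), freed in destroy_context(), and handed to the low-level miss handlers via tsb_context_switch() whenever the mm is switched in. The real tsb_context_switch() is sparc64 assembly; what follows is a minimal userspace sketch of the contract it has to satisfy, with illustrative (non-kernel) names throughout:

#include <stdint.h>
#include <stdio.h>

/* Model of the per-cpu state the TLB miss handlers consume: the
 * physical address of the pgd (where the slow-path page table walk
 * starts) and the base of the current process's TSB (the fast path).
 */
struct mmu_miss_state {
	uint64_t pgd_paddr;
	uint64_t *tsb_base;
};

static struct mmu_miss_state cpu_miss_state;

/* Stand-in for tsb_context_switch(__pa(mm->pgd), mm->context.sparc64_tsb). */
static void model_tsb_context_switch(uint64_t pgd_pa, uint64_t *tsb)
{
	cpu_miss_state.pgd_paddr = pgd_pa;
	cpu_miss_state.tsb_base = tsb;
}

int main(void)
{
	/* One zeroed 8K page, as init_new_context() allocates. */
	static uint64_t tsb[8192 / sizeof(uint64_t)];

	model_tsb_context_switch(0x12340000UL, tsb);
	printf("pgd pa %#llx, tsb base %p\n",
	       (unsigned long long)cpu_miss_state.pgd_paddr,
	       (void *)cpu_miss_state.tsb_base);
	return 0;
}

Note that switch_mm() and activate_mm() now pass exactly these two values, replacing the old reload_tlbmiss_state() pgd-cache trick.
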
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index a96067cca963..baf59c00ea47 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -61,6 +61,7 @@ static __inline__ void free_pgd_slow(pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
+/* XXX This crap can die, no longer using virtual page tables... */
 #ifdef DCACHE_ALIASING_POSSIBLE
 #define VPTE_COLOR(address)		(((address) >> (PAGE_SHIFT + 10)) & 1UL)
 #define DCACHE_COLOR(address)		(((address) >> PAGE_SHIFT) & 1UL)
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index f0a9b44d3eb5..f3ba1e058195 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -25,7 +25,8 @@
 #include <asm/const.h>
 
 /* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB).
- * The page copy blockops can use 0x2000000 to 0x10000000.
+ * The page copy blockops can use 0x2000000 to 0x4000000.
+ * The TSB is mapped in the 0x4000000 to 0x6000000 range.
  * The PROM resides in an area spanning 0xf0000000 to 0x100000000.
  * The vmalloc area spans 0x100000000 to 0x200000000.
  * Since modules need to be in the lowest 32-bits of the address space,
@@ -34,6 +35,7 @@
  * 0x400000000.
  */
 #define TLBTEMP_BASE		_AC(0x0000000002000000,UL)
+#define TSBMAP_BASE		_AC(0x0000000004000000,UL)
 #define MODULES_VADDR		_AC(0x0000000010000000,UL)
 #define MODULES_LEN		_AC(0x00000000e0000000,UL)
 #define MODULES_END		_AC(0x00000000f0000000,UL)
@@ -296,11 +298,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-/* extract the pgd cache used for optimizing the tlb miss
- * slow path when executing 32-bit compat processes
- */
-#define get_pgd_cache(pgd)	((unsigned long) pgd_val(*pgd) << 11)
-
 /* Find an entry in the second-level page table.. */
 #define pmd_offset(pudp, address)	\
 	((pmd_t *) pud_page(*(pudp)) + \
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h
index cd8d9b4c8658..b3889f3f943a 100644
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -28,6 +28,8 @@
  * User lives in his very own context, and cannot reference us. Note
  * that TASK_SIZE is a misnomer, it really gives maximum user virtual
  * address that the kernel will allocate out.
+ *
+ * XXX No longer using virtual page tables, kill this upper limit...
  */
 #define VA_BITS		44
 #ifndef __ASSEMBLY__
@@ -37,18 +39,6 @@
 #endif
 #define TASK_SIZE	((unsigned long)-VPTE_SIZE)
 
-/*
- * The vpte base must be able to hold the entire vpte, half
- * of which lives above, and half below, the base. And it
- * is placed as close to the highest address range as possible.
- */
-#define VPTE_BASE_SPITFIRE	(-(VPTE_SIZE/2))
-#if 1
-#define VPTE_BASE_CHEETAH	VPTE_BASE_SPITFIRE
-#else
-#define VPTE_BASE_CHEETAH	0xffe0000000000000
-#endif
-
 #ifndef __ASSEMBLY__
 
 typedef struct {
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3ef9909ac3ac..9ad5d9c51d42 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -5,6 +5,11 @@
 #include <linux/mm.h>
 #include <asm/mmu_context.h>
 
+/* TSB flush operations. */
+struct mmu_gather;
+extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tsb_user(struct mmu_gather *mp);
+
 /* TLB flush operations. */
 
 extern void flush_tlb_pending(void);
@@ -14,28 +19,36 @@ extern void flush_tlb_pending(void);
 #define flush_tlb_page(vma,addr)	flush_tlb_pending()
 #define flush_tlb_mm(mm)		flush_tlb_pending()
 
+/* Local cpu only.  */
 extern void __flush_tlb_all(void);
+
 extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r);
 
 extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #ifndef CONFIG_SMP
 
-#define flush_tlb_all() __flush_tlb_all()
 #define flush_tlb_kernel_range(start,end) \
-	__flush_tlb_kernel_range(start,end)
+do {	flush_tsb_kernel_range(start,end); \
+	__flush_tlb_kernel_range(start,end); \
+} while (0)
 
 #else /* CONFIG_SMP */
 
-extern void smp_flush_tlb_all(void);
 extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
-#define flush_tlb_all() smp_flush_tlb_all()
 #define flush_tlb_kernel_range(start, end) \
-	smp_flush_tlb_kernel_range(start, end)
+do {	flush_tsb_kernel_range(start,end); \
+	smp_flush_tlb_kernel_range(start, end); \
+} while (0)
 
 #endif /* ! CONFIG_SMP */
 
-extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long);
+static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	/* We don't use virtual page tables for TLB miss processing
+	 * any more.  Nowadays we use the TSB.
+	 */
+}
 
 #endif /* _SPARC64_TLBFLUSH_H */
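
Note the ordering the new flush_tlb_kernel_range() wrappers enforce: TSB entries are scrubbed before the TLB shootdown, presumably so that a TLB miss racing with the flush cannot re-install a stale translation out of a not-yet-flushed TSB slot after the TLB itself has already been invalidated. A compilable toy sketch of that pairing, with stubbed flush primitives (the real flush_tsb_kernel_range() lives in the C side of this patch, outside these headers; all names below are illustrative):

#include <stdio.h>

/* Stubs standing in for the real TSB/TLB flush primitives. */
static void stub_flush_tsb_kernel_range(unsigned long s, unsigned long e)
{
	printf("tsb flush [%#lx,%#lx)\n", s, e);
}

static void stub_flush_tlb_kernel_range(unsigned long s, unsigned long e)
{
	printf("tlb flush [%#lx,%#lx)\n", s, e);
}

/* Same shape as the new macros: TSB first, then TLB, wrapped in
 * do { } while (0) so the pair behaves as a single statement (e.g.
 * after an unbraced if).
 */
#define flush_kernel_range_model(start, end)		\
do {	stub_flush_tsb_kernel_range(start, end);	\
	stub_flush_tlb_kernel_range(start, end);	\
} while (0)

int main(void)
{
	if (1)
		flush_kernel_range_model(0x400000UL, 0x600000UL);
	return 0;
}
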
diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h
new file mode 100644
index 000000000000..03d272e0e477
--- /dev/null
+++ b/include/asm-sparc64/tsb.h
@@ -0,0 +1,165 @@
+#ifndef _SPARC64_TSB_H
+#define _SPARC64_TSB_H
+
+/* The sparc64 TSB is similar to the powerpc hashtables.  It's a
+ * power-of-2 sized table of TAG/PTE pairs.  The cpu precomputes
+ * pointers into this table for 8K and 64K page sizes, and also a
+ * comparison TAG based upon the virtual address and context which
+ * faults.
+ *
+ * TLB miss trap handler software does the actual lookup via something
+ * of the form:
+ *
+ *	ldxa		[%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1
+ *	ldxa		[%g0] ASI_{D,I}MMU, %g6
+ *	ldda		[%g1] ASI_NUCLEUS_QUAD_LDD, %g4
+ *	cmp		%g4, %g6
+ *	bne,pn		%xcc, tsb_miss_{d,i}tlb
+ *	 mov		FAULT_CODE_{D,I}TLB, %g3
+ *	stxa		%g5, [%g0] ASI_{D,I}TLB_DATA_IN
+ *	retry
+ *
+ *
+ * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
+ * PTE.  The TAG is of the same layout as the TLB TAG TARGET mmu
+ * register which is:
+ *
+ * -------------------------------------------------
+ * |  -  |  CONTEXT |  -  |    VADDR bits 63:22    |
+ * -------------------------------------------------
+ *  63 61 60      48 47 42 41                     0
+ *
+ * Like the powerpc hashtables we need to use locking in order to
+ * synchronize while we update the entries.  PTE updates need locking
+ * as well.
+ *
+ * We need to carefully choose a lock bit for the TSB entry.  We
+ * choose to use bit 47 in the tag.  Also, since we never map anything
+ * at page zero in context zero, we use zero as an invalid tag entry.
+ * When the lock bit is set, this forces a tag comparison failure.
+ *
+ * Currently, we allocate an 8K TSB per-process and we use it for both
+ * I-TLB and D-TLB misses.  Perhaps at some point we'll add code that
+ * monitors the number of active pages in the process as we get
+ * major/minor faults, and grow the TSB in response.  The only trick
+ * in implementing that is synchronizing the freeing of the old TSB
+ * wrt. parallel TSB updates occurring on other processors.  One
+ * possible solution is to use RCU for the freeing of the TSB.
+ */
+
+#define TSB_TAG_LOCK	(1 << (47 - 32))
+
+#define TSB_MEMBAR	membar	#StoreStore
+
+#define TSB_LOCK_TAG(TSB, REG1, REG2)	\
+99:	lduwa	[TSB] ASI_N, REG1;	\
+	sethi	%hi(TSB_TAG_LOCK), REG2;\
+	andcc	REG1, REG2, %g0;	\
+	bne,pn	%icc, 99b;		\
+	 nop;				\
+	casa	[TSB] ASI_N, REG1, REG2;\
+	cmp	REG1, REG2;		\
+	bne,pn	%icc, 99b;		\
+	 nop;				\
+	TSB_MEMBAR
+
+#define TSB_WRITE(TSB, TTE, TAG)	\
+	stx	TTE, [TSB + 0x08];	\
+	TSB_MEMBAR;			\
+	stx	TAG, [TSB + 0x00];
+
+	/* Do a kernel page table walk.  Leaves physical PTE pointer in
+	 * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
+	 * VADDR will not be clobbered, but REG2 will.
+	 */
+#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)	\
+	sethi	%hi(swapper_pg_dir), REG1; \
+	or	REG1, %lo(swapper_pg_dir), REG1; \
+	sllx	VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+	srlx	REG2, 64 - PAGE_SHIFT, REG2; \
+	andn	REG2, 0x3, REG2; \
+	lduw	[REG1 + REG2], REG1; \
+	brz,pn	REG1, FAIL_LABEL; \
+	 sllx	VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+	srlx	REG2, 64 - PAGE_SHIFT, REG2; \
+	sllx	REG1, 11, REG1; \
+	andn	REG2, 0x3, REG2; \
+	lduwa	[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn	REG1, FAIL_LABEL; \
+	 sllx	VADDR, 64 - PMD_SHIFT, REG2; \
+	srlx	REG2, 64 - PAGE_SHIFT, REG2; \
+	sllx	REG1, 11, REG1; \
+	andn	REG2, 0x7, REG2; \
+	add	REG1, REG2, REG1;
+
+	/* Do a user page table walk in MMU globals.  Leaves physical PTE
+	 * pointer in REG1.  Jumps to FAIL_LABEL on early page table walk
+	 * termination.  Physical base of page tables is in PHYS_PGD which
+	 * will not be modified.
+	 *
+	 * VADDR will not be clobbered, but REG1 and REG2 will.
+	 */
+#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL)	\
+	sllx	VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+	srlx	REG2, 64 - PAGE_SHIFT, REG2; \
+	andn	REG2, 0x3, REG2; \
+	lduwa	[PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn	REG1, FAIL_LABEL; \
+	 sllx	VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+	srlx	REG2, 64 - PAGE_SHIFT, REG2; \
+	sllx	REG1, 11, REG1; \
+	andn	REG2, 0x3, REG2; \
+	lduwa	[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn	REG1, FAIL_LABEL; \
+	 sllx	VADDR, 64 - PMD_SHIFT, REG2; \
+	srlx	REG2, 64 - PAGE_SHIFT, REG2; \
+	sllx	REG1, 11, REG1; \
+	andn	REG2, 0x7, REG2; \
+	add	REG1, REG2, REG1;
+
+/* Look up an OBP mapping on VADDR in the prom_trans[] table at TL>0.
+ * If no entry is found, FAIL_LABEL will be branched to.  On success
+ * the resulting PTE value will be left in REG1.  VADDR is preserved
+ * by this routine.
+ */
+#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \
+	sethi	%hi(prom_trans), REG1; \
+	or	REG1, %lo(prom_trans), REG1; \
+97:	ldx	[REG1 + 0x00], REG2; \
+	brz,pn	REG2, FAIL_LABEL; \
+	 nop; \
+	ldx	[REG1 + 0x08], REG3; \
+	add	REG2, REG3, REG3; \
+	cmp	REG2, VADDR; \
+	bgu,pt	%xcc, 98f; \
+	 cmp	VADDR, REG3; \
+	bgeu,pt	%xcc, 98f; \
+	 ldx	[REG1 + 0x10], REG3; \
+	sub	VADDR, REG2, REG2; \
+	ba,pt	%xcc, 99f; \
+	 add	REG3, REG2, REG1; \
+98:	ba,pt	%xcc, 97b; \
+	 add	REG1, (3 * 8), REG1; \
+99:
+
+	/* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+	 * on TSB hit.  REG1, REG2, REG3, and REG4 are used as temporaries
+	 * and the found TTE will be left in REG1.  REG3 and REG4 must
+	 * be an even/odd pair of registers.
+	 *
+	 * VADDR and TAG will be preserved and not clobbered by this macro.
+	 */
+	/* XXX non-8K base page size support... */
+#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+	sethi	%hi(swapper_tsb), REG1; \
+	or	REG1, %lo(swapper_tsb), REG1; \
+	srlx	VADDR, 13, REG2; \
+	and	REG2, (512 - 1), REG2; \
+	sllx	REG2, 4, REG2; \
+	add	REG1, REG2, REG2; \
+	ldda	[REG2] ASI_NUCLEUS_QUAD_LDD, REG3; \
+	cmp	REG3, TAG; \
+	be,a,pt	%xcc, OK_LABEL; \
+	 mov	REG4, REG1;
+
+#endif /* !(_SPARC64_TSB_H) */
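
The header comment above pins down the whole geometry: the 8K table holds 512 slots of 16 bytes; the slot index comes from the vaddr bits above the 13-bit page offset (the srlx 13 / and 511 / sllx 4 sequence in KERN_TSB_LOOKUP_TL1); the tag carries the context in bits 60:48 and vaddr bits 63:22 in bits 41:0; bit 47 is the lock. A self-contained userspace model of that layout and of the TSB_LOCK_TAG/TSB_WRITE store ordering follows — single-threaded, so plain stores stand in for the casa/membar sequence, and all names are illustrative, not kernel APIs:

#include <stdint.h>
#include <stdio.h>

#define TSB_ENTRIES	512		/* 8K TSB / 16 bytes per slot */

/* Bit 47 of the full 64-bit tag; the header spells it (1 << (47 - 32))
 * because the assembly manipulates only the tag's upper 32-bit word.
 */
#define TSB_TAG_LOCK64	(1UL << 47)

struct tsb_entry {
	uint64_t tag;			/* TLB TAG TARGET layout */
	uint64_t pte;
};

/* Context in bits 60:48, vaddr bits 63:22 in bits 41:0. */
static uint64_t tsb_tag(uint64_t ctx, uint64_t vaddr)
{
	return ((ctx & 0x1fffUL) << 48) | (vaddr >> 22);
}

/* 8K base pages: the slot index is taken just above the page offset. */
static struct tsb_entry *tsb_slot(struct tsb_entry *tsb, uint64_t vaddr)
{
	return &tsb[(vaddr >> 13) & (TSB_ENTRIES - 1)];
}

/* Mirrors TSB_LOCK_TAG + TSB_WRITE: lock the tag so concurrent compares
 * fail, store the PTE, then store the final tag (which clears the lock).
 */
static void tsb_insert(struct tsb_entry *tsb, uint64_t ctx, uint64_t vaddr,
		       uint64_t pte)
{
	struct tsb_entry *ent = tsb_slot(tsb, vaddr);

	ent->tag = TSB_TAG_LOCK64;
	ent->pte = pte;
	ent->tag = tsb_tag(ctx, vaddr);
}

/* A hit needs an exact tag match; an all-zero tag (never valid, since
 * page zero of context zero is never mapped) or a locked tag misses.
 */
static int tsb_lookup(struct tsb_entry *tsb, uint64_t ctx, uint64_t vaddr,
		      uint64_t *pte_out)
{
	struct tsb_entry *ent = tsb_slot(tsb, vaddr);

	if (ent->tag != tsb_tag(ctx, vaddr))
		return 0;	/* miss: fall back to the page table walk */
	*pte_out = ent->pte;
	return 1;
}

int main(void)
{
	static struct tsb_entry tsb[TSB_ENTRIES];	/* zeroed == invalid */
	uint64_t pte = 0;

	tsb_insert(tsb, 5, 0x2000, 0x80000000000000b6UL);
	printf("ctx 5: hit=%d pte=%016llx\n",
	       tsb_lookup(tsb, 5, 0x2000, &pte), (unsigned long long)pte);
	printf("ctx 6: hit=%d\n", tsb_lookup(tsb, 6, 0x2000, &pte));
	return 0;
}
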