about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2006-03-22 03:49:59 -0500
committer David S. Miller <davem@sunset.davemloft.net> 2006-03-22 04:15:14 -0500
commit dcc1e8dd88d4bc55e32a26dad7633d20ffe606d2 (patch)
tree a47592213d94f918867d3dd81bb91dac3e727dea
parent 14778d9072e53d2171f66ffd9657daff41acfaed (diff)
[SPARC64]: Add a secondary TSB for hugepage mappings.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/sparc64/Kconfig 4
-rw-r--r-- arch/sparc64/kernel/sun4v_tlb_miss.S 39
-rw-r--r-- arch/sparc64/kernel/traps.c 21
-rw-r--r-- arch/sparc64/kernel/tsb.S 210
-rw-r--r-- arch/sparc64/mm/fault.c 15
-rw-r--r-- arch/sparc64/mm/hugetlbpage.c 28
-rw-r--r-- arch/sparc64/mm/init.c 21
-rw-r--r-- arch/sparc64/mm/tsb.c 234
-rw-r--r-- include/asm-sparc64/cpudata.h 5
-rw-r--r-- include/asm-sparc64/mmu.h 29
-rw-r--r-- include/asm-sparc64/mmu_context.h 21
-rw-r--r-- include/asm-sparc64/page.h 34
-rw-r--r-- include/asm-sparc64/pgtable.h 2
13 files changed, 462 insertions, 201 deletions
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index c3685b314d71..267afddf63cf 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -175,11 +175,11 @@ config HUGETLB_PAGE_SIZE_4MB
175 bool "4MB" 175 bool "4MB"
176 176
177config HUGETLB_PAGE_SIZE_512K 177config HUGETLB_PAGE_SIZE_512K
178 depends on !SPARC64_PAGE_SIZE_4MB 178 depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB
179 bool "512K" 179 bool "512K"
180 180
181config HUGETLB_PAGE_SIZE_64K 181config HUGETLB_PAGE_SIZE_64K
182 depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB 182 depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB && !SPARC64_PAGE_SIZE_64K
183 bool "64K" 183 bool "64K"
184 184
185endchoice 185endchoice
diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S
index ab23ddb7116e..b731881224e8 100644
--- a/arch/sparc64/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc64/kernel/sun4v_tlb_miss.S
@@ -29,15 +29,15 @@
29 * 29 *
30 * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL; 30 * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL;
31 * tsb_base = tsb_reg & ~0x7UL; 31 * tsb_base = tsb_reg & ~0x7UL;
32 * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask); 32 * tsb_index = ((vaddr >> HASH_SHIFT) & tsb_mask);
33 * tsb_ptr = tsb_base + (tsb_index * 16); 33 * tsb_ptr = tsb_base + (tsb_index * 16);
34 */ 34 */
35#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, TMP1, TMP2) \ 35#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, HASH_SHIFT, TMP1, TMP2) \
36 and TSB_PTR, 0x7, TMP1; \ 36 and TSB_PTR, 0x7, TMP1; \
37 mov 512, TMP2; \ 37 mov 512, TMP2; \
38 andn TSB_PTR, 0x7, TSB_PTR; \ 38 andn TSB_PTR, 0x7, TSB_PTR; \
39 sllx TMP2, TMP1, TMP2; \ 39 sllx TMP2, TMP1, TMP2; \
40 srlx VADDR, PAGE_SHIFT, TMP1; \ 40 srlx VADDR, HASH_SHIFT, TMP1; \
41 sub TMP2, 1, TMP2; \ 41 sub TMP2, 1, TMP2; \
42 and TMP1, TMP2, TMP1; \ 42 and TMP1, TMP2, TMP1; \
43 sllx TMP1, 4, TMP1; \ 43 sllx TMP1, 4, TMP1; \
@@ -53,7 +53,7 @@ sun4v_itlb_miss:
53 53
54 LOAD_ITLB_INFO(%g2, %g4, %g5) 54 LOAD_ITLB_INFO(%g2, %g4, %g5)
55 COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v) 55 COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v)
56 COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7) 56 COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7)
57 57
58 /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ 58 /* Load TSB tag/pte into %g2/%g3 and compare the tag. */
59 ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 59 ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2
@@ -99,7 +99,7 @@ sun4v_dtlb_miss:
99 99
100 LOAD_DTLB_INFO(%g2, %g4, %g5) 100 LOAD_DTLB_INFO(%g2, %g4, %g5)
101 COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v) 101 COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v)
102 COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7) 102 COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7)
103 103
104 /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ 104 /* Load TSB tag/pte into %g2/%g3 and compare the tag. */
105 ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 105 ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2
@@ -171,21 +171,26 @@ sun4v_dtsb_miss:
171 171
172 /* fallthrough */ 172 /* fallthrough */
173 173
174 /* Create TSB pointer into %g1. This is something like:
175 *
176 * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL;
177 * tsb_base = tsb_reg & ~0x7UL;
178 * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask);
179 * tsb_ptr = tsb_base + (tsb_index * 16);
180 */
181sun4v_tsb_miss_common: 174sun4v_tsb_miss_common:
182 COMPUTE_TSB_PTR(%g1, %g4, %g5, %g7) 175 COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g5, %g7)
183 176
184 /* Branch directly to page table lookup. We have SCRATCHPAD_MMU_MISS
185 * still in %g2, so it's quite trivial to get at the PGD PHYS value
186 * so we can preload it into %g7.
187 */
188 sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 177 sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
178
179#ifdef CONFIG_HUGETLB_PAGE
180 mov SCRATCHPAD_UTSBREG2, %g5
181 ldxa [%g5] ASI_SCRATCHPAD, %g5
182 cmp %g5, -1
183 be,pt %xcc, 80f
184 nop
185 COMPUTE_TSB_PTR(%g5, %g4, HPAGE_SHIFT, %g2, %g7)
186
187 /* That clobbered %g2, reload it. */
188 ldxa [%g0] ASI_SCRATCHPAD, %g2
189 sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
190
191 80: stx %g5, [%g2 + TRAP_PER_CPU_TSB_HUGE_TEMP]
192#endif
193
189 ba,pt %xcc, tsb_miss_page_table_walk_sun4v_fastpath 194 ba,pt %xcc, tsb_miss_page_table_walk_sun4v_fastpath
190 ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 195 ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7
191 196
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 7f7dba0ca96a..df612e4f75f9 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2482,6 +2482,7 @@ void init_cur_cpu_trap(struct thread_info *t)
2482 2482
2483extern void thread_info_offsets_are_bolixed_dave(void); 2483extern void thread_info_offsets_are_bolixed_dave(void);
2484extern void trap_per_cpu_offsets_are_bolixed_dave(void); 2484extern void trap_per_cpu_offsets_are_bolixed_dave(void);
2485extern void tsb_config_offsets_are_bolixed_dave(void);
2485 2486
2486/* Only invoked on boot processor. */ 2487/* Only invoked on boot processor. */
2487void __init trap_init(void) 2488void __init trap_init(void)
@@ -2535,9 +2536,27 @@ void __init trap_init(void)
2535 (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != 2536 (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
2536 offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || 2537 offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
2537 (TRAP_PER_CPU_CPU_LIST_PA != 2538 (TRAP_PER_CPU_CPU_LIST_PA !=
2538 offsetof(struct trap_per_cpu, cpu_list_pa))) 2539 offsetof(struct trap_per_cpu, cpu_list_pa)) ||
2540 (TRAP_PER_CPU_TSB_HUGE !=
2541 offsetof(struct trap_per_cpu, tsb_huge)) ||
2542 (TRAP_PER_CPU_TSB_HUGE_TEMP !=
2543 offsetof(struct trap_per_cpu, tsb_huge_temp)))
2539 trap_per_cpu_offsets_are_bolixed_dave(); 2544 trap_per_cpu_offsets_are_bolixed_dave();
2540 2545
2546 if ((TSB_CONFIG_TSB !=
2547 offsetof(struct tsb_config, tsb)) ||
2548 (TSB_CONFIG_RSS_LIMIT !=
2549 offsetof(struct tsb_config, tsb_rss_limit)) ||
2550 (TSB_CONFIG_NENTRIES !=
2551 offsetof(struct tsb_config, tsb_nentries)) ||
2552 (TSB_CONFIG_REG_VAL !=
2553 offsetof(struct tsb_config, tsb_reg_val)) ||
2554 (TSB_CONFIG_MAP_VADDR !=
2555 offsetof(struct tsb_config, tsb_map_vaddr)) ||
2556 (TSB_CONFIG_MAP_PTE !=
2557 offsetof(struct tsb_config, tsb_map_pte)))
2558 tsb_config_offsets_are_bolixed_dave();
2559
2541 /* Attach to the address space of init_task. On SMP we 2560 /* Attach to the address space of init_task. On SMP we
2542 * do this in smp.c:smp_callin for other cpus. 2561 * do this in smp.c:smp_callin for other cpus.
2543 */ 2562 */
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index 118baea44f69..a0c8ba58920b 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -3,8 +3,13 @@
3 * Copyright (C) 2006 David S. Miller <davem@davemloft.net> 3 * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
4 */ 4 */
5 5
6#include <linux/config.h>
7
6#include <asm/tsb.h> 8#include <asm/tsb.h>
7#include <asm/hypervisor.h> 9#include <asm/hypervisor.h>
10#include <asm/page.h>
11#include <asm/cpudata.h>
12#include <asm/mmu.h>
8 13
9 .text 14 .text
10 .align 32 15 .align 32
@@ -34,34 +39,124 @@ tsb_miss_itlb:
34 ldxa [%g4] ASI_IMMU, %g4 39 ldxa [%g4] ASI_IMMU, %g4
35 40
36 /* At this point we have: 41 /* At this point we have:
37 * %g1 -- TSB entry address 42 * %g1 -- PAGE_SIZE TSB entry address
38 * %g3 -- FAULT_CODE_{D,I}TLB 43 * %g3 -- FAULT_CODE_{D,I}TLB
39 * %g4 -- missing virtual address 44 * %g4 -- missing virtual address
40 * %g6 -- TAG TARGET (vaddr >> 22) 45 * %g6 -- TAG TARGET (vaddr >> 22)
41 */ 46 */
42tsb_miss_page_table_walk: 47tsb_miss_page_table_walk:
43 TRAP_LOAD_PGD_PHYS(%g7, %g5) 48 TRAP_LOAD_TRAP_BLOCK(%g7, %g5)
44 49
45 /* And now we have the PGD base physical address in %g7. */ 50 /* Before committing to a full page table walk,
46tsb_miss_page_table_walk_sun4v_fastpath: 51 * check the huge page TSB.
47 USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) 52 */
53#ifdef CONFIG_HUGETLB_PAGE
54
55 661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
56 nop
57 .section .sun4v_2insn_patch, "ax"
58 .word 661b
59 mov SCRATCHPAD_UTSBREG2, %g5
60 ldxa [%g5] ASI_SCRATCHPAD, %g5
61 .previous
62
63 cmp %g5, -1
64 be,pt %xcc, 80f
65 nop
66
67 /* We need an aligned pair of registers containing 2 values
68 * which can be easily rematerialized. %g6 and %g7 foot the
69 * bill just nicely. We'll save %g6 away into %g2 for the
70 * huge page TSB TAG comparison.
71 *
72 * Perform a huge page TSB lookup.
73 */
74 mov %g6, %g2
75 and %g5, 0x7, %g6
76 mov 512, %g7
77 andn %g5, 0x7, %g5
78 sllx %g7, %g6, %g7
79 srlx %g4, HPAGE_SHIFT, %g6
80 sub %g7, 1, %g7
81 and %g6, %g7, %g6
82 sllx %g6, 4, %g6
83 add %g5, %g6, %g5
84
85 TSB_LOAD_QUAD(%g5, %g6)
86 cmp %g6, %g2
87 be,a,pt %xcc, tsb_tlb_reload
88 mov %g7, %g5
89
90 /* No match, remember the huge page TSB entry address,
91 * and restore %g6 and %g7.
92 */
93 TRAP_LOAD_TRAP_BLOCK(%g7, %g6)
94 srlx %g4, 22, %g6
95 80: stx %g5, [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP]
96
97#endif
98
99 ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7
48 100
49 /* At this point we have: 101 /* At this point we have:
50 * %g1 -- TSB entry address 102 * %g1 -- TSB entry address
51 * %g3 -- FAULT_CODE_{D,I}TLB 103 * %g3 -- FAULT_CODE_{D,I}TLB
52 * %g5 -- physical address of PTE in Linux page tables 104 * %g4 -- missing virtual address
53 * %g6 -- TAG TARGET (vaddr >> 22) 105 * %g6 -- TAG TARGET (vaddr >> 22)
106 * %g7 -- page table physical address
107 *
108 * We know that both the base PAGE_SIZE TSB and the HPAGE_SIZE
109 * TSB both lack a matching entry.
54 */ 110 */
55tsb_reload: 111tsb_miss_page_table_walk_sun4v_fastpath:
56 TSB_LOCK_TAG(%g1, %g2, %g7) 112 USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
57 113
58 /* Load and check PTE. */ 114 /* Load and check PTE. */
59 ldxa [%g5] ASI_PHYS_USE_EC, %g5 115 ldxa [%g5] ASI_PHYS_USE_EC, %g5
60 mov 1, %g7 116 brgez,pn %g5, tsb_do_fault
61 sllx %g7, TSB_TAG_INVALID_BIT, %g7 117 nop
62 brgez,a,pn %g5, tsb_do_fault 118
63 TSB_STORE(%g1, %g7) 119#ifdef CONFIG_HUGETLB_PAGE
120 661: sethi %uhi(_PAGE_SZALL_4U), %g7
121 sllx %g7, 32, %g7
122 .section .sun4v_2insn_patch, "ax"
123 .word 661b
124 mov _PAGE_SZALL_4V, %g7
125 nop
126 .previous
127
128 and %g5, %g7, %g2
129
130 661: sethi %uhi(_PAGE_SZHUGE_4U), %g7
131 sllx %g7, 32, %g7
132 .section .sun4v_2insn_patch, "ax"
133 .word 661b
134 mov _PAGE_SZHUGE_4V, %g7
135 nop
136 .previous
137
138 cmp %g2, %g7
139 bne,pt %xcc, 60f
140 nop
141
142 /* It is a huge page, use huge page TSB entry address we
143 * calculated above.
144 */
145 TRAP_LOAD_TRAP_BLOCK(%g7, %g2)
146 ldx [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g2
147 cmp %g2, -1
148 movne %xcc, %g2, %g1
149 60:
150#endif
64 151
152 /* At this point we have:
153 * %g1 -- TSB entry address
154 * %g3 -- FAULT_CODE_{D,I}TLB
155 * %g5 -- valid PTE
156 * %g6 -- TAG TARGET (vaddr >> 22)
157 */
158tsb_reload:
159 TSB_LOCK_TAG(%g1, %g2, %g7)
65 TSB_WRITE(%g1, %g5, %g6) 160 TSB_WRITE(%g1, %g5, %g6)
66 161
67 /* Finally, load TLB and return from trap. */ 162 /* Finally, load TLB and return from trap. */
@@ -240,10 +335,9 @@ tsb_flush:
240 * schedule() time. 335 * schedule() time.
241 * 336 *
242 * %o0: page table physical address 337 * %o0: page table physical address
243 * %o1: TSB register value 338 * %o1: TSB base config pointer
244 * %o2: TSB virtual address 339 * %o2: TSB huge config pointer, or NULL if none
245 * %o3: TSB mapping locked PTE 340 * %o3: Hypervisor TSB descriptor physical address
246 * %o4: Hypervisor TSB descriptor physical address
247 * 341 *
248 * We have to run this whole thing with interrupts 342 * We have to run this whole thing with interrupts
249 * disabled so that the current cpu doesn't change 343 * disabled so that the current cpu doesn't change
@@ -253,63 +347,79 @@ tsb_flush:
253 .globl __tsb_context_switch 347 .globl __tsb_context_switch
254 .type __tsb_context_switch,#function 348 .type __tsb_context_switch,#function
255__tsb_context_switch: 349__tsb_context_switch:
256 rdpr %pstate, %o5 350 rdpr %pstate, %g1
257 wrpr %o5, PSTATE_IE, %pstate 351 wrpr %g1, PSTATE_IE, %pstate
352
353 TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
258 354
259 ldub [%g6 + TI_CPU], %g1
260 sethi %hi(trap_block), %g2
261 sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1
262 or %g2, %lo(trap_block), %g2
263 add %g2, %g1, %g2
264 stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] 355 stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
265 356
266 sethi %hi(tlb_type), %g1 357 ldx [%o1 + TSB_CONFIG_REG_VAL], %o0
267 lduw [%g1 + %lo(tlb_type)], %g1 358 brz,pt %o2, 1f
268 cmp %g1, 3 359 mov -1, %g3
269 bne,pt %icc, 1f 360
361 ldx [%o2 + TSB_CONFIG_REG_VAL], %g3
362
363 1: stx %g3, [%g2 + TRAP_PER_CPU_TSB_HUGE]
364
365 sethi %hi(tlb_type), %g2
366 lduw [%g2 + %lo(tlb_type)], %g2
367 cmp %g2, 3
368 bne,pt %icc, 50f
270 nop 369 nop
271 370
272 /* Hypervisor TSB switch. */ 371 /* Hypervisor TSB switch. */
273 mov SCRATCHPAD_UTSBREG1, %g1 372 mov SCRATCHPAD_UTSBREG1, %o5
274 stxa %o1, [%g1] ASI_SCRATCHPAD 373 stxa %o0, [%o5] ASI_SCRATCHPAD
275 mov -1, %g2 374 mov SCRATCHPAD_UTSBREG2, %o5
276 mov SCRATCHPAD_UTSBREG2, %g1 375 stxa %g3, [%o5] ASI_SCRATCHPAD
277 stxa %g2, [%g1] ASI_SCRATCHPAD 376
278 377 mov 2, %o0
279 /* Save away %o5's %pstate, we have to use %o5 for 378 cmp %g3, -1
280 * the hypervisor call. 379 move %xcc, 1, %o0
281 */
282 mov %o5, %g1
283 380
284 mov HV_FAST_MMU_TSB_CTXNON0, %o5 381 mov HV_FAST_MMU_TSB_CTXNON0, %o5
285 mov 1, %o0 382 mov %o3, %o1
286 mov %o4, %o1
287 ta HV_FAST_TRAP 383 ta HV_FAST_TRAP
288 384
289 /* Finish up and restore %o5. */ 385 /* Finish up. */
290 ba,pt %xcc, 9f 386 ba,pt %xcc, 9f
291 mov %g1, %o5 387 nop
292 388
293 /* SUN4U TSB switch. */ 389 /* SUN4U TSB switch. */
294 1: mov TSB_REG, %g1 390 50: mov TSB_REG, %o5
295 stxa %o1, [%g1] ASI_DMMU 391 stxa %o0, [%o5] ASI_DMMU
296 membar #Sync 392 membar #Sync
297 stxa %o1, [%g1] ASI_IMMU 393 stxa %o0, [%o5] ASI_IMMU
298 membar #Sync 394 membar #Sync
299 395
300 2: brz %o2, 9f 396 2: ldx [%o1 + TSB_CONFIG_MAP_VADDR], %o4
301 nop 397 brz %o4, 9f
398 ldx [%o1 + TSB_CONFIG_MAP_PTE], %o5
302 399
303 sethi %hi(sparc64_highest_unlocked_tlb_ent), %g2 400 sethi %hi(sparc64_highest_unlocked_tlb_ent), %g2
304 mov TLB_TAG_ACCESS, %g1 401 mov TLB_TAG_ACCESS, %g3
305 lduw [%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2 402 lduw [%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2
306 stxa %o2, [%g1] ASI_DMMU 403 stxa %o4, [%g3] ASI_DMMU
307 membar #Sync 404 membar #Sync
308 sllx %g2, 3, %g2 405 sllx %g2, 3, %g2
309 stxa %o3, [%g2] ASI_DTLB_DATA_ACCESS 406 stxa %o5, [%g2] ASI_DTLB_DATA_ACCESS
407 membar #Sync
408
409 brz,pt %o2, 9f
410 nop
411
412 ldx [%o2 + TSB_CONFIG_MAP_VADDR], %o4
413 ldx [%o2 + TSB_CONFIG_MAP_PTE], %o5
414 mov TLB_TAG_ACCESS, %g3
415 stxa %o4, [%g3] ASI_DMMU
416 membar #Sync
417 sub %g2, (1 << 3), %g2
418 stxa %o5, [%g2] ASI_DTLB_DATA_ACCESS
310 membar #Sync 419 membar #Sync
420
311 9: 421 9:
312 wrpr %o5, %pstate 422 wrpr %g1, %pstate
313 423
314 retl 424 retl
315 nop 425 nop
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 63b6cc0cd5d5..d21ff3230c02 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -410,9 +410,18 @@ good_area:
410 up_read(&mm->mmap_sem); 410 up_read(&mm->mmap_sem);
411 411
412 mm_rss = get_mm_rss(mm); 412 mm_rss = get_mm_rss(mm);
413 if (unlikely(mm_rss >= mm->context.tsb_rss_limit)) 413#ifdef CONFIG_HUGETLB_PAGE
414 tsb_grow(mm, mm_rss); 414 mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
415 415#endif
416 if (unlikely(mm_rss >=
417 mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
418 tsb_grow(mm, MM_TSB_BASE, mm_rss);
419#ifdef CONFIG_HUGETLB_PAGE
420 mm_rss = mm->context.huge_pte_count;
421 if (unlikely(mm_rss >=
422 mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
423 tsb_grow(mm, MM_TSB_HUGE, mm_rss);
424#endif
416 return; 425 return;
417 426
418 /* 427 /*
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index a7a24869d045..0a1d4cd24cda 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -199,13 +199,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
199 pte_t *pte = NULL; 199 pte_t *pte = NULL;
200 200
201 pgd = pgd_offset(mm, addr); 201 pgd = pgd_offset(mm, addr);
202 if (pgd) { 202 pud = pud_alloc(mm, pgd, addr);
203 pud = pud_offset(pgd, addr); 203 if (pud) {
204 if (pud) { 204 pmd = pmd_alloc(mm, pud, addr);
205 pmd = pmd_alloc(mm, pud, addr); 205 if (pmd)
206 if (pmd) 206 pte = pte_alloc_map(mm, pmd, addr);
207 pte = pte_alloc_map(mm, pmd, addr);
208 }
209 } 207 }
210 return pte; 208 return pte;
211} 209}
@@ -231,13 +229,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
231 return pte; 229 return pte;
232} 230}
233 231
234#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
235
236void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 232void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
237 pte_t *ptep, pte_t entry) 233 pte_t *ptep, pte_t entry)
238{ 234{
239 int i; 235 int i;
240 236
237 if (!pte_present(*ptep) && pte_present(entry))
238 mm->context.huge_pte_count++;
239
241 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 240 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
242 set_pte_at(mm, addr, ptep, entry); 241 set_pte_at(mm, addr, ptep, entry);
243 ptep++; 242 ptep++;
@@ -253,6 +252,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
253 int i; 252 int i;
254 253
255 entry = *ptep; 254 entry = *ptep;
255 if (pte_present(entry))
256 mm->context.huge_pte_count--;
256 257
257 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 258 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
258 pte_clear(mm, addr, ptep); 259 pte_clear(mm, addr, ptep);
@@ -302,6 +303,15 @@ static void context_reload(void *__data)
302 303
303void hugetlb_prefault_arch_hook(struct mm_struct *mm) 304void hugetlb_prefault_arch_hook(struct mm_struct *mm)
304{ 305{
306 struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
307
308 if (likely(tp->tsb != NULL))
309 return;
310
311 tsb_grow(mm, MM_TSB_HUGE, 0);
312 tsb_context_switch(mm);
313 smp_tsb_sync(mm);
314
305 /* On UltraSPARC-III+ and later, configure the second half of 315 /* On UltraSPARC-III+ and later, configure the second half of
306 * the Data-TLB for huge pages. 316 * the Data-TLB for huge pages.
307 */ 317 */
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index c2b556106fc1..16d231703d6a 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -283,6 +283,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
283 struct mm_struct *mm; 283 struct mm_struct *mm;
284 struct tsb *tsb; 284 struct tsb *tsb;
285 unsigned long tag, flags; 285 unsigned long tag, flags;
286 unsigned long tsb_index, tsb_hash_shift;
286 287
287 if (tlb_type != hypervisor) { 288 if (tlb_type != hypervisor) {
288 unsigned long pfn = pte_pfn(pte); 289 unsigned long pfn = pte_pfn(pte);
@@ -312,10 +313,26 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
312 313
313 mm = vma->vm_mm; 314 mm = vma->vm_mm;
314 315
316 tsb_index = MM_TSB_BASE;
317 tsb_hash_shift = PAGE_SHIFT;
318
315 spin_lock_irqsave(&mm->context.lock, flags); 319 spin_lock_irqsave(&mm->context.lock, flags);
316 320
317 tsb = &mm->context.tsb[(address >> PAGE_SHIFT) & 321#ifdef CONFIG_HUGETLB_PAGE
318 (mm->context.tsb_nentries - 1UL)]; 322 if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
323 if ((tlb_type == hypervisor &&
324 (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
325 (tlb_type != hypervisor &&
326 (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) {
327 tsb_index = MM_TSB_HUGE;
328 tsb_hash_shift = HPAGE_SHIFT;
329 }
330 }
331#endif
332
333 tsb = mm->context.tsb_block[tsb_index].tsb;
334 tsb += ((address >> tsb_hash_shift) &
335 (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
319 tag = (address >> 22UL); 336 tag = (address >> 22UL);
320 tsb_insert(tsb, tag, pte_val(pte)); 337 tsb_insert(tsb, tag, pte_val(pte));
321 338
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index b2064e2a44d6..beaa02810f0e 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -15,9 +15,9 @@
15 15
16extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; 16extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
17 17
18static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries) 18static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
19{ 19{
20 vaddr >>= PAGE_SHIFT; 20 vaddr >>= hash_shift;
21 return vaddr & (nentries - 1); 21 return vaddr & (nentries - 1);
22} 22}
23 23
@@ -36,7 +36,8 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
36 unsigned long v; 36 unsigned long v;
37 37
38 for (v = start; v < end; v += PAGE_SIZE) { 38 for (v = start; v < end; v += PAGE_SIZE) {
39 unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES); 39 unsigned long hash = tsb_hash(v, PAGE_SHIFT,
40 KERNEL_TSB_NENTRIES);
40 struct tsb *ent = &swapper_tsb[hash]; 41 struct tsb *ent = &swapper_tsb[hash];
41 42
42 if (tag_compare(ent->tag, v)) { 43 if (tag_compare(ent->tag, v)) {
@@ -46,49 +47,91 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
46 } 47 }
47} 48}
48 49
49void flush_tsb_user(struct mmu_gather *mp) 50static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
50{ 51{
51 struct mm_struct *mm = mp->mm; 52 unsigned long i;
52 unsigned long nentries, base, flags;
53 struct tsb *tsb;
54 int i;
55
56 spin_lock_irqsave(&mm->context.lock, flags);
57
58 tsb = mm->context.tsb;
59 nentries = mm->context.tsb_nentries;
60 53
61 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
62 base = __pa(tsb);
63 else
64 base = (unsigned long) tsb;
65
66 for (i = 0; i < mp->tlb_nr; i++) { 54 for (i = 0; i < mp->tlb_nr; i++) {
67 unsigned long v = mp->vaddrs[i]; 55 unsigned long v = mp->vaddrs[i];
68 unsigned long tag, ent, hash; 56 unsigned long tag, ent, hash;
69 57
70 v &= ~0x1UL; 58 v &= ~0x1UL;
71 59
72 hash = tsb_hash(v, nentries); 60 hash = tsb_hash(v, hash_shift, nentries);
73 ent = base + (hash * sizeof(struct tsb)); 61 ent = tsb + (hash * sizeof(struct tsb));
74 tag = (v >> 22UL); 62 tag = (v >> 22UL);
75 63
76 tsb_flush(ent, tag); 64 tsb_flush(ent, tag);
77 } 65 }
66}
67
68void flush_tsb_user(struct mmu_gather *mp)
69{
70 struct mm_struct *mm = mp->mm;
71 unsigned long nentries, base, flags;
72
73 spin_lock_irqsave(&mm->context.lock, flags);
78 74
75 base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
76 nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
77 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
78 base = __pa(base);
79 __flush_tsb_one(mp, PAGE_SHIFT, base, nentries);
80
81#ifdef CONFIG_HUGETLB_PAGE
82 if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
83 base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
84 nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
85 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
86 base = __pa(base);
87 __flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
88 }
89#endif
79 spin_unlock_irqrestore(&mm->context.lock, flags); 90 spin_unlock_irqrestore(&mm->context.lock, flags);
80} 91}
81 92
82static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) 93#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
94#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
95#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K
96#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
97#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K
98#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K
99#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB)
100#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_512K
101#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_512K
102#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB)
103#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_4MB
104#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_4MB
105#else
106#error Broken base page size setting...
107#endif
108
109#ifdef CONFIG_HUGETLB_PAGE
110#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
111#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K
112#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K
113#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
114#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K
115#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K
116#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
117#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB
118#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB
119#else
120#error Broken huge page size setting...
121#endif
122#endif
123
124static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
83{ 125{
84 unsigned long tsb_reg, base, tsb_paddr; 126 unsigned long tsb_reg, base, tsb_paddr;
85 unsigned long page_sz, tte; 127 unsigned long page_sz, tte;
86 128
87 mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb); 129 mm->context.tsb_block[tsb_idx].tsb_nentries =
130 tsb_bytes / sizeof(struct tsb);
88 131
89 base = TSBMAP_BASE; 132 base = TSBMAP_BASE;
90 tte = pgprot_val(PAGE_KERNEL_LOCKED); 133 tte = pgprot_val(PAGE_KERNEL_LOCKED);
91 tsb_paddr = __pa(mm->context.tsb); 134 tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
92 BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); 135 BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
93 136
94 /* Use the smallest page size that can map the whole TSB 137 /* Use the smallest page size that can map the whole TSB
@@ -147,61 +190,49 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
147 /* Physical mapping, no locked TLB entry for TSB. */ 190 /* Physical mapping, no locked TLB entry for TSB. */
148 tsb_reg |= tsb_paddr; 191 tsb_reg |= tsb_paddr;
149 192
150 mm->context.tsb_reg_val = tsb_reg; 193 mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
151 mm->context.tsb_map_vaddr = 0; 194 mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
152 mm->context.tsb_map_pte = 0; 195 mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
153 } else { 196 } else {
154 tsb_reg |= base; 197 tsb_reg |= base;
155 tsb_reg |= (tsb_paddr & (page_sz - 1UL)); 198 tsb_reg |= (tsb_paddr & (page_sz - 1UL));
156 tte |= (tsb_paddr & ~(page_sz - 1UL)); 199 tte |= (tsb_paddr & ~(page_sz - 1UL));
157 200
158 mm->context.tsb_reg_val = tsb_reg; 201 mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
159 mm->context.tsb_map_vaddr = base; 202 mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
160 mm->context.tsb_map_pte = tte; 203 mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
161 } 204 }
162 205
163 /* Setup the Hypervisor TSB descriptor. */ 206 /* Setup the Hypervisor TSB descriptor. */
164 if (tlb_type == hypervisor) { 207 if (tlb_type == hypervisor) {
165 struct hv_tsb_descr *hp = &mm->context.tsb_descr; 208 struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];
166 209
167 switch (PAGE_SIZE) { 210 switch (tsb_idx) {
168 case 8192: 211 case MM_TSB_BASE:
169 default: 212 hp->pgsz_idx = HV_PGSZ_IDX_BASE;
170 hp->pgsz_idx = HV_PGSZ_IDX_8K;
171 break; 213 break;
172 214#ifdef CONFIG_HUGETLB_PAGE
173 case 64 * 1024: 215 case MM_TSB_HUGE:
174 hp->pgsz_idx = HV_PGSZ_IDX_64K; 216 hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
175 break;
176
177 case 512 * 1024:
178 hp->pgsz_idx = HV_PGSZ_IDX_512K;
179 break;
180
181 case 4 * 1024 * 1024:
182 hp->pgsz_idx = HV_PGSZ_IDX_4MB;
183 break; 217 break;
218#endif
219 default:
220 BUG();
184 }; 221 };
185 hp->assoc = 1; 222 hp->assoc = 1;
186 hp->num_ttes = tsb_bytes / 16; 223 hp->num_ttes = tsb_bytes / 16;
187 hp->ctx_idx = 0; 224 hp->ctx_idx = 0;
188 switch (PAGE_SIZE) { 225 switch (tsb_idx) {
189 case 8192: 226 case MM_TSB_BASE:
190 default: 227 hp->pgsz_mask = HV_PGSZ_MASK_BASE;
191 hp->pgsz_mask = HV_PGSZ_MASK_8K;
192 break;
193
194 case 64 * 1024:
195 hp->pgsz_mask = HV_PGSZ_MASK_64K;
196 break;
197
198 case 512 * 1024:
199 hp->pgsz_mask = HV_PGSZ_MASK_512K;
200 break; 228 break;
201 229#ifdef CONFIG_HUGETLB_PAGE
202 case 4 * 1024 * 1024: 230 case MM_TSB_HUGE:
203 hp->pgsz_mask = HV_PGSZ_MASK_4MB; 231 hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
204 break; 232 break;
233#endif
234 default:
235 BUG();
205 }; 236 };
206 hp->tsb_base = tsb_paddr; 237 hp->tsb_base = tsb_paddr;
207 hp->resv = 0; 238 hp->resv = 0;
@@ -241,11 +272,11 @@ void __init tsb_cache_init(void)
241 } 272 }
242} 273}
243 274
244/* When the RSS of an address space exceeds mm->context.tsb_rss_limit, 275/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
245 * do_sparc64_fault() invokes this routine to try and grow the TSB. 276 * do_sparc64_fault() invokes this routine to try and grow it.
246 * 277 *
247 * When we reach the maximum TSB size supported, we stick ~0UL into 278 * When we reach the maximum TSB size supported, we stick ~0UL into
248 * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache() 279 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
249 * will not trigger any longer. 280 * will not trigger any longer.
250 * 281 *
251 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers 282 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
@@ -257,7 +288,7 @@ void __init tsb_cache_init(void)
257 * the number of entries that the current TSB can hold at once. Currently, 288 * the number of entries that the current TSB can hold at once. Currently,
258 * we trigger when the RSS hits 3/4 of the TSB capacity. 289 * we trigger when the RSS hits 3/4 of the TSB capacity.
259 */ 290 */
260void tsb_grow(struct mm_struct *mm, unsigned long rss) 291void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
261{ 292{
262 unsigned long max_tsb_size = 1 * 1024 * 1024; 293 unsigned long max_tsb_size = 1 * 1024 * 1024;
263 unsigned long new_size, old_size, flags; 294 unsigned long new_size, old_size, flags;
@@ -297,7 +328,8 @@ retry_tsb_alloc:
297 * down to a 0-order allocation and force no TSB 328 * down to a 0-order allocation and force no TSB
298 * growing for this address space. 329 * growing for this address space.
299 */ 330 */
300 if (mm->context.tsb == NULL && new_cache_index > 0) { 331 if (mm->context.tsb_block[tsb_index].tsb == NULL &&
332 new_cache_index > 0) {
301 new_cache_index = 0; 333 new_cache_index = 0;
302 new_size = 8192; 334 new_size = 8192;
303 new_rss_limit = ~0UL; 335 new_rss_limit = ~0UL;
@@ -307,8 +339,8 @@ retry_tsb_alloc:
307 /* If we failed on a TSB grow, we are under serious 339 /* If we failed on a TSB grow, we are under serious
308 * memory pressure so don't try to grow any more. 340 * memory pressure so don't try to grow any more.
309 */ 341 */
310 if (mm->context.tsb != NULL) 342 if (mm->context.tsb_block[tsb_index].tsb != NULL)
311 mm->context.tsb_rss_limit = ~0UL; 343 mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
312 return; 344 return;
313 } 345 }
314 346
@@ -339,23 +371,26 @@ retry_tsb_alloc:
339 */ 371 */
340 spin_lock_irqsave(&mm->context.lock, flags); 372 spin_lock_irqsave(&mm->context.lock, flags);
341 373
342 old_tsb = mm->context.tsb; 374 old_tsb = mm->context.tsb_block[tsb_index].tsb;
343 old_cache_index = (mm->context.tsb_reg_val & 0x7UL); 375 old_cache_index =
344 old_size = mm->context.tsb_nentries * sizeof(struct tsb); 376 (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
377 old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
378 sizeof(struct tsb));
345 379
346 380
347 /* Handle multiple threads trying to grow the TSB at the same time. 381 /* Handle multiple threads trying to grow the TSB at the same time.
348 * One will get in here first, and bump the size and the RSS limit. 382 * One will get in here first, and bump the size and the RSS limit.
349 * The others will get in here next and hit this check. 383 * The others will get in here next and hit this check.
350 */ 384 */
351 if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) { 385 if (unlikely(old_tsb &&
386 (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
352 spin_unlock_irqrestore(&mm->context.lock, flags); 387 spin_unlock_irqrestore(&mm->context.lock, flags);
353 388
354 kmem_cache_free(tsb_caches[new_cache_index], new_tsb); 389 kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
355 return; 390 return;
356 } 391 }
357 392
358 mm->context.tsb_rss_limit = new_rss_limit; 393 mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;
359 394
360 if (old_tsb) { 395 if (old_tsb) {
361 extern void copy_tsb(unsigned long old_tsb_base, 396 extern void copy_tsb(unsigned long old_tsb_base,
@@ -372,8 +407,8 @@ retry_tsb_alloc:
372 copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size); 407 copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
373 } 408 }
374 409
375 mm->context.tsb = new_tsb; 410 mm->context.tsb_block[tsb_index].tsb = new_tsb;
376 setup_tsb_params(mm, new_size); 411 setup_tsb_params(mm, tsb_index, new_size);
377 412
378 spin_unlock_irqrestore(&mm->context.lock, flags); 413 spin_unlock_irqrestore(&mm->context.lock, flags);
379 414
@@ -394,40 +429,65 @@ retry_tsb_alloc:
394 429
395int init_new_context(struct task_struct *tsk, struct mm_struct *mm) 430int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
396{ 431{
432#ifdef CONFIG_HUGETLB_PAGE
433 unsigned long huge_pte_count;
434#endif
435 unsigned int i;
436
397 spin_lock_init(&mm->context.lock); 437 spin_lock_init(&mm->context.lock);
398 438
399 mm->context.sparc64_ctx_val = 0UL; 439 mm->context.sparc64_ctx_val = 0UL;
400 440
441#ifdef CONFIG_HUGETLB_PAGE
442 /* We reset it to zero because the fork() page copying
443 * will re-increment the counters as the parent PTEs are
444 * copied into the child address space.
445 */
446 huge_pte_count = mm->context.huge_pte_count;
447 mm->context.huge_pte_count = 0;
448#endif
449
401 /* copy_mm() copies over the parent's mm_struct before calling 450 /* copy_mm() copies over the parent's mm_struct before calling
402 * us, so we need to zero out the TSB pointer or else tsb_grow() 451 * us, so we need to zero out the TSB pointer or else tsb_grow()
403 * will be confused and think there is an older TSB to free up. 452 * will be confused and think there is an older TSB to free up.
404 */ 453 */
405 mm->context.tsb = NULL; 454 for (i = 0; i < MM_NUM_TSBS; i++)
455 mm->context.tsb_block[i].tsb = NULL;
406 456
407 /* If this is fork, inherit the parent's TSB size. We would 457 /* If this is fork, inherit the parent's TSB size. We would
408 * grow it to that size on the first page fault anyways. 458 * grow it to that size on the first page fault anyways.
409 */ 459 */
410 tsb_grow(mm, get_mm_rss(mm)); 460 tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
411 461
412 if (unlikely(!mm->context.tsb)) 462#ifdef CONFIG_HUGETLB_PAGE
463 if (unlikely(huge_pte_count))
464 tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
465#endif
466
467 if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
413 return -ENOMEM; 468 return -ENOMEM;
414 469
415 return 0; 470 return 0;
416} 471}
417 472
418void destroy_context(struct mm_struct *mm) 473static void tsb_destroy_one(struct tsb_config *tp)
419{ 474{
420 unsigned long flags, cache_index; 475 unsigned long cache_index;
421 476
422 cache_index = (mm->context.tsb_reg_val & 0x7UL); 477 if (!tp->tsb)
423 kmem_cache_free(tsb_caches[cache_index], mm->context.tsb); 478 return;
479 cache_index = tp->tsb_reg_val & 0x7UL;
480 kmem_cache_free(tsb_caches[cache_index], tp->tsb);
481 tp->tsb = NULL;
482 tp->tsb_reg_val = 0UL;
483}
424 484
425 /* We can remove these later, but for now it's useful 485void destroy_context(struct mm_struct *mm)
426 * to catch any bogus post-destroy_context() references 486{
427 * to the TSB. 487 unsigned long flags, i;
428 */ 488
429 mm->context.tsb = NULL; 489 for (i = 0; i < MM_NUM_TSBS; i++)
430 mm->context.tsb_reg_val = 0UL; 490 tsb_destroy_one(&mm->context.tsb_block[i]);
431 491
432 spin_lock_irqsave(&ctx_alloc_lock, flags); 492 spin_lock_irqsave(&ctx_alloc_lock, flags);
433 493
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index c66a81bbc84d..9d6a6dbaf126 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -71,7 +71,8 @@ struct trap_per_cpu {
71/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list. */ 71/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list. */
72 unsigned long cpu_mondo_block_pa; 72 unsigned long cpu_mondo_block_pa;
73 unsigned long cpu_list_pa; 73 unsigned long cpu_list_pa;
74 unsigned long __pad1[2]; 74 unsigned long tsb_huge;
75 unsigned long tsb_huge_temp;
75 76
76/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size. */ 77/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size. */
77 unsigned long __pad2[4]; 78 unsigned long __pad2[4];
@@ -116,6 +117,8 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
116#define TRAP_PER_CPU_FAULT_INFO 0x40 117#define TRAP_PER_CPU_FAULT_INFO 0x40
117#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0 118#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0
118#define TRAP_PER_CPU_CPU_LIST_PA 0xc8 119#define TRAP_PER_CPU_CPU_LIST_PA 0xc8
120#define TRAP_PER_CPU_TSB_HUGE 0xd0
121#define TRAP_PER_CPU_TSB_HUGE_TEMP 0xd8
119 122
120#define TRAP_BLOCK_SZ_SHIFT 8 123#define TRAP_BLOCK_SZ_SHIFT 8
121 124
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 230ba678d3b0..2d4f2ea9568a 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -90,18 +90,39 @@ extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte
90extern void tsb_flush(unsigned long ent, unsigned long tag); 90extern void tsb_flush(unsigned long ent, unsigned long tag);
91extern void tsb_init(struct tsb *tsb, unsigned long size); 91extern void tsb_init(struct tsb *tsb, unsigned long size);
92 92
93typedef struct { 93struct tsb_config {
94 spinlock_t lock;
95 unsigned long sparc64_ctx_val;
96 struct tsb *tsb; 94 struct tsb *tsb;
97 unsigned long tsb_rss_limit; 95 unsigned long tsb_rss_limit;
98 unsigned long tsb_nentries; 96 unsigned long tsb_nentries;
99 unsigned long tsb_reg_val; 97 unsigned long tsb_reg_val;
100 unsigned long tsb_map_vaddr; 98 unsigned long tsb_map_vaddr;
101 unsigned long tsb_map_pte; 99 unsigned long tsb_map_pte;
102 struct hv_tsb_descr tsb_descr; 100};
101
102#define MM_TSB_BASE 0
103
104#ifdef CONFIG_HUGETLB_PAGE
105#define MM_TSB_HUGE 1
106#define MM_NUM_TSBS 2
107#else
108#define MM_NUM_TSBS 1
109#endif
110
111typedef struct {
112 spinlock_t lock;
113 unsigned long sparc64_ctx_val;
114 unsigned long huge_pte_count;
115 struct tsb_config tsb_block[MM_NUM_TSBS];
116 struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
103} mm_context_t; 117} mm_context_t;
104 118
105#endif /* !__ASSEMBLY__ */ 119#endif /* !__ASSEMBLY__ */
106 120
121#define TSB_CONFIG_TSB 0x00
122#define TSB_CONFIG_RSS_LIMIT 0x08
123#define TSB_CONFIG_NENTRIES 0x10
124#define TSB_CONFIG_REG_VAL 0x18
125#define TSB_CONFIG_MAP_VADDR 0x20
126#define TSB_CONFIG_MAP_PTE 0x28
127
107#endif /* __MMU_H */ 128#endif /* __MMU_H */
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index e7974321d052..2337eb487719 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -29,20 +29,25 @@ extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
29extern void destroy_context(struct mm_struct *mm); 29extern void destroy_context(struct mm_struct *mm);
30 30
31extern void __tsb_context_switch(unsigned long pgd_pa, 31extern void __tsb_context_switch(unsigned long pgd_pa,
32 unsigned long tsb_reg, 32 struct tsb_config *tsb_base,
33 unsigned long tsb_vaddr, 33 struct tsb_config *tsb_huge,
34 unsigned long tsb_pte,
35 unsigned long tsb_descr_pa); 34 unsigned long tsb_descr_pa);
36 35
37static inline void tsb_context_switch(struct mm_struct *mm) 36static inline void tsb_context_switch(struct mm_struct *mm)
38{ 37{
39 __tsb_context_switch(__pa(mm->pgd), mm->context.tsb_reg_val, 38 __tsb_context_switch(__pa(mm->pgd),
40 mm->context.tsb_map_vaddr, 39 &mm->context.tsb_block[0],
41 mm->context.tsb_map_pte, 40#ifdef CONFIG_HUGETLB_PAGE
42 __pa(&mm->context.tsb_descr)); 41 (mm->context.tsb_block[1].tsb ?
42 &mm->context.tsb_block[1] :
43 NULL)
44#else
45 NULL
46#endif
47 , __pa(&mm->context.tsb_descr[0]));
43} 48}
44 49
45extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss); 50extern void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss);
46#ifdef CONFIG_SMP 51#ifdef CONFIG_SMP
47extern void smp_tsb_sync(struct mm_struct *mm); 52extern void smp_tsb_sync(struct mm_struct *mm);
48#else 53#else
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index fcb2812265f4..66fe4ac59fd6 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -30,6 +30,23 @@
30 30
31#ifdef __KERNEL__ 31#ifdef __KERNEL__
32 32
33#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
34#define HPAGE_SHIFT 22
35#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
36#define HPAGE_SHIFT 19
37#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
38#define HPAGE_SHIFT 16
39#endif
40
41#ifdef CONFIG_HUGETLB_PAGE
42#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
43#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
44#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
45#define ARCH_HAS_SETCLEAR_HUGE_PTE
46#define ARCH_HAS_HUGETLB_PREFAULT_HOOK
47#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
48#endif
49
33#ifndef __ASSEMBLY__ 50#ifndef __ASSEMBLY__
34 51
35extern void _clear_page(void *page); 52extern void _clear_page(void *page);
@@ -90,23 +107,6 @@ typedef unsigned long pgprot_t;
90 107
91#endif /* (STRICT_MM_TYPECHECKS) */ 108#endif /* (STRICT_MM_TYPECHECKS) */
92 109
93#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
94#define HPAGE_SHIFT 22
95#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
96#define HPAGE_SHIFT 19
97#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
98#define HPAGE_SHIFT 16
99#endif
100
101#ifdef CONFIG_HUGETLB_PAGE
102#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
103#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
104#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
105#define ARCH_HAS_SETCLEAR_HUGE_PTE
106#define ARCH_HAS_HUGETLB_PREFAULT_HOOK
107#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
108#endif
109
110#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ 110#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
111 (_AC(0x0000000070000000,UL)) : \ 111 (_AC(0x0000000070000000,UL)) : \
112 (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) 112 (_AC(0xfffff80000000000,UL) + (1UL << 32UL)))
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index ed4124edf837..c44e7466534e 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -105,6 +105,7 @@
105#define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */ 105#define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */
106#define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ 106#define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */
107#define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ 107#define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */
108#define _PAGE_SZALL_4U _AC(0x6001000000000000,UL) /* All pgsz bits */
108#define _PAGE_SN_4U _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */ 109#define _PAGE_SN_4U _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */
109#define _PAGE_RES2_4U _AC(0x0000780000000000,UL) /* Reserved */ 110#define _PAGE_RES2_4U _AC(0x0000780000000000,UL) /* Reserved */
110#define _PAGE_PADDR_4U _AC(0x000007FFFFFFE000,UL) /* (Cheetah) pa[42:13] */ 111#define _PAGE_PADDR_4U _AC(0x000007FFFFFFE000,UL) /* (Cheetah) pa[42:13] */
@@ -150,6 +151,7 @@
150#define _PAGE_SZ512K_4V _AC(0x0000000000000002,UL) /* 512K Page */ 151#define _PAGE_SZ512K_4V _AC(0x0000000000000002,UL) /* 512K Page */
151#define _PAGE_SZ64K_4V _AC(0x0000000000000001,UL) /* 64K Page */ 152#define _PAGE_SZ64K_4V _AC(0x0000000000000001,UL) /* 64K Page */
152#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */ 153#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */
154#define _PAGE_SZALL_4V _AC(0x0000000000000007,UL) /* All pgsz bits */
153 155
154#if PAGE_SHIFT == 13 156#if PAGE_SHIFT == 13
155#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U 157#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U