author     Linus Torvalds <torvalds@linux-foundation.org>   2013-05-02 13:16:16 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-05-02 13:16:16 -0400
commit     5a148af66932c31814e263366094b5812210b501
tree       c5155ae89d7109533b8b073631bd65a7dd394b9d /arch/powerpc/mm
parent     99c6bcf46d2233d33e441834e958ed0bc22b190a
parent     54d5999d98f2ab36ad71b9ef4d82cf5f399205f5
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc update from Benjamin Herrenschmidt:
 "The main highlights this time around are:

  - A pile of additional POWER8 bits and nits, such as updated
    performance counter support (Michael Ellerman), new branch history
    buffer support (Anshuman Khandual), base support for the new PCI
    host bridge when not using the hypervisor (Gavin Shan) and other
    random related bits and fixes from various contributors.

  - Some rework of our page table format by Aneesh Kumar which fixes a
    thing or two and paves the way for THP support.  THP itself will
    not make it this time around however.

  - More Freescale updates, including Altivec support on the new e6500
    cores, new PCI controller support, and a pile of new boards support
    and updates.

  - The usual batch of trivial cleanups & fixes"

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (156 commits)
  powerpc: Fix build error for book3e
  powerpc: Context switch the new EBB SPRs
  powerpc: Turn on the EBB H/FSCR bits
  powerpc: Replace CPU_FTR_BCTAR with CPU_FTR_ARCH_207S
  powerpc: Setup BHRB instructions facility in HFSCR for POWER8
  powerpc: Fix interrupt range check on debug exception
  powerpc: Update tlbie/tlbiel as per ISA doc
  powerpc: Print page size info during boot
  powerpc: print both base and actual page size on hash failure
  powerpc: Fix hpte_decode to use the correct decoding for page sizes
  powerpc: Decode the pte-lp-encoding bits correctly.
  powerpc: Use encode avpn where we need only avpn values
  powerpc: Reduce PTE table memory wastage
  powerpc: Move the pte free routines from common header
  powerpc: Reduce the PTE_INDEX_SIZE
  powerpc: Switch 16GB and 16MB explicit hugepages to a different page table format
  powerpc: New hugepage directory format
  powerpc: Don't truncate pgd_index wrongly
  powerpc: Don't hard code the size of pte page
  powerpc: Save DAR and DSISR in pt_regs on MCE
  ...
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/gup.c                  |  18
-rw-r--r--  arch/powerpc/mm/hash_low_64.S          |  22
-rw-r--r--  arch/powerpc/mm/hash_native_64.c       | 178
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c        | 183
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c   |  33
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c          | 192
-rw-r--r--  arch/powerpc/mm/icswx.c                |   2
-rw-r--r--  arch/powerpc/mm/init_64.c              |   3
-rw-r--r--  arch/powerpc/mm/mem.c                  |   3
-rw-r--r--  arch/powerpc/mm/mmu_context_hash64.c   |  37
-rw-r--r--  arch/powerpc/mm/numa.c                 | 278
-rw-r--r--  arch/powerpc/mm/pgtable_64.c           | 118
-rw-r--r--  arch/powerpc/mm/slice.c                | 223
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c           |  18
14 files changed, 929 insertions, 379 deletions
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index d7efdbf640c7..4b921affa495 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -68,7 +68,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
68 next = pmd_addr_end(addr, end); 68 next = pmd_addr_end(addr, end);
69 if (pmd_none(pmd)) 69 if (pmd_none(pmd))
70 return 0; 70 return 0;
71 if (is_hugepd(pmdp)) { 71 if (pmd_huge(pmd)) {
72 if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
73 write, pages, nr))
74 return 0;
75 } else if (is_hugepd(pmdp)) {
72 if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, 76 if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
73 addr, next, write, pages, nr)) 77 addr, next, write, pages, nr))
74 return 0; 78 return 0;
@@ -92,7 +96,11 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
92 next = pud_addr_end(addr, end); 96 next = pud_addr_end(addr, end);
93 if (pud_none(pud)) 97 if (pud_none(pud))
94 return 0; 98 return 0;
95 if (is_hugepd(pudp)) { 99 if (pud_huge(pud)) {
100 if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
101 write, pages, nr))
102 return 0;
103 } else if (is_hugepd(pudp)) {
96 if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, 104 if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
97 addr, next, write, pages, nr)) 105 addr, next, write, pages, nr))
98 return 0; 106 return 0;
@@ -153,7 +161,11 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
153 next = pgd_addr_end(addr, end); 161 next = pgd_addr_end(addr, end);
154 if (pgd_none(pgd)) 162 if (pgd_none(pgd))
155 goto slow; 163 goto slow;
156 if (is_hugepd(pgdp)) { 164 if (pgd_huge(pgd)) {
165 if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
166 write, pages, &nr))
167 goto slow;
168 } else if (is_hugepd(pgdp)) {
157 if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, 169 if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
158 addr, next, write, pages, &nr)) 170 addr, next, write, pages, &nr))
159 goto slow; 171 goto slow;
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 7443481a315c..0e980acae67c 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -196,7 +196,8 @@ htab_insert_pte:
196 mr r4,r29 /* Retrieve vpn */ 196 mr r4,r29 /* Retrieve vpn */
197 li r7,0 /* !bolted, !secondary */ 197 li r7,0 /* !bolted, !secondary */
198 li r8,MMU_PAGE_4K /* page size */ 198 li r8,MMU_PAGE_4K /* page size */
199 ld r9,STK_PARAM(R9)(r1) /* segment size */ 199 li r9,MMU_PAGE_4K /* actual page size */
200 ld r10,STK_PARAM(R9)(r1) /* segment size */
200_GLOBAL(htab_call_hpte_insert1) 201_GLOBAL(htab_call_hpte_insert1)
201 bl . /* Patched by htab_finish_init() */ 202 bl . /* Patched by htab_finish_init() */
202 cmpdi 0,r3,0 203 cmpdi 0,r3,0
@@ -219,7 +220,8 @@ _GLOBAL(htab_call_hpte_insert1)
219 mr r4,r29 /* Retrieve vpn */ 220 mr r4,r29 /* Retrieve vpn */
220 li r7,HPTE_V_SECONDARY /* !bolted, secondary */ 221 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
221 li r8,MMU_PAGE_4K /* page size */ 222 li r8,MMU_PAGE_4K /* page size */
222 ld r9,STK_PARAM(R9)(r1) /* segment size */ 223 li r9,MMU_PAGE_4K /* actual page size */
224 ld r10,STK_PARAM(R9)(r1) /* segment size */
223_GLOBAL(htab_call_hpte_insert2) 225_GLOBAL(htab_call_hpte_insert2)
224 bl . /* Patched by htab_finish_init() */ 226 bl . /* Patched by htab_finish_init() */
225 cmpdi 0,r3,0 227 cmpdi 0,r3,0
@@ -490,7 +492,7 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
490 beq htab_inval_old_hpte 492 beq htab_inval_old_hpte
491 493
492 ld r6,STK_PARAM(R6)(r1) 494 ld r6,STK_PARAM(R6)(r1)
493 ori r26,r6,0x8000 /* Load the hidx mask */ 495 ori r26,r6,PTE_PAGE_HIDX_OFFSET /* Load the hidx mask. */
494 ld r26,0(r26) 496 ld r26,0(r26)
495 addi r5,r25,36 /* Check actual HPTE_SUB bit, this */ 497 addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
496 rldcr. r0,r31,r5,0 /* must match pgtable.h definition */ 498 rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
@@ -515,7 +517,8 @@ htab_special_pfn:
515 mr r4,r29 /* Retrieve vpn */ 517 mr r4,r29 /* Retrieve vpn */
516 li r7,0 /* !bolted, !secondary */ 518 li r7,0 /* !bolted, !secondary */
517 li r8,MMU_PAGE_4K /* page size */ 519 li r8,MMU_PAGE_4K /* page size */
518 ld r9,STK_PARAM(R9)(r1) /* segment size */ 520 li r9,MMU_PAGE_4K /* actual page size */
521 ld r10,STK_PARAM(R9)(r1) /* segment size */
519_GLOBAL(htab_call_hpte_insert1) 522_GLOBAL(htab_call_hpte_insert1)
520 bl . /* patched by htab_finish_init() */ 523 bl . /* patched by htab_finish_init() */
521 cmpdi 0,r3,0 524 cmpdi 0,r3,0
@@ -542,7 +545,8 @@ _GLOBAL(htab_call_hpte_insert1)
542 mr r4,r29 /* Retrieve vpn */ 545 mr r4,r29 /* Retrieve vpn */
543 li r7,HPTE_V_SECONDARY /* !bolted, secondary */ 546 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
544 li r8,MMU_PAGE_4K /* page size */ 547 li r8,MMU_PAGE_4K /* page size */
545 ld r9,STK_PARAM(R9)(r1) /* segment size */ 548 li r9,MMU_PAGE_4K /* actual page size */
549 ld r10,STK_PARAM(R9)(r1) /* segment size */
546_GLOBAL(htab_call_hpte_insert2) 550_GLOBAL(htab_call_hpte_insert2)
547 bl . /* patched by htab_finish_init() */ 551 bl . /* patched by htab_finish_init() */
548 cmpdi 0,r3,0 552 cmpdi 0,r3,0
@@ -607,7 +611,7 @@ htab_pte_insert_ok:
607 sld r4,r4,r5 611 sld r4,r4,r5
608 andc r26,r26,r4 612 andc r26,r26,r4
609 or r26,r26,r3 613 or r26,r26,r3
610 ori r5,r6,0x8000 614 ori r5,r6,PTE_PAGE_HIDX_OFFSET
611 std r26,0(r5) 615 std r26,0(r5)
612 lwsync 616 lwsync
613 std r30,0(r6) 617 std r30,0(r6)
@@ -840,7 +844,8 @@ ht64_insert_pte:
840 mr r4,r29 /* Retrieve vpn */ 844 mr r4,r29 /* Retrieve vpn */
841 li r7,0 /* !bolted, !secondary */ 845 li r7,0 /* !bolted, !secondary */
842 li r8,MMU_PAGE_64K 846 li r8,MMU_PAGE_64K
843 ld r9,STK_PARAM(R9)(r1) /* segment size */ 847 li r9,MMU_PAGE_64K /* actual page size */
848 ld r10,STK_PARAM(R9)(r1) /* segment size */
844_GLOBAL(ht64_call_hpte_insert1) 849_GLOBAL(ht64_call_hpte_insert1)
845 bl . /* patched by htab_finish_init() */ 850 bl . /* patched by htab_finish_init() */
846 cmpdi 0,r3,0 851 cmpdi 0,r3,0
@@ -863,7 +868,8 @@ _GLOBAL(ht64_call_hpte_insert1)
863 mr r4,r29 /* Retrieve vpn */ 868 mr r4,r29 /* Retrieve vpn */
864 li r7,HPTE_V_SECONDARY /* !bolted, secondary */ 869 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
865 li r8,MMU_PAGE_64K 870 li r8,MMU_PAGE_64K
866 ld r9,STK_PARAM(R9)(r1) /* segment size */ 871 li r9,MMU_PAGE_64K /* actual page size */
872 ld r10,STK_PARAM(R9)(r1) /* segment size */
867_GLOBAL(ht64_call_hpte_insert2) 873_GLOBAL(ht64_call_hpte_insert2)
868 bl . /* patched by htab_finish_init() */ 874 bl . /* patched by htab_finish_init() */
869 cmpdi 0,r3,0 875 cmpdi 0,r3,0
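
[Note: the hunks above change the register convention for the low-level HPTE insert call: r8 still carries the base page size, r9 now carries the actual page size, and the segment size moves to r10. On the C side the hook simply gains an "apsize" parameter. The following is an illustrative, self-contained userspace sketch of that widened call convention only -- the struct name and backend are stand-ins, not the kernel's:]

    #include <stdio.h>

    enum mmu_psize { MMU_PAGE_4K, MMU_PAGE_64K, MMU_PAGE_16M, MMU_PAGE_COUNT };

    /* The insert hook now takes both a base and an actual page size. */
    struct machdep_calls_sketch {
    	long (*hpte_insert)(unsigned long group, unsigned long vpn,
    			    unsigned long pa, unsigned long rflags,
    			    unsigned long vflags, int psize, int apsize,
    			    int ssize);
    };

    static long fake_insert(unsigned long group, unsigned long vpn,
    			unsigned long pa, unsigned long rflags,
    			unsigned long vflags, int psize, int apsize, int ssize)
    {
    	printf("insert: base psize=%d actual psize=%d\n", psize, apsize);
    	return 0;	/* pretend slot 0 was used */
    }

    int main(void)
    {
    	struct machdep_calls_sketch md = { .hpte_insert = fake_insert };

    	/* Ordinary mapping: base and actual page size are the same. */
    	md.hpte_insert(0, 0, 0, 0, 0, MMU_PAGE_64K, MMU_PAGE_64K, 0);
    	/* An MPSS-style mapping would pass a larger actual size instead. */
    	md.hpte_insert(0, 0, 0, 0, 0, MMU_PAGE_64K, MMU_PAGE_16M, 0);
    	return 0;
    }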
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index ffc1e00f7a22..6a2aead5b0e5 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -39,7 +39,7 @@
39 39
40DEFINE_RAW_SPINLOCK(native_tlbie_lock); 40DEFINE_RAW_SPINLOCK(native_tlbie_lock);
41 41
42static inline void __tlbie(unsigned long vpn, int psize, int ssize) 42static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
43{ 43{
44 unsigned long va; 44 unsigned long va;
45 unsigned int penc; 45 unsigned int penc;
@@ -61,17 +61,31 @@ static inline void __tlbie(unsigned long vpn, int psize, int ssize)
61 61
62 switch (psize) { 62 switch (psize) {
63 case MMU_PAGE_4K: 63 case MMU_PAGE_4K:
64 /* clear out bits after (52) [0....52.....63] */
65 va &= ~((1ul << (64 - 52)) - 1);
64 va |= ssize << 8; 66 va |= ssize << 8;
67 va |= mmu_psize_defs[apsize].sllp << 6;
65 asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) 68 asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
66 : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) 69 : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
67 : "memory"); 70 : "memory");
68 break; 71 break;
69 default: 72 default:
70 /* We need 14 to 14 + i bits of va */ 73 /* We need 14 to 14 + i bits of va */
71 penc = mmu_psize_defs[psize].penc; 74 penc = mmu_psize_defs[psize].penc[apsize];
72 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); 75 va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
73 va |= penc << 12; 76 va |= penc << 12;
74 va |= ssize << 8; 77 va |= ssize << 8;
78 /* Add AVAL part */
79 if (psize != apsize) {
80 /*
81 * MPSS, 64K base page size and 16MB parge page size
82 * We don't need all the bits, but rest of the bits
83 * must be ignored by the processor.
84 * vpn cover upto 65 bits of va. (0...65) and we need
85 * 58..64 bits of va.
86 */
87 va |= (vpn & 0xfe);
88 }
75 va |= 1; /* L */ 89 va |= 1; /* L */
76 asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2) 90 asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
77 : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) 91 : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
@@ -80,7 +94,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int ssize)
80 } 94 }
81} 95}
82 96
83static inline void __tlbiel(unsigned long vpn, int psize, int ssize) 97static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
84{ 98{
85 unsigned long va; 99 unsigned long va;
86 unsigned int penc; 100 unsigned int penc;
@@ -96,16 +110,30 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
96 110
97 switch (psize) { 111 switch (psize) {
98 case MMU_PAGE_4K: 112 case MMU_PAGE_4K:
113 /* clear out bits after(52) [0....52.....63] */
114 va &= ~((1ul << (64 - 52)) - 1);
99 va |= ssize << 8; 115 va |= ssize << 8;
116 va |= mmu_psize_defs[apsize].sllp << 6;
100 asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" 117 asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
101 : : "r"(va) : "memory"); 118 : : "r"(va) : "memory");
102 break; 119 break;
103 default: 120 default:
104 /* We need 14 to 14 + i bits of va */ 121 /* We need 14 to 14 + i bits of va */
105 penc = mmu_psize_defs[psize].penc; 122 penc = mmu_psize_defs[psize].penc[apsize];
106 va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); 123 va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
107 va |= penc << 12; 124 va |= penc << 12;
108 va |= ssize << 8; 125 va |= ssize << 8;
126 /* Add AVAL part */
127 if (psize != apsize) {
128 /*
129 * MPSS, 64K base page size and 16MB parge page size
130 * We don't need all the bits, but rest of the bits
131 * must be ignored by the processor.
132 * vpn cover upto 65 bits of va. (0...65) and we need
133 * 58..64 bits of va.
134 */
135 va |= (vpn & 0xfe);
136 }
109 va |= 1; /* L */ 137 va |= 1; /* L */
110 asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)" 138 asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
111 : : "r"(va) : "memory"); 139 : : "r"(va) : "memory");
@@ -114,7 +142,8 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
114 142
115} 143}
116 144
117static inline void tlbie(unsigned long vpn, int psize, int ssize, int local) 145static inline void tlbie(unsigned long vpn, int psize, int apsize,
146 int ssize, int local)
118{ 147{
119 unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); 148 unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
120 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 149 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
@@ -125,10 +154,10 @@ static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
125 raw_spin_lock(&native_tlbie_lock); 154 raw_spin_lock(&native_tlbie_lock);
126 asm volatile("ptesync": : :"memory"); 155 asm volatile("ptesync": : :"memory");
127 if (use_local) { 156 if (use_local) {
128 __tlbiel(vpn, psize, ssize); 157 __tlbiel(vpn, psize, apsize, ssize);
129 asm volatile("ptesync": : :"memory"); 158 asm volatile("ptesync": : :"memory");
130 } else { 159 } else {
131 __tlbie(vpn, psize, ssize); 160 __tlbie(vpn, psize, apsize, ssize);
132 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 161 asm volatile("eieio; tlbsync; ptesync": : :"memory");
133 } 162 }
134 if (lock_tlbie && !use_local) 163 if (lock_tlbie && !use_local)
@@ -156,7 +185,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
156 185
157static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, 186static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
158 unsigned long pa, unsigned long rflags, 187 unsigned long pa, unsigned long rflags,
159 unsigned long vflags, int psize, int ssize) 188 unsigned long vflags, int psize, int apsize, int ssize)
160{ 189{
161 struct hash_pte *hptep = htab_address + hpte_group; 190 struct hash_pte *hptep = htab_address + hpte_group;
162 unsigned long hpte_v, hpte_r; 191 unsigned long hpte_v, hpte_r;
@@ -183,8 +212,8 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
183 if (i == HPTES_PER_GROUP) 212 if (i == HPTES_PER_GROUP)
184 return -1; 213 return -1;
185 214
186 hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID; 215 hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
187 hpte_r = hpte_encode_r(pa, psize) | rflags; 216 hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
188 217
189 if (!(vflags & HPTE_V_BOLTED)) { 218 if (!(vflags & HPTE_V_BOLTED)) {
190 DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", 219 DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
@@ -244,6 +273,51 @@ static long native_hpte_remove(unsigned long hpte_group)
244 return i; 273 return i;
245} 274}
246 275
276static inline int __hpte_actual_psize(unsigned int lp, int psize)
277{
278 int i, shift;
279 unsigned int mask;
280
281 /* start from 1 ignoring MMU_PAGE_4K */
282 for (i = 1; i < MMU_PAGE_COUNT; i++) {
283
284 /* invalid penc */
285 if (mmu_psize_defs[psize].penc[i] == -1)
286 continue;
287 /*
288 * encoding bits per actual page size
289 * PTE LP actual page size
290 * rrrr rrrz >=8KB
291 * rrrr rrzz >=16KB
292 * rrrr rzzz >=32KB
293 * rrrr zzzz >=64KB
294 * .......
295 */
296 shift = mmu_psize_defs[i].shift - LP_SHIFT;
297 if (shift > LP_BITS)
298 shift = LP_BITS;
299 mask = (1 << shift) - 1;
300 if ((lp & mask) == mmu_psize_defs[psize].penc[i])
301 return i;
302 }
303 return -1;
304}
305
306static inline int hpte_actual_psize(struct hash_pte *hptep, int psize)
307{
308 /* Look at the 8 bit LP value */
309 unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
310
311 if (!(hptep->v & HPTE_V_VALID))
312 return -1;
313
314 /* First check if it is large page */
315 if (!(hptep->v & HPTE_V_LARGE))
316 return MMU_PAGE_4K;
317
318 return __hpte_actual_psize(lp, psize);
319}
320
247static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, 321static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
248 unsigned long vpn, int psize, int ssize, 322 unsigned long vpn, int psize, int ssize,
249 int local) 323 int local)
@@ -251,8 +325,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
251 struct hash_pte *hptep = htab_address + slot; 325 struct hash_pte *hptep = htab_address + slot;
252 unsigned long hpte_v, want_v; 326 unsigned long hpte_v, want_v;
253 int ret = 0; 327 int ret = 0;
328 int actual_psize;
254 329
255 want_v = hpte_encode_v(vpn, psize, ssize); 330 want_v = hpte_encode_avpn(vpn, psize, ssize);
256 331
257 DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", 332 DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
258 vpn, want_v & HPTE_V_AVPN, slot, newpp); 333 vpn, want_v & HPTE_V_AVPN, slot, newpp);
@@ -260,9 +335,13 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
260 native_lock_hpte(hptep); 335 native_lock_hpte(hptep);
261 336
262 hpte_v = hptep->v; 337 hpte_v = hptep->v;
263 338 actual_psize = hpte_actual_psize(hptep, psize);
339 if (actual_psize < 0) {
340 native_unlock_hpte(hptep);
341 return -1;
342 }
264 /* Even if we miss, we need to invalidate the TLB */ 343 /* Even if we miss, we need to invalidate the TLB */
265 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { 344 if (!HPTE_V_COMPARE(hpte_v, want_v)) {
266 DBG_LOW(" -> miss\n"); 345 DBG_LOW(" -> miss\n");
267 ret = -1; 346 ret = -1;
268 } else { 347 } else {
@@ -274,7 +353,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
274 native_unlock_hpte(hptep); 353 native_unlock_hpte(hptep);
275 354
276 /* Ensure it is out of the tlb too. */ 355 /* Ensure it is out of the tlb too. */
277 tlbie(vpn, psize, ssize, local); 356 tlbie(vpn, psize, actual_psize, ssize, local);
278 357
279 return ret; 358 return ret;
280} 359}
@@ -288,7 +367,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
288 unsigned long want_v, hpte_v; 367 unsigned long want_v, hpte_v;
289 368
290 hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); 369 hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
291 want_v = hpte_encode_v(vpn, psize, ssize); 370 want_v = hpte_encode_avpn(vpn, psize, ssize);
292 371
293 /* Bolted mappings are only ever in the primary group */ 372 /* Bolted mappings are only ever in the primary group */
294 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 373 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -315,6 +394,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
315static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, 394static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
316 int psize, int ssize) 395 int psize, int ssize)
317{ 396{
397 int actual_psize;
318 unsigned long vpn; 398 unsigned long vpn;
319 unsigned long vsid; 399 unsigned long vsid;
320 long slot; 400 long slot;
@@ -327,13 +407,16 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
327 if (slot == -1) 407 if (slot == -1)
328 panic("could not find page to bolt\n"); 408 panic("could not find page to bolt\n");
329 hptep = htab_address + slot; 409 hptep = htab_address + slot;
410 actual_psize = hpte_actual_psize(hptep, psize);
411 if (actual_psize < 0)
412 return;
330 413
331 /* Update the HPTE */ 414 /* Update the HPTE */
332 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | 415 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
333 (newpp & (HPTE_R_PP | HPTE_R_N)); 416 (newpp & (HPTE_R_PP | HPTE_R_N));
334 417
335 /* Ensure it is out of the tlb too. */ 418 /* Ensure it is out of the tlb too. */
336 tlbie(vpn, psize, ssize, 0); 419 tlbie(vpn, psize, actual_psize, ssize, 0);
337} 420}
338 421
339static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, 422static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
@@ -343,64 +426,60 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
343 unsigned long hpte_v; 426 unsigned long hpte_v;
344 unsigned long want_v; 427 unsigned long want_v;
345 unsigned long flags; 428 unsigned long flags;
429 int actual_psize;
346 430
347 local_irq_save(flags); 431 local_irq_save(flags);
348 432
349 DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); 433 DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
350 434
351 want_v = hpte_encode_v(vpn, psize, ssize); 435 want_v = hpte_encode_avpn(vpn, psize, ssize);
352 native_lock_hpte(hptep); 436 native_lock_hpte(hptep);
353 hpte_v = hptep->v; 437 hpte_v = hptep->v;
354 438
439 actual_psize = hpte_actual_psize(hptep, psize);
440 if (actual_psize < 0) {
441 native_unlock_hpte(hptep);
442 local_irq_restore(flags);
443 return;
444 }
355 /* Even if we miss, we need to invalidate the TLB */ 445 /* Even if we miss, we need to invalidate the TLB */
356 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) 446 if (!HPTE_V_COMPARE(hpte_v, want_v))
357 native_unlock_hpte(hptep); 447 native_unlock_hpte(hptep);
358 else 448 else
359 /* Invalidate the hpte. NOTE: this also unlocks it */ 449 /* Invalidate the hpte. NOTE: this also unlocks it */
360 hptep->v = 0; 450 hptep->v = 0;
361 451
362 /* Invalidate the TLB */ 452 /* Invalidate the TLB */
363 tlbie(vpn, psize, ssize, local); 453 tlbie(vpn, psize, actual_psize, ssize, local);
364 454
365 local_irq_restore(flags); 455 local_irq_restore(flags);
366} 456}
367 457
368#define LP_SHIFT 12
369#define LP_BITS 8
370#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
371
372static void hpte_decode(struct hash_pte *hpte, unsigned long slot, 458static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
373 int *psize, int *ssize, unsigned long *vpn) 459 int *psize, int *apsize, int *ssize, unsigned long *vpn)
374{ 460{
375 unsigned long avpn, pteg, vpi; 461 unsigned long avpn, pteg, vpi;
376 unsigned long hpte_r = hpte->r;
377 unsigned long hpte_v = hpte->v; 462 unsigned long hpte_v = hpte->v;
378 unsigned long vsid, seg_off; 463 unsigned long vsid, seg_off;
379 int i, size, shift, penc; 464 int size, a_size, shift;
465 /* Look at the 8 bit LP value */
466 unsigned int lp = (hpte->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
380 467
381 if (!(hpte_v & HPTE_V_LARGE)) 468 if (!(hpte_v & HPTE_V_LARGE)) {
382 size = MMU_PAGE_4K; 469 size = MMU_PAGE_4K;
383 else { 470 a_size = MMU_PAGE_4K;
384 for (i = 0; i < LP_BITS; i++) { 471 } else {
385 if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
386 break;
387 }
388 penc = LP_MASK(i+1) >> LP_SHIFT;
389 for (size = 0; size < MMU_PAGE_COUNT; size++) { 472 for (size = 0; size < MMU_PAGE_COUNT; size++) {
390 473
391 /* 4K pages are not represented by LP */
392 if (size == MMU_PAGE_4K)
393 continue;
394
395 /* valid entries have a shift value */ 474 /* valid entries have a shift value */
396 if (!mmu_psize_defs[size].shift) 475 if (!mmu_psize_defs[size].shift)
397 continue; 476 continue;
398 477
399 if (penc == mmu_psize_defs[size].penc) 478 a_size = __hpte_actual_psize(lp, size);
479 if (a_size != -1)
400 break; 480 break;
401 } 481 }
402 } 482 }
403
404 /* This works for all page sizes, and for 256M and 1T segments */ 483 /* This works for all page sizes, and for 256M and 1T segments */
405 *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; 484 *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
406 shift = mmu_psize_defs[size].shift; 485 shift = mmu_psize_defs[size].shift;
@@ -433,7 +512,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
433 default: 512 default:
434 *vpn = size = 0; 513 *vpn = size = 0;
435 } 514 }
436 *psize = size; 515 *psize = size;
516 *apsize = a_size;
437} 517}
438 518
439/* 519/*
@@ -451,7 +531,7 @@ static void native_hpte_clear(void)
451 struct hash_pte *hptep = htab_address; 531 struct hash_pte *hptep = htab_address;
452 unsigned long hpte_v; 532 unsigned long hpte_v;
453 unsigned long pteg_count; 533 unsigned long pteg_count;
454 int psize, ssize; 534 int psize, apsize, ssize;
455 535
456 pteg_count = htab_hash_mask + 1; 536 pteg_count = htab_hash_mask + 1;
457 537
@@ -477,9 +557,9 @@ static void native_hpte_clear(void)
477 * already hold the native_tlbie_lock. 557 * already hold the native_tlbie_lock.
478 */ 558 */
479 if (hpte_v & HPTE_V_VALID) { 559 if (hpte_v & HPTE_V_VALID) {
480 hpte_decode(hptep, slot, &psize, &ssize, &vpn); 560 hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
481 hptep->v = 0; 561 hptep->v = 0;
482 __tlbie(vpn, psize, ssize); 562 __tlbie(vpn, psize, apsize, ssize);
483 } 563 }
484 } 564 }
485 565
@@ -520,7 +600,7 @@ static void native_flush_hash_range(unsigned long number, int local)
520 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 600 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
521 slot += hidx & _PTEIDX_GROUP_IX; 601 slot += hidx & _PTEIDX_GROUP_IX;
522 hptep = htab_address + slot; 602 hptep = htab_address + slot;
523 want_v = hpte_encode_v(vpn, psize, ssize); 603 want_v = hpte_encode_avpn(vpn, psize, ssize);
524 native_lock_hpte(hptep); 604 native_lock_hpte(hptep);
525 hpte_v = hptep->v; 605 hpte_v = hptep->v;
526 if (!HPTE_V_COMPARE(hpte_v, want_v) || 606 if (!HPTE_V_COMPARE(hpte_v, want_v) ||
@@ -540,7 +620,7 @@ static void native_flush_hash_range(unsigned long number, int local)
540 620
541 pte_iterate_hashed_subpages(pte, psize, 621 pte_iterate_hashed_subpages(pte, psize,
542 vpn, index, shift) { 622 vpn, index, shift) {
543 __tlbiel(vpn, psize, ssize); 623 __tlbiel(vpn, psize, psize, ssize);
544 } pte_iterate_hashed_end(); 624 } pte_iterate_hashed_end();
545 } 625 }
546 asm volatile("ptesync":::"memory"); 626 asm volatile("ptesync":::"memory");
@@ -557,7 +637,7 @@ static void native_flush_hash_range(unsigned long number, int local)
557 637
558 pte_iterate_hashed_subpages(pte, psize, 638 pte_iterate_hashed_subpages(pte, psize,
559 vpn, index, shift) { 639 vpn, index, shift) {
560 __tlbie(vpn, psize, ssize); 640 __tlbie(vpn, psize, psize, ssize);
561 } pte_iterate_hashed_end(); 641 } pte_iterate_hashed_end();
562 } 642 }
563 asm volatile("eieio; tlbsync; ptesync":::"memory"); 643 asm volatile("eieio; tlbsync; ptesync":::"memory");
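
[Note: the new __hpte_actual_psize() helper above recovers the actual page size from the 8 LP bits of an HPTE by comparing the low (shift - LP_SHIFT) bits of LP against the base size's penc[] entry for each candidate size. A minimal userspace sketch of that lookup follows; the penc values in the table are invented for illustration (the real encodings come from the ISA / device tree), and the enum is a local stand-in:]

    #include <stdio.h>

    enum { MMU_PAGE_4K, MMU_PAGE_64K, MMU_PAGE_16M, MMU_PAGE_COUNT };

    #define LP_SHIFT 12
    #define LP_BITS  8

    struct psize_def {
    	int shift;			/* log2 of the page size */
    	int penc[MMU_PAGE_COUNT];	/* encoding per actual size, -1 = invalid */
    };

    /* Hypothetical table: only 64K base with {64K, 16M} actual is filled in. */
    static const struct psize_def psize_defs[MMU_PAGE_COUNT] = {
    	[MMU_PAGE_4K]  = { 12, { [0 ... MMU_PAGE_COUNT - 1] = -1 } },
    	[MMU_PAGE_64K] = { 16, { -1, 0x1, 0x38 } },	/* made-up penc values */
    	[MMU_PAGE_16M] = { 24, { -1, -1, 0x0 } },
    };

    /* Same shape as __hpte_actual_psize(): skip 4K, mask LP per candidate size. */
    static int actual_psize(unsigned int lp, int base)
    {
    	for (int i = 1; i < MMU_PAGE_COUNT; i++) {
    		int shift, mask;

    		if (psize_defs[base].penc[i] == -1)
    			continue;
    		shift = psize_defs[i].shift - LP_SHIFT;
    		if (shift > LP_BITS)
    			shift = LP_BITS;
    		mask = (1 << shift) - 1;
    		if ((int)(lp & mask) == psize_defs[base].penc[i])
    			return i;
    	}
    	return -1;
    }

    int main(void)
    {
    	/* 16M's (shift - LP_SHIFT) exceeds LP_BITS, so the low 8 LP bits
    	 * are compared against the made-up penc 0x38 and match. */
    	printf("%d\n", actual_psize(0x38, MMU_PAGE_64K));	/* 2 == MMU_PAGE_16M */
    	return 0;
    }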
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index f410c3e12c1e..3e4c4ed19335 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -126,7 +126,7 @@ static struct mmu_psize_def mmu_psize_defaults_old[] = {
126 [MMU_PAGE_4K] = { 126 [MMU_PAGE_4K] = {
127 .shift = 12, 127 .shift = 12,
128 .sllp = 0, 128 .sllp = 0,
129 .penc = 0, 129 .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
130 .avpnm = 0, 130 .avpnm = 0,
131 .tlbiel = 0, 131 .tlbiel = 0,
132 }, 132 },
@@ -140,14 +140,15 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
140 [MMU_PAGE_4K] = { 140 [MMU_PAGE_4K] = {
141 .shift = 12, 141 .shift = 12,
142 .sllp = 0, 142 .sllp = 0,
143 .penc = 0, 143 .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
144 .avpnm = 0, 144 .avpnm = 0,
145 .tlbiel = 1, 145 .tlbiel = 1,
146 }, 146 },
147 [MMU_PAGE_16M] = { 147 [MMU_PAGE_16M] = {
148 .shift = 24, 148 .shift = 24,
149 .sllp = SLB_VSID_L, 149 .sllp = SLB_VSID_L,
150 .penc = 0, 150 .penc = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
151 [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
151 .avpnm = 0x1UL, 152 .avpnm = 0x1UL,
152 .tlbiel = 0, 153 .tlbiel = 0,
153 }, 154 },
@@ -209,7 +210,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
209 210
210 BUG_ON(!ppc_md.hpte_insert); 211 BUG_ON(!ppc_md.hpte_insert);
211 ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot, 212 ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
212 HPTE_V_BOLTED, psize, ssize); 213 HPTE_V_BOLTED, psize, psize, ssize);
213 214
214 if (ret < 0) 215 if (ret < 0)
215 break; 216 break;
@@ -276,6 +277,30 @@ static void __init htab_init_seg_sizes(void)
276 of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL); 277 of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
277} 278}
278 279
280static int __init get_idx_from_shift(unsigned int shift)
281{
282 int idx = -1;
283
284 switch (shift) {
285 case 0xc:
286 idx = MMU_PAGE_4K;
287 break;
288 case 0x10:
289 idx = MMU_PAGE_64K;
290 break;
291 case 0x14:
292 idx = MMU_PAGE_1M;
293 break;
294 case 0x18:
295 idx = MMU_PAGE_16M;
296 break;
297 case 0x22:
298 idx = MMU_PAGE_16G;
299 break;
300 }
301 return idx;
302}
303
279static int __init htab_dt_scan_page_sizes(unsigned long node, 304static int __init htab_dt_scan_page_sizes(unsigned long node,
280 const char *uname, int depth, 305 const char *uname, int depth,
281 void *data) 306 void *data)
@@ -291,64 +316,65 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
291 prop = (u32 *)of_get_flat_dt_prop(node, 316 prop = (u32 *)of_get_flat_dt_prop(node,
292 "ibm,segment-page-sizes", &size); 317 "ibm,segment-page-sizes", &size);
293 if (prop != NULL) { 318 if (prop != NULL) {
294 DBG("Page sizes from device-tree:\n"); 319 pr_info("Page sizes from device-tree:\n");
295 size /= 4; 320 size /= 4;
296 cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); 321 cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
297 while(size > 0) { 322 while(size > 0) {
298 unsigned int shift = prop[0]; 323 unsigned int base_shift = prop[0];
299 unsigned int slbenc = prop[1]; 324 unsigned int slbenc = prop[1];
300 unsigned int lpnum = prop[2]; 325 unsigned int lpnum = prop[2];
301 unsigned int lpenc = 0;
302 struct mmu_psize_def *def; 326 struct mmu_psize_def *def;
303 int idx = -1; 327 int idx, base_idx;
304 328
305 size -= 3; prop += 3; 329 size -= 3; prop += 3;
306 while(size > 0 && lpnum) { 330 base_idx = get_idx_from_shift(base_shift);
307 if (prop[0] == shift) 331 if (base_idx < 0) {
308 lpenc = prop[1]; 332 /*
309 prop += 2; size -= 2; 333 * skip the pte encoding also
310 lpnum--; 334 */
335 prop += lpnum * 2; size -= lpnum * 2;
336 continue;
311 } 337 }
312 switch(shift) { 338 def = &mmu_psize_defs[base_idx];
313 case 0xc: 339 if (base_idx == MMU_PAGE_16M)
314 idx = MMU_PAGE_4K;
315 break;
316 case 0x10:
317 idx = MMU_PAGE_64K;
318 break;
319 case 0x14:
320 idx = MMU_PAGE_1M;
321 break;
322 case 0x18:
323 idx = MMU_PAGE_16M;
324 cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; 340 cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
325 break; 341
326 case 0x22: 342 def->shift = base_shift;
327 idx = MMU_PAGE_16G; 343 if (base_shift <= 23)
328 break;
329 }
330 if (idx < 0)
331 continue;
332 def = &mmu_psize_defs[idx];
333 def->shift = shift;
334 if (shift <= 23)
335 def->avpnm = 0; 344 def->avpnm = 0;
336 else 345 else
337 def->avpnm = (1 << (shift - 23)) - 1; 346 def->avpnm = (1 << (base_shift - 23)) - 1;
338 def->sllp = slbenc; 347 def->sllp = slbenc;
339 def->penc = lpenc; 348 /*
340 /* We don't know for sure what's up with tlbiel, so 349 * We don't know for sure what's up with tlbiel, so
341 * for now we only set it for 4K and 64K pages 350 * for now we only set it for 4K and 64K pages
342 */ 351 */
343 if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K) 352 if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
344 def->tlbiel = 1; 353 def->tlbiel = 1;
345 else 354 else
346 def->tlbiel = 0; 355 def->tlbiel = 0;
347 356
348 DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, " 357 while (size > 0 && lpnum) {
349 "tlbiel=%d, penc=%d\n", 358 unsigned int shift = prop[0];
350 idx, shift, def->sllp, def->avpnm, def->tlbiel, 359 int penc = prop[1];
351 def->penc); 360
361 prop += 2; size -= 2;
362 lpnum--;
363
364 idx = get_idx_from_shift(shift);
365 if (idx < 0)
366 continue;
367
368 if (penc == -1)
369 pr_err("Invalid penc for base_shift=%d "
370 "shift=%d\n", base_shift, shift);
371
372 def->penc[idx] = penc;
373 pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
374 " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
375 base_shift, shift, def->sllp,
376 def->avpnm, def->tlbiel, def->penc[idx]);
377 }
352 } 378 }
353 return 1; 379 return 1;
354 } 380 }
@@ -397,10 +423,21 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
397} 423}
398#endif /* CONFIG_HUGETLB_PAGE */ 424#endif /* CONFIG_HUGETLB_PAGE */
399 425
426static void mmu_psize_set_default_penc(void)
427{
428 int bpsize, apsize;
429 for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
430 for (apsize = 0; apsize < MMU_PAGE_COUNT; apsize++)
431 mmu_psize_defs[bpsize].penc[apsize] = -1;
432}
433
400static void __init htab_init_page_sizes(void) 434static void __init htab_init_page_sizes(void)
401{ 435{
402 int rc; 436 int rc;
403 437
438 /* se the invalid penc to -1 */
439 mmu_psize_set_default_penc();
440
404 /* Default to 4K pages only */ 441 /* Default to 4K pages only */
405 memcpy(mmu_psize_defs, mmu_psize_defaults_old, 442 memcpy(mmu_psize_defs, mmu_psize_defaults_old,
406 sizeof(mmu_psize_defaults_old)); 443 sizeof(mmu_psize_defaults_old));
@@ -899,14 +936,14 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
899 936
900void hash_failure_debug(unsigned long ea, unsigned long access, 937void hash_failure_debug(unsigned long ea, unsigned long access,
901 unsigned long vsid, unsigned long trap, 938 unsigned long vsid, unsigned long trap,
902 int ssize, int psize, unsigned long pte) 939 int ssize, int psize, int lpsize, unsigned long pte)
903{ 940{
904 if (!printk_ratelimit()) 941 if (!printk_ratelimit())
905 return; 942 return;
906 pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n", 943 pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
907 ea, access, current->comm); 944 ea, access, current->comm);
908 pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n", 945 pr_info(" trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
909 trap, vsid, ssize, psize, pte); 946 trap, vsid, ssize, psize, lpsize, pte);
910} 947}
911 948
912/* Result code is: 949/* Result code is:
@@ -1079,7 +1116,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
1079 */ 1116 */
1080 if (rc == -1) 1117 if (rc == -1)
1081 hash_failure_debug(ea, access, vsid, trap, ssize, psize, 1118 hash_failure_debug(ea, access, vsid, trap, ssize, psize,
1082 pte_val(*ptep)); 1119 psize, pte_val(*ptep));
1083#ifndef CONFIG_PPC_64K_PAGES 1120#ifndef CONFIG_PPC_64K_PAGES
1084 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); 1121 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
1085#else 1122#else
@@ -1157,7 +1194,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1157 */ 1194 */
1158 if (rc == -1) 1195 if (rc == -1)
1159 hash_failure_debug(ea, access, vsid, trap, ssize, 1196 hash_failure_debug(ea, access, vsid, trap, ssize,
1160 mm->context.user_psize, pte_val(*ptep)); 1197 mm->context.user_psize,
1198 mm->context.user_psize,
1199 pte_val(*ptep));
1161 1200
1162 local_irq_restore(flags); 1201 local_irq_restore(flags);
1163} 1202}
@@ -1230,24 +1269,60 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
1230 bad_page_fault(regs, address, SIGBUS); 1269 bad_page_fault(regs, address, SIGBUS);
1231} 1270}
1232 1271
1272long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
1273 unsigned long pa, unsigned long rflags,
1274 unsigned long vflags, int psize, int ssize)
1275{
1276 unsigned long hpte_group;
1277 long slot;
1278
1279repeat:
1280 hpte_group = ((hash & htab_hash_mask) *
1281 HPTES_PER_GROUP) & ~0x7UL;
1282
1283 /* Insert into the hash table, primary slot */
1284 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
1285 psize, psize, ssize);
1286
1287 /* Primary is full, try the secondary */
1288 if (unlikely(slot == -1)) {
1289 hpte_group = ((~hash & htab_hash_mask) *
1290 HPTES_PER_GROUP) & ~0x7UL;
1291 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
1292 vflags | HPTE_V_SECONDARY,
1293 psize, psize, ssize);
1294 if (slot == -1) {
1295 if (mftb() & 0x1)
1296 hpte_group = ((hash & htab_hash_mask) *
1297 HPTES_PER_GROUP)&~0x7UL;
1298
1299 ppc_md.hpte_remove(hpte_group);
1300 goto repeat;
1301 }
1302 }
1303
1304 return slot;
1305}
1306
1233#ifdef CONFIG_DEBUG_PAGEALLOC 1307#ifdef CONFIG_DEBUG_PAGEALLOC
1234static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) 1308static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
1235{ 1309{
1236 unsigned long hash, hpteg; 1310 unsigned long hash;
1237 unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); 1311 unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
1238 unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); 1312 unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
1239 unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL); 1313 unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
1240 int ret; 1314 long ret;
1241 1315
1242 hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); 1316 hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
1243 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
1244 1317
1245 /* Don't create HPTE entries for bad address */ 1318 /* Don't create HPTE entries for bad address */
1246 if (!vsid) 1319 if (!vsid)
1247 return; 1320 return;
1248 ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr), 1321
1249 mode, HPTE_V_BOLTED, 1322 ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
1250 mmu_linear_psize, mmu_kernel_ssize); 1323 HPTE_V_BOLTED,
1324 mmu_linear_psize, mmu_kernel_ssize);
1325
1251 BUG_ON (ret < 0); 1326 BUG_ON (ret < 0);
1252 spin_lock(&linear_map_hash_lock); 1327 spin_lock(&linear_map_hash_lock);
1253 BUG_ON(linear_map_hash_slots[lmi] & 0x80); 1328 BUG_ON(linear_map_hash_slots[lmi] & 0x80);
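
[Note: the reworked htab_dt_scan_page_sizes() above walks "ibm,segment-page-sizes" as groups of (base shift, SLB encoding, count) followed by count pairs of (actual shift, penc), filling a per-base-size penc[] array. A self-contained sketch of that walk follows; the property contents are invented purely to show the layout, not taken from real firmware:]

    #include <stdio.h>

    enum { MMU_PAGE_4K, MMU_PAGE_64K, MMU_PAGE_16M, MMU_PAGE_COUNT };

    static int idx_from_shift(unsigned int shift)
    {
    	switch (shift) {
    	case 0x0c: return MMU_PAGE_4K;
    	case 0x10: return MMU_PAGE_64K;
    	case 0x18: return MMU_PAGE_16M;
    	}
    	return -1;
    }

    int main(void)
    {
    	/* Invented property: base 64K (shift 0x10), slbenc 0x110, two
    	 * (shift, penc) pairs: 64K -> 0x1 and 16M -> 0x38. */
    	unsigned int prop[] = { 0x10, 0x110, 2,  0x10, 0x1,  0x18, 0x38 };
    	int size = sizeof(prop) / sizeof(prop[0]);
    	int penc[MMU_PAGE_COUNT][MMU_PAGE_COUNT];
    	unsigned int *p = prop;

    	for (int b = 0; b < MMU_PAGE_COUNT; b++)
    		for (int a = 0; a < MMU_PAGE_COUNT; a++)
    			penc[b][a] = -1;	/* default: invalid encoding */

    	while (size > 0) {
    		unsigned int base_shift = p[0];	/* p[1] is the SLB encoding */
    		int lpnum = p[2];
    		int base = idx_from_shift(base_shift);

    		p += 3; size -= 3;
    		if (base < 0) {		/* unknown base size: skip its pairs too */
    			p += lpnum * 2; size -= lpnum * 2;
    			continue;
    		}
    		while (size > 0 && lpnum--) {
    			int idx = idx_from_shift(p[0]);
    			if (idx >= 0)
    				penc[base][idx] = p[1];
    			p += 2; size -= 2;
    		}
    	}
    	printf("penc[64K][16M] = 0x%x\n", penc[MMU_PAGE_64K][MMU_PAGE_16M]);
    	return 0;
    }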
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index cecad348f604..0f1d94a1fb82 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -14,6 +14,10 @@
14#include <asm/cacheflush.h> 14#include <asm/cacheflush.h>
15#include <asm/machdep.h> 15#include <asm/machdep.h>
16 16
17extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
18 unsigned long pa, unsigned long rlags,
19 unsigned long vflags, int psize, int ssize);
20
17int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, 21int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
18 pte_t *ptep, unsigned long trap, int local, int ssize, 22 pte_t *ptep, unsigned long trap, int local, int ssize,
19 unsigned int shift, unsigned int mmu_psize) 23 unsigned int shift, unsigned int mmu_psize)
@@ -83,14 +87,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
83 87
84 if (likely(!(old_pte & _PAGE_HASHPTE))) { 88 if (likely(!(old_pte & _PAGE_HASHPTE))) {
85 unsigned long hash = hpt_hash(vpn, shift, ssize); 89 unsigned long hash = hpt_hash(vpn, shift, ssize);
86 unsigned long hpte_group;
87 90
88 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; 91 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
89 92
90repeat:
91 hpte_group = ((hash & htab_hash_mask) *
92 HPTES_PER_GROUP) & ~0x7UL;
93
94 /* clear HPTE slot informations in new PTE */ 93 /* clear HPTE slot informations in new PTE */
95#ifdef CONFIG_PPC_64K_PAGES 94#ifdef CONFIG_PPC_64K_PAGES
96 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; 95 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
@@ -101,26 +100,8 @@ repeat:
101 rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | 100 rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
102 _PAGE_COHERENT | _PAGE_GUARDED)); 101 _PAGE_COHERENT | _PAGE_GUARDED));
103 102
104 /* Insert into the hash table, primary slot */ 103 slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
105 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, 104 mmu_psize, ssize);
106 mmu_psize, ssize);
107
108 /* Primary is full, try the secondary */
109 if (unlikely(slot == -1)) {
110 hpte_group = ((~hash & htab_hash_mask) *
111 HPTES_PER_GROUP) & ~0x7UL;
112 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
113 HPTE_V_SECONDARY,
114 mmu_psize, ssize);
115 if (slot == -1) {
116 if (mftb() & 0x1)
117 hpte_group = ((hash & htab_hash_mask) *
118 HPTES_PER_GROUP)&~0x7UL;
119
120 ppc_md.hpte_remove(hpte_group);
121 goto repeat;
122 }
123 }
124 105
125 /* 106 /*
126 * Hypervisor failure. Restore old pte and return -1 107 * Hypervisor failure. Restore old pte and return -1
@@ -129,7 +110,7 @@ repeat:
129 if (unlikely(slot == -2)) { 110 if (unlikely(slot == -2)) {
130 *ptep = __pte(old_pte); 111 *ptep = __pte(old_pte);
131 hash_failure_debug(ea, access, vsid, trap, ssize, 112 hash_failure_debug(ea, access, vsid, trap, ssize,
132 mmu_psize, old_pte); 113 mmu_psize, mmu_psize, old_pte);
133 return -1; 114 return -1;
134 } 115 }
135 116
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a6de0a7d8eb..237c8e5f2640 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -48,30 +48,71 @@ static u64 gpage_freearray[MAX_NUMBER_GPAGES];
48static unsigned nr_gpages; 48static unsigned nr_gpages;
49#endif 49#endif
50 50
51static inline int shift_to_mmu_psize(unsigned int shift) 51#define hugepd_none(hpd) ((hpd).pd == 0)
52
53#ifdef CONFIG_PPC_BOOK3S_64
54/*
55 * At this point we do the placement change only for BOOK3S 64. This would
56 * possibly work on other subarchs.
57 */
58
59/*
60 * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
61 * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
62 */
63int pmd_huge(pmd_t pmd)
52{ 64{
53 int psize; 65 /*
66 * leaf pte for huge page, bottom two bits != 00
67 */
68 return ((pmd_val(pmd) & 0x3) != 0x0);
69}
54 70
55 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) 71int pud_huge(pud_t pud)
56 if (mmu_psize_defs[psize].shift == shift) 72{
57 return psize; 73 /*
58 return -1; 74 * leaf pte for huge page, bottom two bits != 00
75 */
76 return ((pud_val(pud) & 0x3) != 0x0);
59} 77}
60 78
61static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) 79int pgd_huge(pgd_t pgd)
62{ 80{
63 if (mmu_psize_defs[mmu_psize].shift) 81 /*
64 return mmu_psize_defs[mmu_psize].shift; 82 * leaf pte for huge page, bottom two bits != 00
65 BUG(); 83 */
84 return ((pgd_val(pgd) & 0x3) != 0x0);
85}
86#else
87int pmd_huge(pmd_t pmd)
88{
89 return 0;
66} 90}
67 91
68#define hugepd_none(hpd) ((hpd).pd == 0) 92int pud_huge(pud_t pud)
93{
94 return 0;
95}
96
97int pgd_huge(pgd_t pgd)
98{
99 return 0;
100}
101#endif
69 102
103/*
104 * We have 4 cases for pgds and pmds:
105 * (1) invalid (all zeroes)
106 * (2) pointer to next table, as normal; bottom 6 bits == 0
107 * (3) leaf pte for huge page, bottom two bits != 00
108 * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
109 */
70pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) 110pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
71{ 111{
72 pgd_t *pg; 112 pgd_t *pg;
73 pud_t *pu; 113 pud_t *pu;
74 pmd_t *pm; 114 pmd_t *pm;
115 pte_t *ret_pte;
75 hugepd_t *hpdp = NULL; 116 hugepd_t *hpdp = NULL;
76 unsigned pdshift = PGDIR_SHIFT; 117 unsigned pdshift = PGDIR_SHIFT;
77 118
@@ -79,30 +120,43 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
79 *shift = 0; 120 *shift = 0;
80 121
81 pg = pgdir + pgd_index(ea); 122 pg = pgdir + pgd_index(ea);
82 if (is_hugepd(pg)) { 123
124 if (pgd_huge(*pg)) {
125 ret_pte = (pte_t *) pg;
126 goto out;
127 } else if (is_hugepd(pg))
83 hpdp = (hugepd_t *)pg; 128 hpdp = (hugepd_t *)pg;
84 } else if (!pgd_none(*pg)) { 129 else if (!pgd_none(*pg)) {
85 pdshift = PUD_SHIFT; 130 pdshift = PUD_SHIFT;
86 pu = pud_offset(pg, ea); 131 pu = pud_offset(pg, ea);
87 if (is_hugepd(pu)) 132
133 if (pud_huge(*pu)) {
134 ret_pte = (pte_t *) pu;
135 goto out;
136 } else if (is_hugepd(pu))
88 hpdp = (hugepd_t *)pu; 137 hpdp = (hugepd_t *)pu;
89 else if (!pud_none(*pu)) { 138 else if (!pud_none(*pu)) {
90 pdshift = PMD_SHIFT; 139 pdshift = PMD_SHIFT;
91 pm = pmd_offset(pu, ea); 140 pm = pmd_offset(pu, ea);
92 if (is_hugepd(pm)) 141
142 if (pmd_huge(*pm)) {
143 ret_pte = (pte_t *) pm;
144 goto out;
145 } else if (is_hugepd(pm))
93 hpdp = (hugepd_t *)pm; 146 hpdp = (hugepd_t *)pm;
94 else if (!pmd_none(*pm)) { 147 else if (!pmd_none(*pm))
95 return pte_offset_kernel(pm, ea); 148 return pte_offset_kernel(pm, ea);
96 }
97 } 149 }
98 } 150 }
99
100 if (!hpdp) 151 if (!hpdp)
101 return NULL; 152 return NULL;
102 153
154 ret_pte = hugepte_offset(hpdp, ea, pdshift);
155 pdshift = hugepd_shift(*hpdp);
156out:
103 if (shift) 157 if (shift)
104 *shift = hugepd_shift(*hpdp); 158 *shift = pdshift;
105 return hugepte_offset(hpdp, ea, pdshift); 159 return ret_pte;
106} 160}
107EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); 161EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
108 162
@@ -145,6 +199,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
145 if (unlikely(!hugepd_none(*hpdp))) 199 if (unlikely(!hugepd_none(*hpdp)))
146 break; 200 break;
147 else 201 else
202 /* We use the old format for PPC_FSL_BOOK3E */
148 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; 203 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
149 } 204 }
150 /* If we bailed from the for loop early, an error occurred, clean up */ 205 /* If we bailed from the for loop early, an error occurred, clean up */
@@ -156,9 +211,15 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
156#else 211#else
157 if (!hugepd_none(*hpdp)) 212 if (!hugepd_none(*hpdp))
158 kmem_cache_free(cachep, new); 213 kmem_cache_free(cachep, new);
159 else 214 else {
215#ifdef CONFIG_PPC_BOOK3S_64
216 hpdp->pd = (unsigned long)new |
217 (shift_to_mmu_psize(pshift) << 2);
218#else
160 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; 219 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
161#endif 220#endif
221 }
222#endif
162 spin_unlock(&mm->page_table_lock); 223 spin_unlock(&mm->page_table_lock);
163 return 0; 224 return 0;
164} 225}
@@ -175,6 +236,61 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
175#define HUGEPD_PUD_SHIFT PMD_SHIFT 236#define HUGEPD_PUD_SHIFT PMD_SHIFT
176#endif 237#endif
177 238
239#ifdef CONFIG_PPC_BOOK3S_64
240/*
241 * At this point we do the placement change only for BOOK3S 64. This would
242 * possibly work on other subarchs.
243 */
244pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
245{
246 pgd_t *pg;
247 pud_t *pu;
248 pmd_t *pm;
249 hugepd_t *hpdp = NULL;
250 unsigned pshift = __ffs(sz);
251 unsigned pdshift = PGDIR_SHIFT;
252
253 addr &= ~(sz-1);
254 pg = pgd_offset(mm, addr);
255
256 if (pshift == PGDIR_SHIFT)
257 /* 16GB huge page */
258 return (pte_t *) pg;
259 else if (pshift > PUD_SHIFT)
260 /*
261 * We need to use hugepd table
262 */
263 hpdp = (hugepd_t *)pg;
264 else {
265 pdshift = PUD_SHIFT;
266 pu = pud_alloc(mm, pg, addr);
267 if (pshift == PUD_SHIFT)
268 return (pte_t *)pu;
269 else if (pshift > PMD_SHIFT)
270 hpdp = (hugepd_t *)pu;
271 else {
272 pdshift = PMD_SHIFT;
273 pm = pmd_alloc(mm, pu, addr);
274 if (pshift == PMD_SHIFT)
275 /* 16MB hugepage */
276 return (pte_t *)pm;
277 else
278 hpdp = (hugepd_t *)pm;
279 }
280 }
281 if (!hpdp)
282 return NULL;
283
284 BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
285
286 if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
287 return NULL;
288
289 return hugepte_offset(hpdp, addr, pdshift);
290}
291
292#else
293
178pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) 294pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
179{ 295{
180 pgd_t *pg; 296 pgd_t *pg;
@@ -212,6 +328,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
212 328
213 return hugepte_offset(hpdp, addr, pdshift); 329 return hugepte_offset(hpdp, addr, pdshift);
214} 330}
331#endif
215 332
216#ifdef CONFIG_PPC_FSL_BOOK3E 333#ifdef CONFIG_PPC_FSL_BOOK3E
217/* Build list of addresses of gigantic pages. This function is used in early 334/* Build list of addresses of gigantic pages. This function is used in early
@@ -475,7 +592,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
475 do { 592 do {
476 pmd = pmd_offset(pud, addr); 593 pmd = pmd_offset(pud, addr);
477 next = pmd_addr_end(addr, end); 594 next = pmd_addr_end(addr, end);
478 if (pmd_none(*pmd)) 595 if (pmd_none_or_clear_bad(pmd))
479 continue; 596 continue;
480#ifdef CONFIG_PPC_FSL_BOOK3E 597#ifdef CONFIG_PPC_FSL_BOOK3E
481 /* 598 /*
@@ -628,16 +745,6 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
628 return page; 745 return page;
629} 746}
630 747
631int pmd_huge(pmd_t pmd)
632{
633 return 0;
634}
635
636int pud_huge(pud_t pud)
637{
638 return 0;
639}
640
641struct page * 748struct page *
642follow_huge_pmd(struct mm_struct *mm, unsigned long address, 749follow_huge_pmd(struct mm_struct *mm, unsigned long address,
643 pmd_t *pmd, int write) 750 pmd_t *pmd, int write)
@@ -646,8 +753,8 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
646 return NULL; 753 return NULL;
647} 754}
648 755
649static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, 756int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
650 unsigned long end, int write, struct page **pages, int *nr) 757 unsigned long end, int write, struct page **pages, int *nr)
651{ 758{
652 unsigned long mask; 759 unsigned long mask;
653 unsigned long pte_end; 760 unsigned long pte_end;
@@ -742,7 +849,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
742 struct hstate *hstate = hstate_file(file); 849 struct hstate *hstate = hstate_file(file);
743 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); 850 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
744 851
745 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); 852 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
746} 853}
747#endif 854#endif
748 855
@@ -883,11 +990,16 @@ static int __init hugetlbpage_init(void)
883 pdshift = PUD_SHIFT; 990 pdshift = PUD_SHIFT;
884 else 991 else
885 pdshift = PGDIR_SHIFT; 992 pdshift = PGDIR_SHIFT;
886 993 /*
887 pgtable_cache_add(pdshift - shift, NULL); 994 * if we have pdshift and shift value same, we don't
888 if (!PGT_CACHE(pdshift - shift)) 995 * use pgt cache for hugepd.
889 panic("hugetlbpage_init(): could not create " 996 */
890 "pgtable cache for %d bit pagesize\n", shift); 997 if (pdshift != shift) {
998 pgtable_cache_add(pdshift - shift, NULL);
999 if (!PGT_CACHE(pdshift - shift))
1000 panic("hugetlbpage_init(): could not create "
1001 "pgtable cache for %d bit pagesize\n", shift);
1002 }
891 } 1003 }
892 1004
893 /* Set default large page size. Currently, we pick 16M or 1M 1005 /* Set default large page size. Currently, we pick 16M or 1M
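
[Note: the comment above find_linux_pte_or_hugepte() lists four possible states for a pgd/pud/pmd entry on Book3S 64 after this change, and __hugepte_alloc() now stores the MMU page-size index at bit 2 of a hugepd pointer. A toy classification of an entry by its low bits follows; any detail beyond what the diff itself shows (bottom two bits, 4-bit size field at bit 2) is assumed for illustration:]

    #include <stdio.h>

    /* Four states, per the comment in the diff:
     *  (1) invalid          : value == 0
     *  (2) next-level table : bottom 6 bits all zero
     *  (3) huge leaf pte    : bottom two bits != 00
     *  (4) hugepd pointer   : bottom two bits == 00, bits 2..5 hold the
     *                         page-size index (nonzero, which is what
     *                         distinguishes it from a plain table pointer)
     */
    enum entry_kind { ENTRY_NONE, ENTRY_TABLE, ENTRY_HUGE_LEAF, ENTRY_HUGEPD };

    static enum entry_kind classify(unsigned long val)
    {
    	if (val == 0)
    		return ENTRY_NONE;
    	if (val & 0x3)
    		return ENTRY_HUGE_LEAF;
    	if (val & 0x3c)			/* psize index in bits 2..5 */
    		return ENTRY_HUGEPD;
    	return ENTRY_TABLE;
    }

    int main(void)
    {
    	/* Building a hugepd value the way __hugepte_alloc() does:
    	 * table address | (psize index << 2).  Address and index invented. */
    	unsigned long hugepd = 0x10000UL | (5UL << 2);

    	printf("%d %d %d\n",
    	       classify(0x10000UL),	/* ENTRY_TABLE     */
    	       classify(hugepd),	/* ENTRY_HUGEPD    */
    	       classify(0x10001UL));	/* ENTRY_HUGE_LEAF */
    	return 0;
    }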
diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c
index 8cdbd8634a58..915412e4d5ba 100644
--- a/arch/powerpc/mm/icswx.c
+++ b/arch/powerpc/mm/icswx.c
@@ -67,7 +67,7 @@
67 67
68void switch_cop(struct mm_struct *next) 68void switch_cop(struct mm_struct *next)
69{ 69{
70#ifdef CONFIG_ICSWX_PID 70#ifdef CONFIG_PPC_ICSWX_PID
71 mtspr(SPRN_PID, next->context.cop_pid); 71 mtspr(SPRN_PID, next->context.cop_pid);
72#endif 72#endif
73 mtspr(SPRN_ACOP, next->context.acop); 73 mtspr(SPRN_ACOP, next->context.acop);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 5a535b73ea18..c2787bf779ca 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -129,8 +129,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
129 align = max_t(unsigned long, align, minalign); 129 align = max_t(unsigned long, align, minalign);
130 name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); 130 name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
131 new = kmem_cache_create(name, table_size, align, 0, ctor); 131 new = kmem_cache_create(name, table_size, align, 0, ctor);
132 PGT_CACHE(shift) = new; 132 pgtable_cache[shift - 1] = new;
133
134 pr_debug("Allocated pgtable cache for order %d\n", shift); 133 pr_debug("Allocated pgtable cache for order %d\n", shift);
135} 134}
136 135
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index cd76c454942f..0988a26e0413 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -66,10 +66,9 @@ unsigned long long memory_limit;
66 66
67#ifdef CONFIG_HIGHMEM 67#ifdef CONFIG_HIGHMEM
68pte_t *kmap_pte; 68pte_t *kmap_pte;
69EXPORT_SYMBOL(kmap_pte);
69pgprot_t kmap_prot; 70pgprot_t kmap_prot;
70
71EXPORT_SYMBOL(kmap_prot); 71EXPORT_SYMBOL(kmap_prot);
72EXPORT_SYMBOL(kmap_pte);
73 72
74static inline pte_t *virt_to_kpte(unsigned long vaddr) 73static inline pte_t *virt_to_kpte(unsigned long vaddr)
75{ 74{
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index d1d1b92c5b99..178876aef40f 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -23,6 +23,7 @@
23#include <linux/slab.h> 23#include <linux/slab.h>
24 24
25#include <asm/mmu_context.h> 25#include <asm/mmu_context.h>
26#include <asm/pgalloc.h>
26 27
27#include "icswx.h" 28#include "icswx.h"
28 29
@@ -85,6 +86,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
85 spin_lock_init(mm->context.cop_lockp); 86 spin_lock_init(mm->context.cop_lockp);
86#endif /* CONFIG_PPC_ICSWX */ 87#endif /* CONFIG_PPC_ICSWX */
87 88
89#ifdef CONFIG_PPC_64K_PAGES
90 mm->context.pte_frag = NULL;
91#endif
88 return 0; 92 return 0;
89} 93}
90 94
@@ -96,13 +100,46 @@ void __destroy_context(int context_id)
96} 100}
97EXPORT_SYMBOL_GPL(__destroy_context); 101EXPORT_SYMBOL_GPL(__destroy_context);
98 102
103#ifdef CONFIG_PPC_64K_PAGES
104static void destroy_pagetable_page(struct mm_struct *mm)
105{
106 int count;
107 void *pte_frag;
108 struct page *page;
109
110 pte_frag = mm->context.pte_frag;
111 if (!pte_frag)
112 return;
113
114 page = virt_to_page(pte_frag);
115 /* drop all the pending references */
116 count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
117 /* We allow PTE_FRAG_NR fragments from a PTE page */
118 count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
119 if (!count) {
120 pgtable_page_dtor(page);
121 free_hot_cold_page(page, 0);
122 }
123}
124
125#else
126static inline void destroy_pagetable_page(struct mm_struct *mm)
127{
128 return;
129}
130#endif
131
132
99void destroy_context(struct mm_struct *mm) 133void destroy_context(struct mm_struct *mm)
100{ 134{
135
101#ifdef CONFIG_PPC_ICSWX 136#ifdef CONFIG_PPC_ICSWX
102 drop_cop(mm->context.acop, mm); 137 drop_cop(mm->context.acop, mm);
103 kfree(mm->context.cop_lockp); 138 kfree(mm->context.cop_lockp);
104 mm->context.cop_lockp = NULL; 139 mm->context.cop_lockp = NULL;
105#endif /* CONFIG_PPC_ICSWX */ 140#endif /* CONFIG_PPC_ICSWX */
141
142 destroy_pagetable_page(mm);
106 __destroy_context(mm->context.id); 143 __destroy_context(mm->context.id);
107 subpage_prot_free(mm); 144 subpage_prot_free(mm);
108 mm->context.id = MMU_NO_CONTEXT; 145 mm->context.id = MMU_NO_CONTEXT;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fa33c546e778..88c0425dc0a8 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -22,6 +22,10 @@
22#include <linux/pfn.h> 22#include <linux/pfn.h>
23#include <linux/cpuset.h> 23#include <linux/cpuset.h>
24#include <linux/node.h> 24#include <linux/node.h>
25#include <linux/stop_machine.h>
26#include <linux/proc_fs.h>
27#include <linux/seq_file.h>
28#include <linux/uaccess.h>
25#include <linux/slab.h> 29#include <linux/slab.h>
26#include <asm/sparsemem.h> 30#include <asm/sparsemem.h>
27#include <asm/prom.h> 31#include <asm/prom.h>
@@ -30,6 +34,7 @@
30#include <asm/paca.h> 34#include <asm/paca.h>
31#include <asm/hvcall.h> 35#include <asm/hvcall.h>
32#include <asm/setup.h> 36#include <asm/setup.h>
37#include <asm/vdso.h>
33 38
34static int numa_enabled = 1; 39static int numa_enabled = 1;
35 40
@@ -77,7 +82,7 @@ static void __init setup_node_to_cpumask_map(void)
77 dbg("Node to cpumask map for %d nodes\n", nr_node_ids); 82 dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
78} 83}
79 84
80static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, 85static int __init fake_numa_create_new_node(unsigned long end_pfn,
81 unsigned int *nid) 86 unsigned int *nid)
82{ 87{
83 unsigned long long mem; 88 unsigned long long mem;
@@ -199,7 +204,7 @@ int __node_distance(int a, int b)
199 int distance = LOCAL_DISTANCE; 204 int distance = LOCAL_DISTANCE;
200 205
201 if (!form1_affinity) 206 if (!form1_affinity)
202 return distance; 207 return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
203 208
204 for (i = 0; i < distance_ref_points_depth; i++) { 209 for (i = 0; i < distance_ref_points_depth; i++) {
205 if (distance_lookup_table[a][i] == distance_lookup_table[b][i]) 210 if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
@@ -289,9 +294,7 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
289static int __init find_min_common_depth(void) 294static int __init find_min_common_depth(void)
290{ 295{
291 int depth; 296 int depth;
292 struct device_node *chosen;
293 struct device_node *root; 297 struct device_node *root;
294 const char *vec5;
295 298
296 if (firmware_has_feature(FW_FEATURE_OPAL)) 299 if (firmware_has_feature(FW_FEATURE_OPAL))
297 root = of_find_node_by_path("/ibm,opal"); 300 root = of_find_node_by_path("/ibm,opal");
@@ -323,24 +326,10 @@ static int __init find_min_common_depth(void)
323 326
324 distance_ref_points_depth /= sizeof(int); 327 distance_ref_points_depth /= sizeof(int);
325 328
326#define VEC5_AFFINITY_BYTE 5 329 if (firmware_has_feature(FW_FEATURE_OPAL) ||
327#define VEC5_AFFINITY 0x80 330 firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
328 331 dbg("Using form 1 affinity\n");
329 if (firmware_has_feature(FW_FEATURE_OPAL))
330 form1_affinity = 1; 332 form1_affinity = 1;
331 else {
332 chosen = of_find_node_by_path("/chosen");
333 if (chosen) {
334 vec5 = of_get_property(chosen,
335 "ibm,architecture-vec-5", NULL);
336 if (vec5 && (vec5[VEC5_AFFINITY_BYTE] &
337 VEC5_AFFINITY)) {
338 dbg("Using form 1 affinity\n");
339 form1_affinity = 1;
340 }
341
342 of_node_put(chosen);
343 }
344 } 333 }
345 334
346 if (form1_affinity) { 335 if (form1_affinity) {
@@ -1268,10 +1257,18 @@ u64 memory_hotplug_max(void)
1268 1257
1269/* Virtual Processor Home Node (VPHN) support */ 1258/* Virtual Processor Home Node (VPHN) support */
1270#ifdef CONFIG_PPC_SPLPAR 1259#ifdef CONFIG_PPC_SPLPAR
1260struct topology_update_data {
1261 struct topology_update_data *next;
1262 unsigned int cpu;
1263 int old_nid;
1264 int new_nid;
1265};
1266
1271static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; 1267static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
1272static cpumask_t cpu_associativity_changes_mask; 1268static cpumask_t cpu_associativity_changes_mask;
1273static int vphn_enabled; 1269static int vphn_enabled;
1274static void set_topology_timer(void); 1270static int prrn_enabled;
1271static void reset_topology_timer(void);
1275 1272
1276/* 1273/*
1277 * Store the current values of the associativity change counters in the 1274 * Store the current values of the associativity change counters in the
@@ -1307,11 +1304,9 @@ static void setup_cpu_associativity_change_counters(void)
1307 */ 1304 */
1308static int update_cpu_associativity_changes_mask(void) 1305static int update_cpu_associativity_changes_mask(void)
1309{ 1306{
1310 int cpu, nr_cpus = 0; 1307 int cpu;
1311 cpumask_t *changes = &cpu_associativity_changes_mask; 1308 cpumask_t *changes = &cpu_associativity_changes_mask;
1312 1309
1313 cpumask_clear(changes);
1314
1315 for_each_possible_cpu(cpu) { 1310 for_each_possible_cpu(cpu) {
1316 int i, changed = 0; 1311 int i, changed = 0;
1317 u8 *counts = vphn_cpu_change_counts[cpu]; 1312 u8 *counts = vphn_cpu_change_counts[cpu];
@@ -1325,11 +1320,10 @@ static int update_cpu_associativity_changes_mask(void)
1325 } 1320 }
1326 if (changed) { 1321 if (changed) {
1327 cpumask_set_cpu(cpu, changes); 1322 cpumask_set_cpu(cpu, changes);
1328 nr_cpus++;
1329 } 1323 }
1330 } 1324 }
1331 1325
1332 return nr_cpus; 1326 return cpumask_weight(changes);
1333} 1327}
1334 1328
1335/* 1329/*
@@ -1421,40 +1415,84 @@ static long vphn_get_associativity(unsigned long cpu,
1421} 1415}
1422 1416
1423/* 1417/*
1418 * Update the CPU maps and sysfs entries for a single CPU when its NUMA
1419 * characteristics change. This function doesn't perform any locking and is
1420 * only safe to call from stop_machine().
1421 */
1422static int update_cpu_topology(void *data)
1423{
1424 struct topology_update_data *update;
1425 unsigned long cpu;
1426
1427 if (!data)
1428 return -EINVAL;
1429
1430 cpu = get_cpu();
1431
1432 for (update = data; update; update = update->next) {
1433 if (cpu != update->cpu)
1434 continue;
1435
1436 unregister_cpu_under_node(update->cpu, update->old_nid);
1437 unmap_cpu_from_node(update->cpu);
1438 map_cpu_to_node(update->cpu, update->new_nid);
1439 vdso_getcpu_init();
1440 register_cpu_under_node(update->cpu, update->new_nid);
1441 }
1442
1443 return 0;
1444}
1445
1446/*
1424 * Update the node maps and sysfs entries for each cpu whose home node 1447 * Update the node maps and sysfs entries for each cpu whose home node
1425 * has changed. Returns 1 when the topology has changed, and 0 otherwise. 1448 * has changed. Returns 1 when the topology has changed, and 0 otherwise.
1426 */ 1449 */
1427int arch_update_cpu_topology(void) 1450int arch_update_cpu_topology(void)
1428{ 1451{
1429 int cpu, nid, old_nid, changed = 0; 1452 unsigned int cpu, changed = 0;
1453 struct topology_update_data *updates, *ud;
1430 unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; 1454 unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
1455 cpumask_t updated_cpus;
1431 struct device *dev; 1456 struct device *dev;
1457 int weight, i = 0;
1432 1458
1433 for_each_cpu(cpu,&cpu_associativity_changes_mask) { 1459 weight = cpumask_weight(&cpu_associativity_changes_mask);
1460 if (!weight)
1461 return 0;
1462
1463 updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
1464 if (!updates)
1465 return 0;
1466
1467 cpumask_clear(&updated_cpus);
1468
1469 for_each_cpu(cpu, &cpu_associativity_changes_mask) {
1470 ud = &updates[i++];
1471 ud->cpu = cpu;
1434 vphn_get_associativity(cpu, associativity); 1472 vphn_get_associativity(cpu, associativity);
1435 nid = associativity_to_nid(associativity); 1473 ud->new_nid = associativity_to_nid(associativity);
1436 1474
1437 if (nid < 0 || !node_online(nid)) 1475 if (ud->new_nid < 0 || !node_online(ud->new_nid))
1438 nid = first_online_node; 1476 ud->new_nid = first_online_node;
1439 1477
1440 old_nid = numa_cpu_lookup_table[cpu]; 1478 ud->old_nid = numa_cpu_lookup_table[cpu];
1479 cpumask_set_cpu(cpu, &updated_cpus);
1441 1480
1442 /* Disable hotplug while we update the cpu 1481 if (i < weight)
1443 * masks and sysfs. 1482 ud->next = &updates[i];
1444 */ 1483 }
1445 get_online_cpus(); 1484
1446 unregister_cpu_under_node(cpu, old_nid); 1485 stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
1447 unmap_cpu_from_node(cpu); 1486
1448 map_cpu_to_node(cpu, nid); 1487 for (ud = &updates[0]; ud; ud = ud->next) {
1449 register_cpu_under_node(cpu, nid); 1488 dev = get_cpu_device(ud->cpu);
1450 put_online_cpus();
1451
1452 dev = get_cpu_device(cpu);
1453 if (dev) 1489 if (dev)
1454 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 1490 kobject_uevent(&dev->kobj, KOBJ_CHANGE);
1491 cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
1455 changed = 1; 1492 changed = 1;
1456 } 1493 }
1457 1494
1495 kfree(updates);
1458 return changed; 1496 return changed;
1459} 1497}
1460 1498
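
arch_update_cpu_topology() now collects every affected CPU into one array whose entries are chained through ->next, and hands the head to stop_machine() so the CPU maps are rewritten with all other CPUs quiesced. A stand-alone sketch of that chaining pattern, with a plain callback standing in for stop_machine() (names other than topology_update_data are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct topology_update_data {
	struct topology_update_data *next;
	unsigned int cpu;
	int old_nid;
	int new_nid;
};

/* Stand-in for the stop_machine() callback: walk the chained updates. */
static int apply_updates(void *data)
{
	struct topology_update_data *update;

	for (update = data; update; update = update->next)
		printf("cpu %u: node %d -> %d\n",
		       update->cpu, update->old_nid, update->new_nid);
	return 0;
}

int main(void)
{
	unsigned int cpus[] = { 2, 3, 7 };
	int weight = sizeof(cpus) / sizeof(cpus[0]), i;
	struct topology_update_data *updates = calloc(weight, sizeof(*updates));

	if (!updates)
		return 1;
	for (i = 0; i < weight; i++) {
		updates[i].cpu = cpus[i];
		updates[i].old_nid = 0;
		updates[i].new_nid = 1;
		if (i + 1 < weight)             /* same chaining as the patch */
			updates[i].next = &updates[i + 1];
	}
	apply_updates(&updates[0]);     /* the kernel hands this to stop_machine() */
	free(updates);
	return 0;
}

Inside stop_machine(), update_cpu_topology() runs with the machine stopped and walks the same chain, acting only on entries whose ->cpu matches the executing CPU.
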
@@ -1471,49 +1509,165 @@ void topology_schedule_update(void)
1471 1509
1472static void topology_timer_fn(unsigned long ignored) 1510static void topology_timer_fn(unsigned long ignored)
1473{ 1511{
1474 if (!vphn_enabled) 1512 if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
1475 return;
1476 if (update_cpu_associativity_changes_mask() > 0)
1477 topology_schedule_update(); 1513 topology_schedule_update();
1478 set_topology_timer(); 1514 else if (vphn_enabled) {
1515 if (update_cpu_associativity_changes_mask() > 0)
1516 topology_schedule_update();
1517 reset_topology_timer();
1518 }
1479} 1519}
1480static struct timer_list topology_timer = 1520static struct timer_list topology_timer =
1481 TIMER_INITIALIZER(topology_timer_fn, 0, 0); 1521 TIMER_INITIALIZER(topology_timer_fn, 0, 0);
1482 1522
1483static void set_topology_timer(void) 1523static void reset_topology_timer(void)
1484{ 1524{
1485 topology_timer.data = 0; 1525 topology_timer.data = 0;
1486 topology_timer.expires = jiffies + 60 * HZ; 1526 topology_timer.expires = jiffies + 60 * HZ;
1487 add_timer(&topology_timer); 1527 mod_timer(&topology_timer, topology_timer.expires);
1488} 1528}
1489 1529
1530#ifdef CONFIG_SMP
1531
1532static void stage_topology_update(int core_id)
1533{
1534 cpumask_or(&cpu_associativity_changes_mask,
1535 &cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
1536 reset_topology_timer();
1537}
1538
1539static int dt_update_callback(struct notifier_block *nb,
1540 unsigned long action, void *data)
1541{
1542 struct of_prop_reconfig *update;
1543 int rc = NOTIFY_DONE;
1544
1545 switch (action) {
1546 case OF_RECONFIG_UPDATE_PROPERTY:
1547 update = (struct of_prop_reconfig *)data;
1548 if (!of_prop_cmp(update->dn->type, "cpu") &&
1549 !of_prop_cmp(update->prop->name, "ibm,associativity")) {
1550 u32 core_id;
1551 of_property_read_u32(update->dn, "reg", &core_id);
1552 stage_topology_update(core_id);
1553 rc = NOTIFY_OK;
1554 }
1555 break;
1556 }
1557
1558 return rc;
1559}
1560
1561static struct notifier_block dt_update_nb = {
1562 .notifier_call = dt_update_callback,
1563};
1564
1565#endif
1566
1490/* 1567/*
1491 * Start polling for VPHN associativity changes. 1568 * Start polling for associativity changes.
1492 */ 1569 */
1493int start_topology_update(void) 1570int start_topology_update(void)
1494{ 1571{
1495 int rc = 0; 1572 int rc = 0;
1496 1573
1497 /* Disabled until races with load balancing are fixed */ 1574 if (firmware_has_feature(FW_FEATURE_PRRN)) {
1498 if (0 && firmware_has_feature(FW_FEATURE_VPHN) && 1575 if (!prrn_enabled) {
1499 get_lppaca()->shared_proc) { 1576 prrn_enabled = 1;
1500 vphn_enabled = 1; 1577 vphn_enabled = 0;
1501 setup_cpu_associativity_change_counters(); 1578#ifdef CONFIG_SMP
1502 init_timer_deferrable(&topology_timer); 1579 rc = of_reconfig_notifier_register(&dt_update_nb);
1503 set_topology_timer(); 1580#endif
1504 rc = 1; 1581 }
1582 } else if (firmware_has_feature(FW_FEATURE_VPHN) &&
1583 get_lppaca()->shared_proc) {
1584 if (!vphn_enabled) {
1585 prrn_enabled = 0;
1586 vphn_enabled = 1;
1587 setup_cpu_associativity_change_counters();
1588 init_timer_deferrable(&topology_timer);
1589 reset_topology_timer();
1590 }
1505 } 1591 }
1506 1592
1507 return rc; 1593 return rc;
1508} 1594}
1509__initcall(start_topology_update);
1510 1595
1511/* 1596/*
1512 * Disable polling for VPHN associativity changes. 1597 * Disable polling for VPHN associativity changes.
1513 */ 1598 */
1514int stop_topology_update(void) 1599int stop_topology_update(void)
1515{ 1600{
1516 vphn_enabled = 0; 1601 int rc = 0;
1517 return del_timer_sync(&topology_timer); 1602
1603 if (prrn_enabled) {
1604 prrn_enabled = 0;
1605#ifdef CONFIG_SMP
1606 rc = of_reconfig_notifier_unregister(&dt_update_nb);
1607#endif
1608 } else if (vphn_enabled) {
1609 vphn_enabled = 0;
1610 rc = del_timer_sync(&topology_timer);
1611 }
1612
1613 return rc;
1614}
1615
1616int prrn_is_enabled(void)
1617{
1618 return prrn_enabled;
1619}
1620
1621static int topology_read(struct seq_file *file, void *v)
1622{
1623 if (vphn_enabled || prrn_enabled)
1624 seq_puts(file, "on\n");
1625 else
1626 seq_puts(file, "off\n");
1627
1628 return 0;
1629}
1630
1631static int topology_open(struct inode *inode, struct file *file)
1632{
1633 return single_open(file, topology_read, NULL);
1634}
1635
1636static ssize_t topology_write(struct file *file, const char __user *buf,
1637 size_t count, loff_t *off)
1638{
1639 char kbuf[4]; /* "on" or "off" plus null. */
1640 int read_len;
1641
1642 read_len = count < 3 ? count : 3;
1643 if (copy_from_user(kbuf, buf, read_len))
1644 return -EINVAL;
1645
1646 kbuf[read_len] = '\0';
1647
1648 if (!strncmp(kbuf, "on", 2))
1649 start_topology_update();
1650 else if (!strncmp(kbuf, "off", 3))
1651 stop_topology_update();
1652 else
1653 return -EINVAL;
1654
1655 return count;
1656}
1657
1658static const struct file_operations topology_ops = {
1659 .read = seq_read,
1660 .write = topology_write,
1661 .open = topology_open,
1662 .release = single_release
1663};
1664
1665static int topology_update_init(void)
1666{
1667 start_topology_update();
1668 proc_create("powerpc/topology_updates", 644, NULL, &topology_ops);
1669
1670 return 0;
1518} 1671}
1672device_initcall(topology_update_init);
1519#endif /* CONFIG_PPC_SPLPAR */ 1673#endif /* CONFIG_PPC_SPLPAR */
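
The new proc interface reports whether polling is active and accepts "on" or "off" to toggle it. A minimal user-space snippet exercising it, assuming the file is exposed at /proc/powerpc/topology_updates and the caller is permitted to write it:

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/powerpc/topology_updates";
	char state[8] = "";
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(state, sizeof(state), f))
		printf("topology updates: %s", state);   /* "on\n" or "off\n" */
	fclose(f);

	f = fopen(path, "w");                    /* requires write permission */
	if (f) {
		fputs("on", f);                  /* start_topology_update() path */
		fclose(f);
	}
	return 0;
}
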
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 654258f165ae..a854096e1023 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -337,3 +337,121 @@ EXPORT_SYMBOL(__ioremap_at);
337EXPORT_SYMBOL(iounmap); 337EXPORT_SYMBOL(iounmap);
338EXPORT_SYMBOL(__iounmap); 338EXPORT_SYMBOL(__iounmap);
339EXPORT_SYMBOL(__iounmap_at); 339EXPORT_SYMBOL(__iounmap_at);
340
341#ifdef CONFIG_PPC_64K_PAGES
342static pte_t *get_from_cache(struct mm_struct *mm)
343{
344 void *pte_frag, *ret;
345
346 spin_lock(&mm->page_table_lock);
347 ret = mm->context.pte_frag;
348 if (ret) {
349 pte_frag = ret + PTE_FRAG_SIZE;
350 /*
351 * If we have taken up all the fragments mark PTE page NULL
352 */
353 if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
354 pte_frag = NULL;
355 mm->context.pte_frag = pte_frag;
356 }
357 spin_unlock(&mm->page_table_lock);
358 return (pte_t *)ret;
359}
360
361static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
362{
363 void *ret = NULL;
364 struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
365 __GFP_REPEAT | __GFP_ZERO);
366 if (!page)
367 return NULL;
368
369 ret = page_address(page);
370 spin_lock(&mm->page_table_lock);
371 /*
372 * If we find pgtable_page set, we return
373 * the allocated page with single fragment
374 * count.
375 */
376 if (likely(!mm->context.pte_frag)) {
377 atomic_set(&page->_count, PTE_FRAG_NR);
378 mm->context.pte_frag = ret + PTE_FRAG_SIZE;
379 }
380 spin_unlock(&mm->page_table_lock);
381
382 if (!kernel)
383 pgtable_page_ctor(page);
384
385 return (pte_t *)ret;
386}
387
388pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
389{
390 pte_t *pte;
391
392 pte = get_from_cache(mm);
393 if (pte)
394 return pte;
395
396 return __alloc_for_cache(mm, kernel);
397}
398
399void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
400{
401 struct page *page = virt_to_page(table);
402 if (put_page_testzero(page)) {
403 if (!kernel)
404 pgtable_page_dtor(page);
405 free_hot_cold_page(page, 0);
406 }
407}
408
409#ifdef CONFIG_SMP
410static void page_table_free_rcu(void *table)
411{
412 struct page *page = virt_to_page(table);
413 if (put_page_testzero(page)) {
414 pgtable_page_dtor(page);
415 free_hot_cold_page(page, 0);
416 }
417}
418
419void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
420{
421 unsigned long pgf = (unsigned long)table;
422
423 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
424 pgf |= shift;
425 tlb_remove_table(tlb, (void *)pgf);
426}
427
428void __tlb_remove_table(void *_table)
429{
430 void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
431 unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
432
433 if (!shift)
434 /* PTE page needs special handling */
435 page_table_free_rcu(table);
436 else {
437 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
438 kmem_cache_free(PGT_CACHE(shift), table);
439 }
440}
441#else
442void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
443{
444 if (!shift) {
445 /* PTE page needs special handling */
446 struct page *page = virt_to_page(table);
447 if (put_page_testzero(page)) {
448 pgtable_page_dtor(page);
449 free_hot_cold_page(page, 0);
450 }
451 } else {
452 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
453 kmem_cache_free(PGT_CACHE(shift), table);
454 }
455}
456#endif
457#endif /* CONFIG_PPC_64K_PAGES */
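
pgtable_free_tlb() packs the page-table index size into the low bits of the table address before passing it to tlb_remove_table(), which works because the tables are aligned well beyond MAX_PGTABLE_INDEX_SIZE; __tlb_remove_table() masks the two apart again. A user-space sketch of the pack/unpack round trip, taking MAX_PGTABLE_INDEX_SIZE as 0xf for illustration:

#include <stdio.h>

#define MAX_PGTABLE_INDEX_SIZE 0xf   /* assumed mask value */

int main(void)
{
	/* A table address; real tables are aligned beyond 16 bytes,
	 * so the low 4 bits are free to carry the index size. */
	unsigned long table = 0x0000700012345000UL;
	unsigned long shift = 9;                       /* index size to encode */

	unsigned long pgf = table | shift;             /* pgtable_free_tlb() */

	unsigned long got_table = pgf & ~(unsigned long)MAX_PGTABLE_INDEX_SIZE;
	unsigned long got_shift = pgf & MAX_PGTABLE_INDEX_SIZE;   /* __tlb_remove_table() */

	printf("table %#lx shift %lu -> %s\n", got_table, got_shift,
	       (got_table == table && got_shift == shift) ? "round-trip ok"
							   : "mismatch");
	return 0;
}

A shift of zero is reserved for PTE pages, which is why __tlb_remove_table() routes that case to page_table_free_rcu() instead of a kmem cache.
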
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index cf9dada734b6..3e99c149271a 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -237,134 +237,112 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
237#endif 237#endif
238} 238}
239 239
240/*
241 * Compute which slice addr is part of;
242 * set *boundary_addr to the start or end boundary of that slice
243 * (depending on 'end' parameter);
244 * return boolean indicating if the slice is marked as available in the
245 * 'available' slice_mark.
246 */
247static bool slice_scan_available(unsigned long addr,
248 struct slice_mask available,
249 int end,
250 unsigned long *boundary_addr)
251{
252 unsigned long slice;
253 if (addr < SLICE_LOW_TOP) {
254 slice = GET_LOW_SLICE_INDEX(addr);
255 *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
256 return !!(available.low_slices & (1u << slice));
257 } else {
258 slice = GET_HIGH_SLICE_INDEX(addr);
259 *boundary_addr = (slice + end) ?
260 ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
261 return !!(available.high_slices & (1u << slice));
262 }
263}
264
240static unsigned long slice_find_area_bottomup(struct mm_struct *mm, 265static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
241 unsigned long len, 266 unsigned long len,
242 struct slice_mask available, 267 struct slice_mask available,
243 int psize, int use_cache) 268 int psize)
244{ 269{
245 struct vm_area_struct *vma;
246 unsigned long start_addr, addr;
247 struct slice_mask mask;
248 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); 270 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
249 271 unsigned long addr, found, next_end;
250 if (use_cache) { 272 struct vm_unmapped_area_info info;
251 if (len <= mm->cached_hole_size) { 273
252 start_addr = addr = TASK_UNMAPPED_BASE; 274 info.flags = 0;
253 mm->cached_hole_size = 0; 275 info.length = len;
254 } else 276 info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
255 start_addr = addr = mm->free_area_cache; 277 info.align_offset = 0;
256 } else 278
257 start_addr = addr = TASK_UNMAPPED_BASE; 279 addr = TASK_UNMAPPED_BASE;
258 280 while (addr < TASK_SIZE) {
259full_search: 281 info.low_limit = addr;
260 for (;;) { 282 if (!slice_scan_available(addr, available, 1, &addr))
261 addr = _ALIGN_UP(addr, 1ul << pshift);
262 if ((TASK_SIZE - len) < addr)
263 break;
264 vma = find_vma(mm, addr);
265 BUG_ON(vma && (addr >= vma->vm_end));
266
267 mask = slice_range_to_mask(addr, len);
268 if (!slice_check_fit(mask, available)) {
269 if (addr < SLICE_LOW_TOP)
270 addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT);
271 else
272 addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT);
273 continue; 283 continue;
284
285 next_slice:
286 /*
287 * At this point [info.low_limit; addr) covers
288 * available slices only and ends at a slice boundary.
289 * Check if we need to reduce the range, or if we can
290 * extend it to cover the next available slice.
291 */
292 if (addr >= TASK_SIZE)
293 addr = TASK_SIZE;
294 else if (slice_scan_available(addr, available, 1, &next_end)) {
295 addr = next_end;
296 goto next_slice;
274 } 297 }
275 if (!vma || addr + len <= vma->vm_start) { 298 info.high_limit = addr;
276 /*
277 * Remember the place where we stopped the search:
278 */
279 if (use_cache)
280 mm->free_area_cache = addr + len;
281 return addr;
282 }
283 if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
284 mm->cached_hole_size = vma->vm_start - addr;
285 addr = vma->vm_end;
286 }
287 299
288 /* Make sure we didn't miss any holes */ 300 found = vm_unmapped_area(&info);
289 if (use_cache && start_addr != TASK_UNMAPPED_BASE) { 301 if (!(found & ~PAGE_MASK))
290 start_addr = addr = TASK_UNMAPPED_BASE; 302 return found;
291 mm->cached_hole_size = 0;
292 goto full_search;
293 } 303 }
304
294 return -ENOMEM; 305 return -ENOMEM;
295} 306}
296 307
297static unsigned long slice_find_area_topdown(struct mm_struct *mm, 308static unsigned long slice_find_area_topdown(struct mm_struct *mm,
298 unsigned long len, 309 unsigned long len,
299 struct slice_mask available, 310 struct slice_mask available,
300 int psize, int use_cache) 311 int psize)
301{ 312{
302 struct vm_area_struct *vma;
303 unsigned long addr;
304 struct slice_mask mask;
305 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); 313 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
314 unsigned long addr, found, prev;
315 struct vm_unmapped_area_info info;
306 316
307 /* check if free_area_cache is useful for us */ 317 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
308 if (use_cache) { 318 info.length = len;
309 if (len <= mm->cached_hole_size) { 319 info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
310 mm->cached_hole_size = 0; 320 info.align_offset = 0;
311 mm->free_area_cache = mm->mmap_base;
312 }
313
314 /* either no address requested or can't fit in requested
315 * address hole
316 */
317 addr = mm->free_area_cache;
318
319 /* make sure it can fit in the remaining address space */
320 if (addr > len) {
321 addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
322 mask = slice_range_to_mask(addr, len);
323 if (slice_check_fit(mask, available) &&
324 slice_area_is_free(mm, addr, len))
325 /* remember the address as a hint for
326 * next time
327 */
328 return (mm->free_area_cache = addr);
329 }
330 }
331 321
332 addr = mm->mmap_base; 322 addr = mm->mmap_base;
333 while (addr > len) { 323 while (addr > PAGE_SIZE) {
334 /* Go down by chunk size */ 324 info.high_limit = addr;
335 addr = _ALIGN_DOWN(addr - len, 1ul << pshift); 325 if (!slice_scan_available(addr - 1, available, 0, &addr))
336
337 /* Check for hit with different page size */
338 mask = slice_range_to_mask(addr, len);
339 if (!slice_check_fit(mask, available)) {
340 if (addr < SLICE_LOW_TOP)
341 addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
342 else if (addr < (1ul << SLICE_HIGH_SHIFT))
343 addr = SLICE_LOW_TOP;
344 else
345 addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
346 continue; 326 continue;
347 }
348 327
328 prev_slice:
349 /* 329 /*
350 * Lookup failure means no vma is above this address, 330 * At this point [addr; info.high_limit) covers
351 * else if new region fits below vma->vm_start, 331 * available slices only and starts at a slice boundary.
352 * return with success: 332 * Check if we need to reduce the range, or if we can
333 * extend it to cover the previous available slice.
353 */ 334 */
354 vma = find_vma(mm, addr); 335 if (addr < PAGE_SIZE)
355 if (!vma || (addr + len) <= vma->vm_start) { 336 addr = PAGE_SIZE;
356 /* remember the address as a hint for next time */ 337 else if (slice_scan_available(addr - 1, available, 0, &prev)) {
357 if (use_cache) 338 addr = prev;
358 mm->free_area_cache = addr; 339 goto prev_slice;
359 return addr;
360 } 340 }
341 info.low_limit = addr;
361 342
362 /* remember the largest hole we saw so far */ 343 found = vm_unmapped_area(&info);
363 if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) 344 if (!(found & ~PAGE_MASK))
364 mm->cached_hole_size = vma->vm_start - addr; 345 return found;
365
366 /* try just below the current vma->vm_start */
367 addr = vma->vm_start;
368 } 346 }
369 347
370 /* 348 /*
@@ -373,28 +351,18 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
373 * can happen with large stack limits and large mmap() 351 * can happen with large stack limits and large mmap()
374 * allocations. 352 * allocations.
375 */ 353 */
376 addr = slice_find_area_bottomup(mm, len, available, psize, 0); 354 return slice_find_area_bottomup(mm, len, available, psize);
377
378 /*
379 * Restore the topdown base:
380 */
381 if (use_cache) {
382 mm->free_area_cache = mm->mmap_base;
383 mm->cached_hole_size = ~0UL;
384 }
385
386 return addr;
387} 355}
388 356
389 357
390static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, 358static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
391 struct slice_mask mask, int psize, 359 struct slice_mask mask, int psize,
392 int topdown, int use_cache) 360 int topdown)
393{ 361{
394 if (topdown) 362 if (topdown)
395 return slice_find_area_topdown(mm, len, mask, psize, use_cache); 363 return slice_find_area_topdown(mm, len, mask, psize);
396 else 364 else
397 return slice_find_area_bottomup(mm, len, mask, psize, use_cache); 365 return slice_find_area_bottomup(mm, len, mask, psize);
398} 366}
399 367
400#define or_mask(dst, src) do { \ 368#define or_mask(dst, src) do { \
@@ -415,7 +383,7 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
415 383
416unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, 384unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
417 unsigned long flags, unsigned int psize, 385 unsigned long flags, unsigned int psize,
418 int topdown, int use_cache) 386 int topdown)
419{ 387{
420 struct slice_mask mask = {0, 0}; 388 struct slice_mask mask = {0, 0};
421 struct slice_mask good_mask; 389 struct slice_mask good_mask;
@@ -430,8 +398,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
430 BUG_ON(mm->task_size == 0); 398 BUG_ON(mm->task_size == 0);
431 399
432 slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); 400 slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
433 slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n", 401 slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
434 addr, len, flags, topdown, use_cache); 402 addr, len, flags, topdown);
435 403
436 if (len > mm->task_size) 404 if (len > mm->task_size)
437 return -ENOMEM; 405 return -ENOMEM;
@@ -503,8 +471,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
503 /* Now let's see if we can find something in the existing 471 /* Now let's see if we can find something in the existing
504 * slices for that size 472 * slices for that size
505 */ 473 */
506 newaddr = slice_find_area(mm, len, good_mask, psize, topdown, 474 newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
507 use_cache);
508 if (newaddr != -ENOMEM) { 475 if (newaddr != -ENOMEM) {
509 /* Found within the good mask, we don't have to setup, 476 /* Found within the good mask, we don't have to setup,
510 * we thus return directly 477 * we thus return directly
@@ -536,8 +503,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
536 * anywhere in the good area. 503 * anywhere in the good area.
537 */ 504 */
538 if (addr) { 505 if (addr) {
539 addr = slice_find_area(mm, len, good_mask, psize, topdown, 506 addr = slice_find_area(mm, len, good_mask, psize, topdown);
540 use_cache);
541 if (addr != -ENOMEM) { 507 if (addr != -ENOMEM) {
542 slice_dbg(" found area at 0x%lx\n", addr); 508 slice_dbg(" found area at 0x%lx\n", addr);
543 return addr; 509 return addr;
@@ -547,15 +513,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
547 /* Now let's see if we can find something in the existing slices 513 /* Now let's see if we can find something in the existing slices
548 * for that size plus free slices 514 * for that size plus free slices
549 */ 515 */
550 addr = slice_find_area(mm, len, potential_mask, psize, topdown, 516 addr = slice_find_area(mm, len, potential_mask, psize, topdown);
551 use_cache);
552 517
553#ifdef CONFIG_PPC_64K_PAGES 518#ifdef CONFIG_PPC_64K_PAGES
554 if (addr == -ENOMEM && psize == MMU_PAGE_64K) { 519 if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
555 /* retry the search with 4k-page slices included */ 520 /* retry the search with 4k-page slices included */
556 or_mask(potential_mask, compat_mask); 521 or_mask(potential_mask, compat_mask);
557 addr = slice_find_area(mm, len, potential_mask, psize, 522 addr = slice_find_area(mm, len, potential_mask, psize,
558 topdown, use_cache); 523 topdown);
559 } 524 }
560#endif 525#endif
561 526
@@ -586,8 +551,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
586 unsigned long flags) 551 unsigned long flags)
587{ 552{
588 return slice_get_unmapped_area(addr, len, flags, 553 return slice_get_unmapped_area(addr, len, flags,
589 current->mm->context.user_psize, 554 current->mm->context.user_psize, 0);
590 0, 1);
591} 555}
592 556
593unsigned long arch_get_unmapped_area_topdown(struct file *filp, 557unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -597,8 +561,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
597 const unsigned long flags) 561 const unsigned long flags)
598{ 562{
599 return slice_get_unmapped_area(addr0, len, flags, 563 return slice_get_unmapped_area(addr0, len, flags,
600 current->mm->context.user_psize, 564 current->mm->context.user_psize, 1);
601 1, 1);
602} 565}
603 566
604unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) 567unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
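
slice_scan_available() maps an address to its slice and reports the slice's start or end boundary, letting both search directions feed maximal runs of available slices to vm_unmapped_area(). A worked user-space example of the boundary math, assuming the usual layout of 256MB low slices below 4GB and 1TB high slices above it (SLICE_LOW_SHIFT 28, SLICE_HIGH_SHIFT 40; assumed values, not part of this patch):

#include <stdio.h>

#define SLICE_LOW_SHIFT   28                   /* assumed: 256MB low slices */
#define SLICE_HIGH_SHIFT  40                   /* assumed: 1TB high slices  */
#define SLICE_LOW_TOP     (0x100000000UL)      /* assumed: 4GB              */

#define GET_LOW_SLICE_INDEX(addr)  ((addr) >> SLICE_LOW_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)

/* Mirror of the boundary computation: end=1 yields the slice's end,
 * end=0 yields its start. */
static unsigned long slice_boundary(unsigned long addr, int end)
{
	unsigned long slice;

	if (addr < SLICE_LOW_TOP) {
		slice = GET_LOW_SLICE_INDEX(addr);
		return (slice + end) << SLICE_LOW_SHIFT;
	}
	slice = GET_HIGH_SLICE_INDEX(addr);
	return (slice + end) ? ((slice + end) << SLICE_HIGH_SHIFT)
			     : SLICE_LOW_TOP;
}

int main(void)
{
	unsigned long a = 0x30000000UL;        /* inside low slice 3  */
	unsigned long b = 0x12345678900UL;     /* inside high slice 1 */

	printf("low  addr %#lx: slice [%#lx, %#lx)\n", a,
	       slice_boundary(a, 0), slice_boundary(a, 1));
	printf("high addr %#lx: slice [%#lx, %#lx)\n", b,
	       slice_boundary(b, 0), slice_boundary(b, 1));
	return 0;
}
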
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index df32a838dcfa..6888cad5103d 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -414,9 +414,9 @@ static void setup_page_sizes(void)
414 414
415#ifdef CONFIG_PPC_FSL_BOOK3E 415#ifdef CONFIG_PPC_FSL_BOOK3E
416 unsigned int mmucfg = mfspr(SPRN_MMUCFG); 416 unsigned int mmucfg = mfspr(SPRN_MMUCFG);
417 int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
417 418
418 if (((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) && 419 if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
419 (mmu_has_feature(MMU_FTR_TYPE_FSL_E))) {
420 unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); 420 unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
421 unsigned int min_pg, max_pg; 421 unsigned int min_pg, max_pg;
422 422
@@ -442,6 +442,20 @@ static void setup_page_sizes(void)
442 442
443 goto no_indirect; 443 goto no_indirect;
444 } 444 }
445
446 if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
447 u32 tlb1ps = mfspr(SPRN_TLB1PS);
448
449 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
450 struct mmu_psize_def *def = &mmu_psize_defs[psize];
451
452 if (tlb1ps & (1U << (def->shift - 10))) {
453 def->flags |= MMU_PAGE_SIZE_DIRECT;
454 }
455 }
456
457 goto no_indirect;
458 }
445#endif 459#endif
446 460
447 tlb0cfg = mfspr(SPRN_TLB0CFG); 461 tlb0cfg = mfspr(SPRN_TLB0CFG);
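
On MMU architecture version 2 (MAV 2.0) Freescale cores the code now reads SPRN_TLB1PS, where a set bit n advertises direct support for a 2^(n+10)-byte page, hence the def->shift - 10 test. A small user-space decode of an assumed TLB1PS value:

#include <stdio.h>

int main(void)
{
	/* Assumed register contents: bits for 4K, 64K, 1M, 16M, 256M, 1G. */
	unsigned int tlb1ps = (1u << (12 - 10)) | (1u << (16 - 10)) |
			      (1u << (20 - 10)) | (1u << (24 - 10)) |
			      (1u << (28 - 10)) | (1u << (30 - 10));
	int shift;

	for (shift = 10; shift <= 30; shift++) {
		if (tlb1ps & (1u << (shift - 10)))
			printf("page size 2^%d bytes (%u KB) supported\n",
			       shift, 1u << (shift - 10));
	}
	return 0;
}
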