 Documentation/kernel-parameters.txt |  10
 arch/powerpc/mm/hash_utils_64.c     |   9
 arch/powerpc/mm/hugetlbpage.c       | 274
 arch/powerpc/mm/init_64.c           |   8
 arch/powerpc/mm/tlb_64.c            |   2
 include/asm-powerpc/hugetlb.h       |   5
 include/asm-powerpc/mmu-hash64.h    |   4
 include/asm-powerpc/page_64.h       |   1
 include/asm-powerpc/pgalloc-64.h    |   4
 9 files changed, 199 insertions(+), 118 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 30278e9e5211..01a2992b5754 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -776,11 +776,11 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	hugepages=	[HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
 	hugepagesz=	[HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
-			On x86 this option can be specified multiple times
-			interleaved with hugepages= to reserve huge pages
-			of different sizes. Valid pages sizes on x86-64
-			are 2M (when the CPU supports "pse") and 1G (when the
-			CPU supports the "pdpe1gb" cpuinfo flag)
+			On x86-64 and powerpc, this option can be specified
+			multiple times interleaved with hugepages= to reserve
+			huge pages of different sizes. Valid pages sizes on
+			x86-64 are 2M (when the CPU supports "pse") and 1G
+			(when the CPU supports the "pdpe1gb" cpuinfo flag)
 			Note that 1GB pages can only be allocated at boot time
 			using hugepages= and not freed afterwards.
 	default_hugepagesz=
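
Illustration (editor's example, not from the patch): each hugepagesz= applies
to the hugepages= count that follows it, so on a powerpc machine supporting
64K and 16M huge pages a boot line such as

	hugepagesz=64K hugepages=32 hugepagesz=16M hugepages=4

reserves 32 pages of 64K and 4 pages of 16M at boot.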
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ae4c717243a5..5ce5a4dcd008 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -103,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M;
 int mmu_highuser_ssize = MMU_SEGSIZE_256M;
 u16 mmu_slb_size = 64;
 #ifdef CONFIG_HUGETLB_PAGE
-int mmu_huge_psize = MMU_PAGE_16M;
 unsigned int HPAGE_SHIFT;
 #endif
 #ifdef CONFIG_PPC_64K_PAGES
@@ -460,15 +459,15 @@ static void __init htab_init_page_sizes(void)
 	/* Reserve 16G huge page memory sections for huge pages */
 	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
 
-	/* Init large page size. Currently, we pick 16M or 1M depending
+	/* Set default large page size. Currently, we pick 16M or 1M depending
 	 * on what is available
 	 */
 	if (mmu_psize_defs[MMU_PAGE_16M].shift)
-		set_huge_psize(MMU_PAGE_16M);
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
 	/* With 4k/4level pagetables, we can't (for now) cope with a
 	 * huge page size < PMD_SIZE */
 	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
-		set_huge_psize(MMU_PAGE_1M);
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
 #endif /* CONFIG_HUGETLB_PAGE */
 }
 
@@ -889,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 
 #ifdef CONFIG_HUGETLB_PAGE
 	/* Handle hugepage regions */
-	if (HPAGE_SHIFT && psize == mmu_huge_psize) {
+	if (HPAGE_SHIFT && mmu_huge_psizes[psize]) {
 		DBG_LOW(" -> huge page !\n");
 		return hash_huge_page(mm, access, ea, vsid, local, trap);
 	}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 19b1a9cec6d5..fb42c4dd3217 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -37,15 +37,30 @@
 static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
 static unsigned nr_gpages;
 
-unsigned int hugepte_shift;
-#define PTRS_PER_HUGEPTE	(1 << hugepte_shift)
-#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << hugepte_shift)
+/* Array of valid huge page sizes - non-zero value(hugepte_shift) is
+ * stored for the huge page sizes that are valid.
+ */
+unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
+
+#define hugepte_shift			mmu_huge_psizes
+#define PTRS_PER_HUGEPTE(psize)		(1 << hugepte_shift[psize])
+#define HUGEPTE_TABLE_SIZE(psize)	(sizeof(pte_t) << hugepte_shift[psize])
+
+#define HUGEPD_SHIFT(psize)		(mmu_psize_to_shift(psize) \
+						+ hugepte_shift[psize])
+#define HUGEPD_SIZE(psize)		(1UL << HUGEPD_SHIFT(psize))
+#define HUGEPD_MASK(psize)		(~(HUGEPD_SIZE(psize)-1))
 
-#define HUGEPD_SHIFT		(HPAGE_SHIFT + hugepte_shift)
-#define HUGEPD_SIZE		(1UL << HUGEPD_SHIFT)
-#define HUGEPD_MASK		(~(HUGEPD_SIZE-1))
+/* Subtract one from array size because we don't need a cache for 4K since
+ * is not a huge page size */
+#define huge_pgtable_cache(psize)	(pgtable_cache[HUGEPTE_CACHE_NUM \
+						+ psize-1])
+#define HUGEPTE_CACHE_NAME(psize)	(huge_pgtable_cache_name[psize])
 
-#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])
+static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
+	"unused_4K", "hugepte_cache_64K", "unused_64K_AP",
+	"hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G"
+};
 
 /* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
  * will choke on pointers to hugepte tables, which is handy for
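
Worked expansion of the new per-size macros (editor's illustration; the
hugepte_shift[] values are assigned in set_huge_psize() later in this patch):
for 16M huge pages on a 4k base page, hugepte_shift[MMU_PAGE_16M] ends up as
PUD_SHIFT - 24, so

	/* mmu_psize_to_shift(MMU_PAGE_16M) == 24, since 16M == 1 << 24 */
	HUGEPD_SHIFT(MMU_PAGE_16M)	/* 24 + (PUD_SHIFT - 24) == PUD_SHIFT */
	HUGEPD_SIZE(MMU_PAGE_16M)	/* 1UL << PUD_SHIFT */
	PTRS_PER_HUGEPTE(MMU_PAGE_16M)	/* 1 << (PUD_SHIFT - 24) entries */

i.e. one hugepte table exactly covers the range of one pud entry.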
@@ -56,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t;
 
 #define hugepd_none(hpd)	((hpd).pd == 0)
 
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+	switch (shift) {
+#ifndef CONFIG_PPC_64K_PAGES
+	case PAGE_SHIFT_64K:
+		return MMU_PAGE_64K;
+#endif
+	case PAGE_SHIFT_16M:
+		return MMU_PAGE_16M;
+	case PAGE_SHIFT_16G:
+		return MMU_PAGE_16G;
+	}
+	return -1;
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+	if (mmu_psize_defs[mmu_psize].shift)
+		return mmu_psize_defs[mmu_psize].shift;
+	BUG();
+}
+
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
 	BUG_ON(!(hpd.pd & HUGEPD_OK));
 	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
 }
 
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+				    struct hstate *hstate)
 {
-	unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
+	unsigned int shift = huge_page_shift(hstate);
+	int psize = shift_to_mmu_psize(shift);
+	unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
 	pte_t *dir = hugepd_page(*hpdp);
 
 	return dir + idx;
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-			   unsigned long address)
+			   unsigned long address, unsigned int psize)
 {
-	pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
+	pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize),
 				      GFP_KERNEL|__GFP_REPEAT);
 
 	if (! new)
@@ -81,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 
 	spin_lock(&mm->page_table_lock);
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(huge_pgtable_cache, new);
+		kmem_cache_free(huge_pgtable_cache(psize), new);
 	else
 		hpdp->pd = (unsigned long)new | HUGEPD_OK;
 	spin_unlock(&mm->page_table_lock);
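
Taken together these helpers do the huge-page table walk; an editor's sketch
of the lookup (assumes a valid huge slice, not code from the patch):

	unsigned int shift = huge_page_shift(hstate);	/* e.g. 24 for 16M */
	int psize = shift_to_mmu_psize(shift);		/* e.g. MMU_PAGE_16M */
	unsigned long idx = (addr >> shift) & (PTRS_PER_HUGEPTE(psize) - 1);
	pte_t *ptep = hugepd_page(*hpdp) + idx;		/* == hugepte_offset() */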
@@ -90,21 +130,22 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 
 /* Base page size affects how we walk hugetlb page tables */
 #ifdef CONFIG_PPC_64K_PAGES
-#define hpmd_offset(pud, addr)		pmd_offset(pud, addr)
-#define hpmd_alloc(mm, pud, addr)	pmd_alloc(mm, pud, addr)
+#define hpmd_offset(pud, addr, h)	pmd_offset(pud, addr)
+#define hpmd_alloc(mm, pud, addr, h)	pmd_alloc(mm, pud, addr)
 #else
 static inline
-pmd_t *hpmd_offset(pud_t *pud, unsigned long addr)
+pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
 {
-	if (HPAGE_SHIFT == PAGE_SHIFT_64K)
+	if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
 		return pmd_offset(pud, addr);
 	else
 		return (pmd_t *) pud;
 }
 static inline
-pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr)
+pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
+		  struct hstate *hstate)
 {
-	if (HPAGE_SHIFT == PAGE_SHIFT_64K)
+	if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
 		return pmd_alloc(mm, pud, addr);
 	else
 		return (pmd_t *) pud;
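
The page-table level a hugepd hangs off depends on both page sizes;
summarizing the cases handled here and in set_huge_psize() below
(editor's table):

	/* base page   huge page   hugepd lives in
	 *    4K          64K         a PMD entry
	 *    4K          16M         a PUD entry
	 *   64K          16M         a PMD entry
	 *   any          16G         a PGD entry
	 */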
@@ -128,8 +169,9 @@ void add_gpage(unsigned long addr, unsigned long page_size,
 }
 
 /* Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list. */
-int alloc_bootmem_huge_page(struct hstate *h)
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
 {
 	struct huge_bootmem_page *m;
 	if (nr_gpages == 0)
@@ -137,7 +179,7 @@ int alloc_bootmem_huge_page(struct hstate *h)
 	m = phys_to_virt(gpage_freearray[--nr_gpages]);
 	gpage_freearray[nr_gpages] = 0;
 	list_add(&m->list, &huge_boot_pages);
-	m->hstate = h;
+	m->hstate = hstate;
 	return 1;
 }
 
@@ -149,17 +191,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	pud_t *pu;
 	pmd_t *pm;
 
-	BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
+	unsigned int psize;
+	unsigned int shift;
+	unsigned long sz;
+	struct hstate *hstate;
+	psize = get_slice_psize(mm, addr);
+	shift = mmu_psize_to_shift(psize);
+	sz = ((1UL) << shift);
+	hstate = size_to_hstate(sz);
 
-	addr &= HPAGE_MASK;
+	addr &= hstate->mask;
 
 	pg = pgd_offset(mm, addr);
 	if (!pgd_none(*pg)) {
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
-			pm = hpmd_offset(pu, addr);
+			pm = hpmd_offset(pu, addr, hstate);
 			if (!pmd_none(*pm))
-				return hugepte_offset((hugepd_t *)pm, addr);
+				return hugepte_offset((hugepd_t *)pm, addr,
+						      hstate);
 		}
 	}
 
@@ -173,16 +223,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	pud_t *pu;
 	pmd_t *pm;
 	hugepd_t *hpdp = NULL;
+	struct hstate *hstate;
+	unsigned int psize;
+	hstate = size_to_hstate(sz);
 
-	BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
+	psize = get_slice_psize(mm, addr);
+	BUG_ON(!mmu_huge_psizes[psize]);
 
-	addr &= HPAGE_MASK;
+	addr &= hstate->mask;
 
 	pg = pgd_offset(mm, addr);
 	pu = pud_alloc(mm, pg, addr);
 
 	if (pu) {
-		pm = hpmd_alloc(mm, pu, addr);
+		pm = hpmd_alloc(mm, pu, addr, hstate);
 		if (pm)
 			hpdp = (hugepd_t *)pm;
 	}
@@ -190,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	if (! hpdp)
 		return NULL;
 
-	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
 		return NULL;
 
-	return hugepte_offset(hpdp, addr);
+	return hugepte_offset(hpdp, addr, hstate);
 }
 
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
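
For context, a hedged sketch of the caller side: the generic hugetlb fault
path of this era (mm/hugetlb.c) passes the hstate's size down, roughly

	struct hstate *h = hstate_vma(vma);
	pte_t *ptep = huge_pte_alloc(mm, address, huge_page_size(h));
	if (!ptep)
		return VM_FAULT_OOM;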
@@ -201,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
+static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
+			       unsigned int psize)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
 
 	hpdp->pd = 0;
 	tlb->need_flush = 1;
-	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
+	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
+						 HUGEPTE_CACHE_NUM+psize-1,
 						 PGF_CACHENUM_MASK));
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling)
+				   unsigned long floor, unsigned long ceiling,
+				   unsigned int psize)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -225,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd))
 			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pmd);
+		free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
 	} while (pmd++, addr = next, addr != end);
 
 	start &= PUD_MASK;
@@ -251,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	pud_t *pud;
 	unsigned long next;
 	unsigned long start;
+	unsigned int shift;
+	unsigned int psize = get_slice_psize(tlb->mm, addr);
+	shift = mmu_psize_to_shift(psize);
 
 	start = addr;
 	pud = pud_offset(pgd, addr);
@@ -259,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 #ifdef CONFIG_PPC_64K_PAGES
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling,
+				       psize);
 #else
-		if (HPAGE_SHIFT == PAGE_SHIFT_64K) {
+		if (shift == PAGE_SHIFT_64K) {
 			if (pud_none_or_clear_bad(pud))
 				continue;
-			hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+					       ceiling, psize);
 		} else {
 			if (pud_none(*pud))
 				continue;
-			free_hugepte_range(tlb, (hugepd_t *)pud);
+			free_hugepte_range(tlb, (hugepd_t *)pud, psize);
 		}
 #endif
 	} while (pud++, addr = next, addr != end);
@@ -336,27 +398,29 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	 * now has no other vmas using it, so can be freed, we don't
 	 * bother to round floor or end up - the tests don't need that.
 	 */
+	unsigned int psize = get_slice_psize(tlb->mm, addr);
 
-	addr &= HUGEPD_MASK;
+	addr &= HUGEPD_MASK(psize);
 	if (addr < floor) {
-		addr += HUGEPD_SIZE;
+		addr += HUGEPD_SIZE(psize);
 		if (!addr)
 			return;
 	}
 	if (ceiling) {
-		ceiling &= HUGEPD_MASK;
+		ceiling &= HUGEPD_MASK(psize);
 		if (!ceiling)
 			return;
 	}
 	if (end - 1 > ceiling - 1)
-		end -= HUGEPD_SIZE;
+		end -= HUGEPD_SIZE(psize);
 	if (addr > end - 1)
 		return;
 
 	start = addr;
 	pgd = pgd_offset(tlb->mm, addr);
 	do {
-		BUG_ON(get_slice_psize(tlb->mm, addr) != mmu_huge_psize);
+		psize = get_slice_psize(tlb->mm, addr);
+		BUG_ON(!mmu_huge_psizes[psize]);
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
@@ -373,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * necessary anymore if we make hpte_need_flush() get the
 	 * page size from the slices
 	 */
-		pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1);
+		unsigned int psize = get_slice_psize(mm, addr);
+		unsigned int shift = mmu_psize_to_shift(psize);
+		unsigned long sz = ((1UL) << shift);
+		struct hstate *hstate = size_to_hstate(sz);
+		pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
 	}
 	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 }
@@ -390,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;
+	unsigned int mmu_psize = get_slice_psize(mm, address);
 
-	if (get_slice_psize(mm, address) != mmu_huge_psize)
+	/* Verify it is a huge page else bail. */
+	if (!mmu_huge_psizes[mmu_psize])
 		return ERR_PTR(-EINVAL);
 
 	ptep = huge_pte_offset(mm, address);
 	page = pte_page(*ptep);
-	if (page)
-		page += (address % HPAGE_SIZE) / PAGE_SIZE;
+	if (page) {
+		unsigned int shift = mmu_psize_to_shift(mmu_psize);
+		unsigned long sz = ((1UL) << shift);
+		page += (address % sz) / PAGE_SIZE;
+	}
 
 	return page;
 }
@@ -425,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
-	return slice_get_unmapped_area(addr, len, flags,
-				       mmu_huge_psize, 1, 0);
+	struct hstate *hstate = hstate_file(file);
+	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
+	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
 }
 
 /*
  * Called by asm hashtable.S for doing lazy icache flush
  */
 static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
-						  pte_t pte, int trap)
+					pte_t pte, int trap, unsigned long sz)
 {
 	struct page *page;
 	int i;
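
Usage sketch for the hugetlb_get_unmapped_area() change above (editor's
example; the mount point is hypothetical, and the per-mount pagesize= option
comes from companion hugetlbfs patches in this series): a mapping from a 16M
mount now resolves to MMU_PAGE_16M instead of a single global huge page size.

	int fd = open("/mnt/huge16m/f", O_CREAT | O_RDWR, 0600);
	void *p = mmap(NULL, 32UL << 20, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);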
@@ -446,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
 	/* page is dirty */
 	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
 		if (trap == 0x400) {
-			for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
+			for (i = 0; i < (sz / PAGE_SIZE); i++)
 				__flush_dcache_icache(page_address(page+i));
 			set_bit(PG_arch_1, &page->flags);
 		} else {
@@ -462,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 {
 	pte_t *ptep;
 	unsigned long old_pte, new_pte;
-	unsigned long va, rflags, pa;
+	unsigned long va, rflags, pa, sz;
 	long slot;
 	int err = 1;
 	int ssize = user_segment_size(ea);
+	unsigned int mmu_psize;
+	int shift;
+	mmu_psize = get_slice_psize(mm, ea);
 
+	if (!mmu_huge_psizes[mmu_psize])
+		goto out;
 	ptep = huge_pte_offset(mm, ea);
 
 	/* Search the Linux page table for a match with va */
@@ -509,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	rflags = 0x2 | (!(new_pte & _PAGE_RW));
 	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
 	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+	shift = mmu_psize_to_shift(mmu_psize);
+	sz = ((1UL) << shift);
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
 		/* No CPU has hugepages but lacks no execute, so we
 		 * don't need to worry about that case */
 		rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
-						       trap);
+						       trap, sz);
 
 	/* Check if pte already has an hpte (case 2) */
 	if (unlikely(old_pte & _PAGE_HASHPTE)) {
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
-		hash = hpt_hash(va, HPAGE_SHIFT, ssize);
+		hash = hpt_hash(va, shift, ssize);
 		if (old_pte & _PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += (old_pte & _PAGE_F_GIX) >> 12;
 
-		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize,
+		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
 					 ssize, local) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
 	if (likely(!(old_pte & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize);
+		unsigned long hash = hpt_hash(va, shift, ssize);
 		unsigned long hpte_group;
 
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
@@ -553,7 +634,7 @@ repeat:
 
 		/* Insert into the hash table, primary slot */
 		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
-					  mmu_huge_psize, ssize);
+					  mmu_psize, ssize);
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
@@ -561,7 +642,7 @@ repeat:
 				      HPTES_PER_GROUP) & ~0x7UL;
 			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
 						  HPTE_V_SECONDARY,
-						  mmu_huge_psize, ssize);
+						  mmu_psize, ssize);
 			if (slot == -1) {
 				if (mftb() & 0x1)
 					hpte_group = ((hash & htab_hash_mask) *
@@ -598,66 +679,50 @@ void set_huge_psize(int psize)
 	    (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
 	     mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
 	     mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
-		/* Return if huge page size is the same as the
-		 * base page size. */
-		if (mmu_psize_defs[psize].shift == PAGE_SHIFT)
+		/* Return if huge page size has already been setup or is the
+		 * same as the base page size. */
+		if (mmu_huge_psizes[psize] ||
+		    mmu_psize_defs[psize].shift == PAGE_SHIFT)
 			return;
+		hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
 
-		HPAGE_SHIFT = mmu_psize_defs[psize].shift;
-		mmu_huge_psize = psize;
-
-		switch (HPAGE_SHIFT) {
+		switch (mmu_psize_defs[psize].shift) {
 		case PAGE_SHIFT_64K:
 			/* We only allow 64k hpages with 4k base page,
 			 * which was checked above, and always put them
 			 * at the PMD */
-			hugepte_shift = PMD_SHIFT;
+			hugepte_shift[psize] = PMD_SHIFT;
 			break;
 		case PAGE_SHIFT_16M:
 			/* 16M pages can be at two different levels
 			 * of pagestables based on base page size */
 			if (PAGE_SHIFT == PAGE_SHIFT_64K)
-				hugepte_shift = PMD_SHIFT;
+				hugepte_shift[psize] = PMD_SHIFT;
 			else /* 4k base page */
-				hugepte_shift = PUD_SHIFT;
+				hugepte_shift[psize] = PUD_SHIFT;
 			break;
 		case PAGE_SHIFT_16G:
 			/* 16G pages are always at PGD level */
-			hugepte_shift = PGDIR_SHIFT;
+			hugepte_shift[psize] = PGDIR_SHIFT;
 			break;
 		}
-		hugepte_shift -= HPAGE_SHIFT;
+		hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
 	} else
-		HPAGE_SHIFT = 0;
+		hugepte_shift[psize] = 0;
 }
 
 static int __init hugepage_setup_sz(char *str)
 {
 	unsigned long long size;
-	int mmu_psize = -1;
+	int mmu_psize;
 	int shift;
 
 	size = memparse(str, &str);
 
 	shift = __ffs(size);
-	switch (shift) {
-#ifndef CONFIG_PPC_64K_PAGES
-	case PAGE_SHIFT_64K:
-		mmu_psize = MMU_PAGE_64K;
-		break;
-#endif
-	case PAGE_SHIFT_16M:
-		mmu_psize = MMU_PAGE_16M;
-		break;
-	case PAGE_SHIFT_16G:
-		mmu_psize = MMU_PAGE_16G;
-		break;
-	}
-
-	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) {
+	mmu_psize = shift_to_mmu_psize(shift);
+	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
 		set_huge_psize(mmu_psize);
-		hugetlb_add_hstate(shift - PAGE_SHIFT);
-	}
 	else
 		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
 
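
An editor's trace of the parse path (not in the patch): booting with
hugepagesz=16G gives

	size  = memparse("16G", ...);		/* 1ULL << 34 */
	shift = __ffs(size);			/* 34 == PAGE_SHIFT_16G */
	mmu_psize = shift_to_mmu_psize(34);	/* MMU_PAGE_16G */
	set_huge_psize(MMU_PAGE_16G);		/* registers the hstate and sets
						   hugepte_shift[MMU_PAGE_16G]
						   = PGDIR_SHIFT - 34 */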
@@ -672,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr)
 
 static int __init hugetlbpage_init(void)
 {
+	unsigned int psize;
+
 	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
 		return -ENODEV;
-
-	huge_pgtable_cache = kmem_cache_create("hugepte_cache",
-					       HUGEPTE_TABLE_SIZE,
-					       HUGEPTE_TABLE_SIZE,
-					       0,
-					       zero_ctor);
-	if (! huge_pgtable_cache)
-		panic("hugetlbpage_init(): could not create hugepte cache\n");
+	/* Add supported huge page sizes.  Need to change HUGE_MAX_HSTATE
+	 * and adjust PTE_NONCACHE_NUM if the number of supported huge page
+	 * sizes changes.
+	 */
+	set_huge_psize(MMU_PAGE_16M);
+	set_huge_psize(MMU_PAGE_64K);
+	set_huge_psize(MMU_PAGE_16G);
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		if (mmu_huge_psizes[psize]) {
+			huge_pgtable_cache(psize) = kmem_cache_create(
+						HUGEPTE_CACHE_NAME(psize),
+						HUGEPTE_TABLE_SIZE(psize),
+						HUGEPTE_TABLE_SIZE(psize),
+						0,
+						zero_ctor);
+			if (!huge_pgtable_cache(psize))
+				panic("hugetlbpage_init(): could not create %s"\
+				      "\n", HUGEPTE_CACHE_NAME(psize));
+		}
+	}
 
 	return 0;
 }
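
Editor's illustration of one loop iteration: with a 4k base page, the
MMU_PAGE_16M pass creates a kmem cache named "hugepte_cache_16M" whose
objects are whole hugepte tables,

	HUGEPTE_TABLE_SIZE(MMU_PAGE_16M) == sizeof(pte_t) << (PUD_SHIFT - 24)

so one allocation in __hugepte_alloc() yields one fully-sized table.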
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6ef63caca682..a41bc5aa2043 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -153,10 +153,10 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
 };
 
 #ifdef CONFIG_HUGETLB_PAGE
-/* Hugepages need one extra cache, initialized in hugetlbpage.c.  We
- * can't put into the tables above, because HPAGE_SHIFT is not compile
- * time constant. */
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1];
+/* Hugepages need an extra cache per hugepagesize, initialized in
+ * hugetlbpage.c.  We can't put into the tables above, because HPAGE_SHIFT
+ * is not compile time constant. */
+struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];
 #else
 struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
 #endif
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index a01b5c608ff9..409fcc7b63ce 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -147,7 +147,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	 */
 	if (huge) {
 #ifdef CONFIG_HUGETLB_PAGE
-		psize = mmu_huge_psize;
+		psize = get_slice_psize(mm, addr);
 #else
 		BUG();
 		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index ca37c4af27b1..26f0d0ab27a5 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -24,9 +24,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 static inline int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
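
Worked example (editor's, not from the patch): against a 16M hstate,
huge_page_mask(h) is ~(16M - 1), so

	prepare_hugepage_range(file, 0x1000000, 0x2000000);	/* 16M-aligned: 0 */
	prepare_hugepage_range(file, 0x10000, 0x2000000);	/* addr only 64K-aligned: -EINVAL */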
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index b61181aa7746..19c7a9403490 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -194,9 +194,9 @@ extern int mmu_ci_restrictions;
 
 #ifdef CONFIG_HUGETLB_PAGE
 /*
- * The page size index of the huge pages for use by hugetlbfs
+ * The page size indexes of the huge pages for use by hugetlbfs
  */
-extern int mmu_huge_psize;
+extern unsigned int mmu_huge_psizes[MMU_PAGE_COUNT];
 
 #endif /* CONFIG_HUGETLB_PAGE */
 
diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h
index 02fd80710e9d..043bfdfe4f73 100644
--- a/include/asm-powerpc/page_64.h
+++ b/include/asm-powerpc/page_64.h
@@ -90,6 +90,7 @@ extern unsigned int HPAGE_SHIFT;
 #define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+#define HUGE_MAX_HSTATE		3
 
 #endif /* __ASSEMBLY__ */
 
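
The bound matches the three sizes hugetlbpage_init() may register (editor's
note tying the constant back to the code above):

	set_huge_psize(MMU_PAGE_16M);	/* hstate 1 */
	set_huge_psize(MMU_PAGE_64K);	/* hstate 2, 4k base pages only */
	set_huge_psize(MMU_PAGE_16G);	/* hstate 3 == HUGE_MAX_HSTATE */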
diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h
index 68980990f62a..812a1d8f35cb 100644
--- a/include/asm-powerpc/pgalloc-64.h
+++ b/include/asm-powerpc/pgalloc-64.h
@@ -22,7 +22,7 @@ extern struct kmem_cache *pgtable_cache[];
 #define PUD_CACHE_NUM		1
 #define PMD_CACHE_NUM		1
 #define HUGEPTE_CACHE_NUM	2
-#define PTE_NONCACHE_NUM	3  /* from GFP rather than kmem_cache */
+#define PTE_NONCACHE_NUM	7  /* from GFP rather than kmem_cache */
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
@@ -119,7 +119,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 	__free_page(ptepage);
 }
 
-#define PGF_CACHENUM_MASK	0x3
+#define PGF_CACHENUM_MASK	0x7
 
 typedef struct pgtable_free {
 	unsigned long val;
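
Editor's worked map of the cache numbering after this change, derived from
huge_pgtable_cache(psize) == pgtable_cache[HUGEPTE_CACHE_NUM + psize - 1]
with MMU_PAGE_COUNT == 6:

	/* 0:    PGD cache
	 * 1:    PUD/PMD cache
	 * 2..6: hugepte caches for psize 1..5 (64K .. 16G)
	 * 7:    PTE_NONCACHE_NUM, pages from GFP, not a kmem_cache
	 */

Encoding cache numbers up to 7 is why PGF_CACHENUM_MASK widens from 0x3 to 0x7.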