aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm/hugetlbpage.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-25 07:08:16 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-25 07:08:16 -0400
commit10a010f6953b5a14ba2f0be40a4fce1bea220875 (patch)
tree19aadf718c796bc7fae0a1a1c970d84d67c541d4 /arch/powerpc/mm/hugetlbpage.c
parent510b37258dfd61693ca6c039865c78bd996e3718 (diff)
parentfb2e405fc1fc8b20d9c78eaa1c7fd5a297efde43 (diff)
Merge branch 'linus' into x86/x2apic
Conflicts: drivers/pci/dmar.c Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r--arch/powerpc/mm/hugetlbpage.c341
1 files changed, 245 insertions, 96 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0d12fba31bc5..fb42c4dd3217 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,21 +24,43 @@
24#include <asm/cputable.h> 24#include <asm/cputable.h>
25#include <asm/spu.h> 25#include <asm/spu.h>
26 26
27#define HPAGE_SHIFT_64K 16 27#define PAGE_SHIFT_64K 16
28#define HPAGE_SHIFT_16M 24 28#define PAGE_SHIFT_16M 24
29#define PAGE_SHIFT_16G 34
29 30
30#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) 31#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT)
31#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) 32#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
33#define MAX_NUMBER_GPAGES 1024
32 34
33unsigned int hugepte_shift; 35/* Tracks the 16G pages after the device tree is scanned and before the
34#define PTRS_PER_HUGEPTE (1 << hugepte_shift) 36 * huge_boot_pages list is ready. */
35#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift) 37static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
38static unsigned nr_gpages;
36 39
37#define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift) 40/* Array of valid huge page sizes - non-zero value(hugepte_shift) is
38#define HUGEPD_SIZE (1UL << HUGEPD_SHIFT) 41 * stored for the huge page sizes that are valid.
39#define HUGEPD_MASK (~(HUGEPD_SIZE-1)) 42 */
43unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
44
45#define hugepte_shift mmu_huge_psizes
46#define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize])
47#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize])
48
49#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \
50 + hugepte_shift[psize])
51#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize))
52#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1))
53
54/* Subtract one from array size because we don't need a cache for 4K since
55 * is not a huge page size */
56#define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \
57 + psize-1])
58#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize])
40 59
41#define huge_pgtable_cache (pgtable_cache[HUGEPTE_CACHE_NUM]) 60static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
61 "unused_4K", "hugepte_cache_64K", "unused_64K_AP",
62 "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G"
63};
42 64
43/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() 65/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
44 * will choke on pointers to hugepte tables, which is handy for 66 * will choke on pointers to hugepte tables, which is handy for
@@ -49,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t;
49 71
50#define hugepd_none(hpd) ((hpd).pd == 0) 72#define hugepd_none(hpd) ((hpd).pd == 0)
51 73
74static inline int shift_to_mmu_psize(unsigned int shift)
75{
76 switch (shift) {
77#ifndef CONFIG_PPC_64K_PAGES
78 case PAGE_SHIFT_64K:
79 return MMU_PAGE_64K;
80#endif
81 case PAGE_SHIFT_16M:
82 return MMU_PAGE_16M;
83 case PAGE_SHIFT_16G:
84 return MMU_PAGE_16G;
85 }
86 return -1;
87}
88
89static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
90{
91 if (mmu_psize_defs[mmu_psize].shift)
92 return mmu_psize_defs[mmu_psize].shift;
93 BUG();
94}
95
52static inline pte_t *hugepd_page(hugepd_t hpd) 96static inline pte_t *hugepd_page(hugepd_t hpd)
53{ 97{
54 BUG_ON(!(hpd.pd & HUGEPD_OK)); 98 BUG_ON(!(hpd.pd & HUGEPD_OK));
55 return (pte_t *)(hpd.pd & ~HUGEPD_OK); 99 return (pte_t *)(hpd.pd & ~HUGEPD_OK);
56} 100}
57 101
58static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr) 102static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
103 struct hstate *hstate)
59{ 104{
60 unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1)); 105 unsigned int shift = huge_page_shift(hstate);
106 int psize = shift_to_mmu_psize(shift);
107 unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
61 pte_t *dir = hugepd_page(*hpdp); 108 pte_t *dir = hugepd_page(*hpdp);
62 109
63 return dir + idx; 110 return dir + idx;
64} 111}
65 112
66static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, 113static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
67 unsigned long address) 114 unsigned long address, unsigned int psize)
68{ 115{
69 pte_t *new = kmem_cache_alloc(huge_pgtable_cache, 116 pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize),
70 GFP_KERNEL|__GFP_REPEAT); 117 GFP_KERNEL|__GFP_REPEAT);
71 118
72 if (! new) 119 if (! new)
@@ -74,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
74 121
75 spin_lock(&mm->page_table_lock); 122 spin_lock(&mm->page_table_lock);
76 if (!hugepd_none(*hpdp)) 123 if (!hugepd_none(*hpdp))
77 kmem_cache_free(huge_pgtable_cache, new); 124 kmem_cache_free(huge_pgtable_cache(psize), new);
78 else 125 else
79 hpdp->pd = (unsigned long)new | HUGEPD_OK; 126 hpdp->pd = (unsigned long)new | HUGEPD_OK;
80 spin_unlock(&mm->page_table_lock); 127 spin_unlock(&mm->page_table_lock);
@@ -83,27 +130,60 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
83 130
84/* Base page size affects how we walk hugetlb page tables */ 131/* Base page size affects how we walk hugetlb page tables */
85#ifdef CONFIG_PPC_64K_PAGES 132#ifdef CONFIG_PPC_64K_PAGES
86#define hpmd_offset(pud, addr) pmd_offset(pud, addr) 133#define hpmd_offset(pud, addr, h) pmd_offset(pud, addr)
87#define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr) 134#define hpmd_alloc(mm, pud, addr, h) pmd_alloc(mm, pud, addr)
88#else 135#else
89static inline 136static inline
90pmd_t *hpmd_offset(pud_t *pud, unsigned long addr) 137pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
91{ 138{
92 if (HPAGE_SHIFT == HPAGE_SHIFT_64K) 139 if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
93 return pmd_offset(pud, addr); 140 return pmd_offset(pud, addr);
94 else 141 else
95 return (pmd_t *) pud; 142 return (pmd_t *) pud;
96} 143}
97static inline 144static inline
98pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr) 145pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
146 struct hstate *hstate)
99{ 147{
100 if (HPAGE_SHIFT == HPAGE_SHIFT_64K) 148 if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
101 return pmd_alloc(mm, pud, addr); 149 return pmd_alloc(mm, pud, addr);
102 else 150 else
103 return (pmd_t *) pud; 151 return (pmd_t *) pud;
104} 152}
105#endif 153#endif
106 154
155/* Build list of addresses of gigantic pages. This function is used in early
156 * boot before the buddy or bootmem allocator is setup.
157 */
158void add_gpage(unsigned long addr, unsigned long page_size,
159 unsigned long number_of_pages)
160{
161 if (!addr)
162 return;
163 while (number_of_pages > 0) {
164 gpage_freearray[nr_gpages] = addr;
165 nr_gpages++;
166 number_of_pages--;
167 addr += page_size;
168 }
169}
170
171/* Moves the gigantic page addresses from the temporary list to the
172 * huge_boot_pages list.
173 */
174int alloc_bootmem_huge_page(struct hstate *hstate)
175{
176 struct huge_bootmem_page *m;
177 if (nr_gpages == 0)
178 return 0;
179 m = phys_to_virt(gpage_freearray[--nr_gpages]);
180 gpage_freearray[nr_gpages] = 0;
181 list_add(&m->list, &huge_boot_pages);
182 m->hstate = hstate;
183 return 1;
184}
185
186
107/* Modelled after find_linux_pte() */ 187/* Modelled after find_linux_pte() */
108pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 188pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
109{ 189{
@@ -111,39 +191,52 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
111 pud_t *pu; 191 pud_t *pu;
112 pmd_t *pm; 192 pmd_t *pm;
113 193
114 BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); 194 unsigned int psize;
195 unsigned int shift;
196 unsigned long sz;
197 struct hstate *hstate;
198 psize = get_slice_psize(mm, addr);
199 shift = mmu_psize_to_shift(psize);
200 sz = ((1UL) << shift);
201 hstate = size_to_hstate(sz);
115 202
116 addr &= HPAGE_MASK; 203 addr &= hstate->mask;
117 204
118 pg = pgd_offset(mm, addr); 205 pg = pgd_offset(mm, addr);
119 if (!pgd_none(*pg)) { 206 if (!pgd_none(*pg)) {
120 pu = pud_offset(pg, addr); 207 pu = pud_offset(pg, addr);
121 if (!pud_none(*pu)) { 208 if (!pud_none(*pu)) {
122 pm = hpmd_offset(pu, addr); 209 pm = hpmd_offset(pu, addr, hstate);
123 if (!pmd_none(*pm)) 210 if (!pmd_none(*pm))
124 return hugepte_offset((hugepd_t *)pm, addr); 211 return hugepte_offset((hugepd_t *)pm, addr,
212 hstate);
125 } 213 }
126 } 214 }
127 215
128 return NULL; 216 return NULL;
129} 217}
130 218
131pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 219pte_t *huge_pte_alloc(struct mm_struct *mm,
220 unsigned long addr, unsigned long sz)
132{ 221{
133 pgd_t *pg; 222 pgd_t *pg;
134 pud_t *pu; 223 pud_t *pu;
135 pmd_t *pm; 224 pmd_t *pm;
136 hugepd_t *hpdp = NULL; 225 hugepd_t *hpdp = NULL;
226 struct hstate *hstate;
227 unsigned int psize;
228 hstate = size_to_hstate(sz);
137 229
138 BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); 230 psize = get_slice_psize(mm, addr);
231 BUG_ON(!mmu_huge_psizes[psize]);
139 232
140 addr &= HPAGE_MASK; 233 addr &= hstate->mask;
141 234
142 pg = pgd_offset(mm, addr); 235 pg = pgd_offset(mm, addr);
143 pu = pud_alloc(mm, pg, addr); 236 pu = pud_alloc(mm, pg, addr);
144 237
145 if (pu) { 238 if (pu) {
146 pm = hpmd_alloc(mm, pu, addr); 239 pm = hpmd_alloc(mm, pu, addr, hstate);
147 if (pm) 240 if (pm)
148 hpdp = (hugepd_t *)pm; 241 hpdp = (hugepd_t *)pm;
149 } 242 }
@@ -151,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
151 if (! hpdp) 244 if (! hpdp)
152 return NULL; 245 return NULL;
153 246
154 if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr)) 247 if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
155 return NULL; 248 return NULL;
156 249
157 return hugepte_offset(hpdp, addr); 250 return hugepte_offset(hpdp, addr, hstate);
158} 251}
159 252
160int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) 253int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
@@ -162,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
162 return 0; 255 return 0;
163} 256}
164 257
165static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) 258static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
259 unsigned int psize)
166{ 260{
167 pte_t *hugepte = hugepd_page(*hpdp); 261 pte_t *hugepte = hugepd_page(*hpdp);
168 262
169 hpdp->pd = 0; 263 hpdp->pd = 0;
170 tlb->need_flush = 1; 264 tlb->need_flush = 1;
171 pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM, 265 pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
266 HUGEPTE_CACHE_NUM+psize-1,
172 PGF_CACHENUM_MASK)); 267 PGF_CACHENUM_MASK));
173} 268}
174 269
175static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, 270static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
176 unsigned long addr, unsigned long end, 271 unsigned long addr, unsigned long end,
177 unsigned long floor, unsigned long ceiling) 272 unsigned long floor, unsigned long ceiling,
273 unsigned int psize)
178{ 274{
179 pmd_t *pmd; 275 pmd_t *pmd;
180 unsigned long next; 276 unsigned long next;
@@ -186,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
186 next = pmd_addr_end(addr, end); 282 next = pmd_addr_end(addr, end);
187 if (pmd_none(*pmd)) 283 if (pmd_none(*pmd))
188 continue; 284 continue;
189 free_hugepte_range(tlb, (hugepd_t *)pmd); 285 free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
190 } while (pmd++, addr = next, addr != end); 286 } while (pmd++, addr = next, addr != end);
191 287
192 start &= PUD_MASK; 288 start &= PUD_MASK;
@@ -212,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
212 pud_t *pud; 308 pud_t *pud;
213 unsigned long next; 309 unsigned long next;
214 unsigned long start; 310 unsigned long start;
311 unsigned int shift;
312 unsigned int psize = get_slice_psize(tlb->mm, addr);
313 shift = mmu_psize_to_shift(psize);
215 314
216 start = addr; 315 start = addr;
217 pud = pud_offset(pgd, addr); 316 pud = pud_offset(pgd, addr);
@@ -220,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
220#ifdef CONFIG_PPC_64K_PAGES 319#ifdef CONFIG_PPC_64K_PAGES
221 if (pud_none_or_clear_bad(pud)) 320 if (pud_none_or_clear_bad(pud))
222 continue; 321 continue;
223 hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); 322 hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling,
323 psize);
224#else 324#else
225 if (HPAGE_SHIFT == HPAGE_SHIFT_64K) { 325 if (shift == PAGE_SHIFT_64K) {
226 if (pud_none_or_clear_bad(pud)) 326 if (pud_none_or_clear_bad(pud))
227 continue; 327 continue;
228 hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); 328 hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
329 ceiling, psize);
229 } else { 330 } else {
230 if (pud_none(*pud)) 331 if (pud_none(*pud))
231 continue; 332 continue;
232 free_hugepte_range(tlb, (hugepd_t *)pud); 333 free_hugepte_range(tlb, (hugepd_t *)pud, psize);
233 } 334 }
234#endif 335#endif
235 } while (pud++, addr = next, addr != end); 336 } while (pud++, addr = next, addr != end);
@@ -255,7 +356,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
255 * 356 *
256 * Must be called with pagetable lock held. 357 * Must be called with pagetable lock held.
257 */ 358 */
258void hugetlb_free_pgd_range(struct mmu_gather **tlb, 359void hugetlb_free_pgd_range(struct mmu_gather *tlb,
259 unsigned long addr, unsigned long end, 360 unsigned long addr, unsigned long end,
260 unsigned long floor, unsigned long ceiling) 361 unsigned long floor, unsigned long ceiling)
261{ 362{
@@ -297,31 +398,33 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
297 * now has no other vmas using it, so can be freed, we don't 398 * now has no other vmas using it, so can be freed, we don't
298 * bother to round floor or end up - the tests don't need that. 399 * bother to round floor or end up - the tests don't need that.
299 */ 400 */
401 unsigned int psize = get_slice_psize(tlb->mm, addr);
300 402
301 addr &= HUGEPD_MASK; 403 addr &= HUGEPD_MASK(psize);
302 if (addr < floor) { 404 if (addr < floor) {
303 addr += HUGEPD_SIZE; 405 addr += HUGEPD_SIZE(psize);
304 if (!addr) 406 if (!addr)
305 return; 407 return;
306 } 408 }
307 if (ceiling) { 409 if (ceiling) {
308 ceiling &= HUGEPD_MASK; 410 ceiling &= HUGEPD_MASK(psize);
309 if (!ceiling) 411 if (!ceiling)
310 return; 412 return;
311 } 413 }
312 if (end - 1 > ceiling - 1) 414 if (end - 1 > ceiling - 1)
313 end -= HUGEPD_SIZE; 415 end -= HUGEPD_SIZE(psize);
314 if (addr > end - 1) 416 if (addr > end - 1)
315 return; 417 return;
316 418
317 start = addr; 419 start = addr;
318 pgd = pgd_offset((*tlb)->mm, addr); 420 pgd = pgd_offset(tlb->mm, addr);
319 do { 421 do {
320 BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); 422 psize = get_slice_psize(tlb->mm, addr);
423 BUG_ON(!mmu_huge_psizes[psize]);
321 next = pgd_addr_end(addr, end); 424 next = pgd_addr_end(addr, end);
322 if (pgd_none_or_clear_bad(pgd)) 425 if (pgd_none_or_clear_bad(pgd))
323 continue; 426 continue;
324 hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling); 427 hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
325 } while (pgd++, addr = next, addr != end); 428 } while (pgd++, addr = next, addr != end);
326} 429}
327 430
@@ -334,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
334 * necessary anymore if we make hpte_need_flush() get the 437 * necessary anymore if we make hpte_need_flush() get the
335 * page size from the slices 438 * page size from the slices
336 */ 439 */
337 pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1); 440 unsigned int psize = get_slice_psize(mm, addr);
441 unsigned int shift = mmu_psize_to_shift(psize);
442 unsigned long sz = ((1UL) << shift);
443 struct hstate *hstate = size_to_hstate(sz);
444 pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
338 } 445 }
339 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 446 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
340} 447}
@@ -351,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
351{ 458{
352 pte_t *ptep; 459 pte_t *ptep;
353 struct page *page; 460 struct page *page;
461 unsigned int mmu_psize = get_slice_psize(mm, address);
354 462
355 if (get_slice_psize(mm, address) != mmu_huge_psize) 463 /* Verify it is a huge page else bail. */
464 if (!mmu_huge_psizes[mmu_psize])
356 return ERR_PTR(-EINVAL); 465 return ERR_PTR(-EINVAL);
357 466
358 ptep = huge_pte_offset(mm, address); 467 ptep = huge_pte_offset(mm, address);
359 page = pte_page(*ptep); 468 page = pte_page(*ptep);
360 if (page) 469 if (page) {
361 page += (address % HPAGE_SIZE) / PAGE_SIZE; 470 unsigned int shift = mmu_psize_to_shift(mmu_psize);
471 unsigned long sz = ((1UL) << shift);
472 page += (address % sz) / PAGE_SIZE;
473 }
362 474
363 return page; 475 return page;
364} 476}
@@ -368,6 +480,11 @@ int pmd_huge(pmd_t pmd)
368 return 0; 480 return 0;
369} 481}
370 482
483int pud_huge(pud_t pud)
484{
485 return 0;
486}
487
371struct page * 488struct page *
372follow_huge_pmd(struct mm_struct *mm, unsigned long address, 489follow_huge_pmd(struct mm_struct *mm, unsigned long address,
373 pmd_t *pmd, int write) 490 pmd_t *pmd, int write)
@@ -381,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
381 unsigned long len, unsigned long pgoff, 498 unsigned long len, unsigned long pgoff,
382 unsigned long flags) 499 unsigned long flags)
383{ 500{
384 return slice_get_unmapped_area(addr, len, flags, 501 struct hstate *hstate = hstate_file(file);
385 mmu_huge_psize, 1, 0); 502 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
503 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
386} 504}
387 505
388/* 506/*
389 * Called by asm hashtable.S for doing lazy icache flush 507 * Called by asm hashtable.S for doing lazy icache flush
390 */ 508 */
391static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, 509static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
392 pte_t pte, int trap) 510 pte_t pte, int trap, unsigned long sz)
393{ 511{
394 struct page *page; 512 struct page *page;
395 int i; 513 int i;
@@ -402,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
402 /* page is dirty */ 520 /* page is dirty */
403 if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { 521 if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
404 if (trap == 0x400) { 522 if (trap == 0x400) {
405 for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) 523 for (i = 0; i < (sz / PAGE_SIZE); i++)
406 __flush_dcache_icache(page_address(page+i)); 524 __flush_dcache_icache(page_address(page+i));
407 set_bit(PG_arch_1, &page->flags); 525 set_bit(PG_arch_1, &page->flags);
408 } else { 526 } else {
@@ -418,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
418{ 536{
419 pte_t *ptep; 537 pte_t *ptep;
420 unsigned long old_pte, new_pte; 538 unsigned long old_pte, new_pte;
421 unsigned long va, rflags, pa; 539 unsigned long va, rflags, pa, sz;
422 long slot; 540 long slot;
423 int err = 1; 541 int err = 1;
424 int ssize = user_segment_size(ea); 542 int ssize = user_segment_size(ea);
543 unsigned int mmu_psize;
544 int shift;
545 mmu_psize = get_slice_psize(mm, ea);
425 546
547 if (!mmu_huge_psizes[mmu_psize])
548 goto out;
426 ptep = huge_pte_offset(mm, ea); 549 ptep = huge_pte_offset(mm, ea);
427 550
428 /* Search the Linux page table for a match with va */ 551 /* Search the Linux page table for a match with va */
@@ -465,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
465 rflags = 0x2 | (!(new_pte & _PAGE_RW)); 588 rflags = 0x2 | (!(new_pte & _PAGE_RW));
466 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 589 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
467 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); 590 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
591 shift = mmu_psize_to_shift(mmu_psize);
592 sz = ((1UL) << shift);
468 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) 593 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
469 /* No CPU has hugepages but lacks no execute, so we 594 /* No CPU has hugepages but lacks no execute, so we
470 * don't need to worry about that case */ 595 * don't need to worry about that case */
471 rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), 596 rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
472 trap); 597 trap, sz);
473 598
474 /* Check if pte already has an hpte (case 2) */ 599 /* Check if pte already has an hpte (case 2) */
475 if (unlikely(old_pte & _PAGE_HASHPTE)) { 600 if (unlikely(old_pte & _PAGE_HASHPTE)) {
476 /* There MIGHT be an HPTE for this pte */ 601 /* There MIGHT be an HPTE for this pte */
477 unsigned long hash, slot; 602 unsigned long hash, slot;
478 603
479 hash = hpt_hash(va, HPAGE_SHIFT, ssize); 604 hash = hpt_hash(va, shift, ssize);
480 if (old_pte & _PAGE_F_SECOND) 605 if (old_pte & _PAGE_F_SECOND)
481 hash = ~hash; 606 hash = ~hash;
482 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 607 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
483 slot += (old_pte & _PAGE_F_GIX) >> 12; 608 slot += (old_pte & _PAGE_F_GIX) >> 12;
484 609
485 if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, 610 if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
486 ssize, local) == -1) 611 ssize, local) == -1)
487 old_pte &= ~_PAGE_HPTEFLAGS; 612 old_pte &= ~_PAGE_HPTEFLAGS;
488 } 613 }
489 614
490 if (likely(!(old_pte & _PAGE_HASHPTE))) { 615 if (likely(!(old_pte & _PAGE_HASHPTE))) {
491 unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize); 616 unsigned long hash = hpt_hash(va, shift, ssize);
492 unsigned long hpte_group; 617 unsigned long hpte_group;
493 618
494 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; 619 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
@@ -509,7 +634,7 @@ repeat:
509 634
510 /* Insert into the hash table, primary slot */ 635 /* Insert into the hash table, primary slot */
511 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, 636 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
512 mmu_huge_psize, ssize); 637 mmu_psize, ssize);
513 638
514 /* Primary is full, try the secondary */ 639 /* Primary is full, try the secondary */
515 if (unlikely(slot == -1)) { 640 if (unlikely(slot == -1)) {
@@ -517,7 +642,7 @@ repeat:
517 HPTES_PER_GROUP) & ~0x7UL; 642 HPTES_PER_GROUP) & ~0x7UL;
518 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 643 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
519 HPTE_V_SECONDARY, 644 HPTE_V_SECONDARY,
520 mmu_huge_psize, ssize); 645 mmu_psize, ssize);
521 if (slot == -1) { 646 if (slot == -1) {
522 if (mftb() & 0x1) 647 if (mftb() & 0x1)
523 hpte_group = ((hash & htab_hash_mask) * 648 hpte_group = ((hash & htab_hash_mask) *
@@ -549,45 +674,54 @@ void set_huge_psize(int psize)
549{ 674{
550 /* Check that it is a page size supported by the hardware and 675 /* Check that it is a page size supported by the hardware and
551 * that it fits within pagetable limits. */ 676 * that it fits within pagetable limits. */
552 if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT && 677 if (mmu_psize_defs[psize].shift &&
678 mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
553 (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || 679 (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
554 mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) { 680 mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
555 HPAGE_SHIFT = mmu_psize_defs[psize].shift; 681 mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
556 mmu_huge_psize = psize; 682 /* Return if huge page size has already been setup or is the
557#ifdef CONFIG_PPC_64K_PAGES 683 * same as the base page size. */
558 hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); 684 if (mmu_huge_psizes[psize] ||
559#else 685 mmu_psize_defs[psize].shift == PAGE_SHIFT)
560 if (HPAGE_SHIFT == HPAGE_SHIFT_64K) 686 return;
561 hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); 687 hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
562 else 688
563 hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT); 689 switch (mmu_psize_defs[psize].shift) {
564#endif 690 case PAGE_SHIFT_64K:
565 691 /* We only allow 64k hpages with 4k base page,
692 * which was checked above, and always put them
693 * at the PMD */
694 hugepte_shift[psize] = PMD_SHIFT;
695 break;
696 case PAGE_SHIFT_16M:
697 /* 16M pages can be at two different levels
698 * of pagestables based on base page size */
699 if (PAGE_SHIFT == PAGE_SHIFT_64K)
700 hugepte_shift[psize] = PMD_SHIFT;
701 else /* 4k base page */
702 hugepte_shift[psize] = PUD_SHIFT;
703 break;
704 case PAGE_SHIFT_16G:
705 /* 16G pages are always at PGD level */
706 hugepte_shift[psize] = PGDIR_SHIFT;
707 break;
708 }
709 hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
566 } else 710 } else
567 HPAGE_SHIFT = 0; 711 hugepte_shift[psize] = 0;
568} 712}
569 713
570static int __init hugepage_setup_sz(char *str) 714static int __init hugepage_setup_sz(char *str)
571{ 715{
572 unsigned long long size; 716 unsigned long long size;
573 int mmu_psize = -1; 717 int mmu_psize;
574 int shift; 718 int shift;
575 719
576 size = memparse(str, &str); 720 size = memparse(str, &str);
577 721
578 shift = __ffs(size); 722 shift = __ffs(size);
579 switch (shift) { 723 mmu_psize = shift_to_mmu_psize(shift);
580#ifndef CONFIG_PPC_64K_PAGES 724 if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
581 case HPAGE_SHIFT_64K:
582 mmu_psize = MMU_PAGE_64K;
583 break;
584#endif
585 case HPAGE_SHIFT_16M:
586 mmu_psize = MMU_PAGE_16M;
587 break;
588 }
589
590 if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift)
591 set_huge_psize(mmu_psize); 725 set_huge_psize(mmu_psize);
592 else 726 else
593 printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); 727 printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
@@ -603,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr)
603 737
604static int __init hugetlbpage_init(void) 738static int __init hugetlbpage_init(void)
605{ 739{
740 unsigned int psize;
741
606 if (!cpu_has_feature(CPU_FTR_16M_PAGE)) 742 if (!cpu_has_feature(CPU_FTR_16M_PAGE))
607 return -ENODEV; 743 return -ENODEV;
608 744 /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE
609 huge_pgtable_cache = kmem_cache_create("hugepte_cache", 745 * and adjust PTE_NONCACHE_NUM if the number of supported huge page
610 HUGEPTE_TABLE_SIZE, 746 * sizes changes.
611 HUGEPTE_TABLE_SIZE, 747 */
612 0, 748 set_huge_psize(MMU_PAGE_16M);
613 zero_ctor); 749 set_huge_psize(MMU_PAGE_64K);
614 if (! huge_pgtable_cache) 750 set_huge_psize(MMU_PAGE_16G);
615 panic("hugetlbpage_init(): could not create hugepte cache\n"); 751
752 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
753 if (mmu_huge_psizes[psize]) {
754 huge_pgtable_cache(psize) = kmem_cache_create(
755 HUGEPTE_CACHE_NAME(psize),
756 HUGEPTE_TABLE_SIZE(psize),
757 HUGEPTE_TABLE_SIZE(psize),
758 0,
759 zero_ctor);
760 if (!huge_pgtable_cache(psize))
761 panic("hugetlbpage_init(): could not create %s"\
762 "\n", HUGEPTE_CACHE_NAME(psize));
763 }
764 }
616 765
617 return 0; 766 return 0;
618} 767}