path: root/arch/powerpc/mm/hugetlbpage.c
author     Tejun Heo <tj@kernel.org>  2011-11-28 12:46:22 -0500
committer  Tejun Heo <tj@kernel.org>  2011-11-28 12:46:22 -0500
commit     d4bbf7e7759afc172e2bfbc5c416324590049cdd (patch)
tree       7eab5ee5481cd3dcf1162329fec827177640018a /arch/powerpc/mm/hugetlbpage.c
parent     a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff)
parent     401d0069cb344f401bc9d264c31db55876ff78c0 (diff)
Merge branch 'master' into x86/memblock
Conflicts & resolutions: * arch/x86/xen/setup.c dc91c728fd "xen: allow extra memory to be in multiple regions" 24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..." conflicted on xen_add_extra_mem() updates. The resolution is trivial as the latter just want to replace memblock_x86_reserve_range() with memblock_reserve(). * drivers/pci/intel-iommu.c 166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/" 5dfe8660a3d "bootmem: Replace work_with_active_regions() with..." conflicted as the former moved the file under drivers/iommu/. Resolved by applying the chnages from the latter on the moved file. * mm/Kconfig 6661672053a "memblock: add NO_BOOTMEM config symbol" c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option" conflicted trivially. Both added config options. Just letting both add their own options resolves the conflict. * mm/memblock.c d1f0ece6cdc "mm/memblock.c: small function definition fixes" ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()" confliected. The former updates function removed by the latter. Resolution is trivial. Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c  401
1 file changed, 354 insertions, 47 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0b9a5c1901b9..8558b572e55d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * PPC64 (POWER4) Huge TLB Page Support for Kernel. 2 * PPC Huge TLB Page Support for Kernel.
3 * 3 *
4 * Copyright (C) 2003 David Gibson, IBM Corporation. 4 * Copyright (C) 2003 David Gibson, IBM Corporation.
5 * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
5 * 6 *
6 * Based on the IA-32 version: 7 * Based on the IA-32 version:
7 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> 8 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
@@ -11,24 +12,40 @@
11#include <linux/io.h> 12#include <linux/io.h>
12#include <linux/slab.h> 13#include <linux/slab.h>
13#include <linux/hugetlb.h> 14#include <linux/hugetlb.h>
15#include <linux/of_fdt.h>
16#include <linux/memblock.h>
17#include <linux/bootmem.h>
18#include <linux/moduleparam.h>
14#include <asm/pgtable.h> 19#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 20#include <asm/pgalloc.h>
16#include <asm/tlb.h> 21#include <asm/tlb.h>
22#include <asm/setup.h>
17 23
18#define PAGE_SHIFT_64K 16 24#define PAGE_SHIFT_64K 16
19#define PAGE_SHIFT_16M 24 25#define PAGE_SHIFT_16M 24
20#define PAGE_SHIFT_16G 34 26#define PAGE_SHIFT_16G 34
21 27
22#define MAX_NUMBER_GPAGES 1024 28unsigned int HPAGE_SHIFT;
23 29
24/* Tracks the 16G pages after the device tree is scanned and before the 30/*
25 * huge_boot_pages list is ready. */ 31 * Tracks gpages after the device tree is scanned and before the
26static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; 32 * huge_boot_pages list is ready. On 64-bit implementations, this is
33 * just used to track 16G pages and so is a single array. 32-bit
34 * implementations may have more than one gpage size due to limitations
35 * of the memory allocators, so we need multiple arrays
36 */
37#ifdef CONFIG_PPC64
38#define MAX_NUMBER_GPAGES 1024
39static u64 gpage_freearray[MAX_NUMBER_GPAGES];
27static unsigned nr_gpages; 40static unsigned nr_gpages;
28 41#else
29/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() 42#define MAX_NUMBER_GPAGES 128
30 * will choke on pointers to hugepte tables, which is handy for 43struct psize_gpages {
31 * catching screwups early. */ 44 u64 gpage_list[MAX_NUMBER_GPAGES];
45 unsigned int nr_gpages;
46};
47static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
48#endif
32 49
33static inline int shift_to_mmu_psize(unsigned int shift) 50static inline int shift_to_mmu_psize(unsigned int shift)
34{ 51{
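The hunk above introduces per-page-size gpage tracking and leans on shift_to_mmu_psize()/mmu_psize_to_shift() to move between a page-size shift and an MMU psize index. A minimal user-space sketch of that translation, with a toy table standing in for mmu_psize_defs[] (indices and shifts illustrative only):

#include <stdio.h>

/* Toy stand-in for mmu_psize_defs[]: index == psize, value == shift. */
static const unsigned int psize_shift[] = { 12, 16, 24, 34 };   /* 4K 64K 16M 16G */

static int shift_to_psize(unsigned int shift)
{
	unsigned int i;

	for (i = 0; i < sizeof(psize_shift) / sizeof(psize_shift[0]); i++)
		if (psize_shift[i] == shift)
			return i;
	return -1;                                      /* unsupported size */
}

int main(void)
{
	unsigned long long size = 16ULL << 20;          /* a 16M gpage */
	unsigned int shift = __builtin_ctzll(size);     /* same idea as __ffs(size) */

	printf("size %#llx -> shift %u -> psize %d\n", size, shift,
	       shift_to_psize(shift));                  /* shift 24 -> psize 2 */
	return 0;
}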
@@ -49,25 +66,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
49 66
50#define hugepd_none(hpd) ((hpd).pd == 0) 67#define hugepd_none(hpd) ((hpd).pd == 0)
51 68
52static inline pte_t *hugepd_page(hugepd_t hpd)
53{
54 BUG_ON(!hugepd_ok(hpd));
55 return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
56}
57
58static inline unsigned int hugepd_shift(hugepd_t hpd)
59{
60 return hpd.pd & HUGEPD_SHIFT_MASK;
61}
62
63static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
64{
65 unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
66 pte_t *dir = hugepd_page(*hpdp);
67
68 return dir + idx;
69}
70
71pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) 69pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
72{ 70{
73 pgd_t *pg; 71 pgd_t *pg;
@@ -93,7 +91,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
93 if (is_hugepd(pm)) 91 if (is_hugepd(pm))
94 hpdp = (hugepd_t *)pm; 92 hpdp = (hugepd_t *)pm;
95 else if (!pmd_none(*pm)) { 93 else if (!pmd_none(*pm)) {
96 return pte_offset_map(pm, ea); 94 return pte_offset_kernel(pm, ea);
97 } 95 }
98 } 96 }
99 } 97 }
@@ -114,8 +112,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
114static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, 112static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
115 unsigned long address, unsigned pdshift, unsigned pshift) 113 unsigned long address, unsigned pdshift, unsigned pshift)
116{ 114{
117 pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), 115 struct kmem_cache *cachep;
118 GFP_KERNEL|__GFP_REPEAT); 116 pte_t *new;
117
118#ifdef CONFIG_PPC64
119 cachep = PGT_CACHE(pdshift - pshift);
120#else
121 int i;
122 int num_hugepd = 1 << (pshift - pdshift);
123 cachep = hugepte_cache;
124#endif
125
126 new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
119 127
120 BUG_ON(pshift > HUGEPD_SHIFT_MASK); 128 BUG_ON(pshift > HUGEPD_SHIFT_MASK);
121 BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); 129 BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -124,10 +132,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
124 return -ENOMEM; 132 return -ENOMEM;
125 133
126 spin_lock(&mm->page_table_lock); 134 spin_lock(&mm->page_table_lock);
135#ifdef CONFIG_PPC64
127 if (!hugepd_none(*hpdp)) 136 if (!hugepd_none(*hpdp))
128 kmem_cache_free(PGT_CACHE(pdshift - pshift), new); 137 kmem_cache_free(cachep, new);
129 else 138 else
130 hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; 139 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
140#else
141 /*
142 * We have multiple higher-level entries that point to the same
143 * actual pte location. Fill in each as we go and backtrack on error.
144 * We need all of these so the DTLB pgtable walk code can find the
145 * right higher-level entry without knowing if it's a hugepage or not.
146 */
147 for (i = 0; i < num_hugepd; i++, hpdp++) {
148 if (unlikely(!hugepd_none(*hpdp)))
149 break;
150 else
151 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
152 }
153 /* If we bailed from the for loop early, an error occurred, clean up */
154 if (i < num_hugepd) {
155 for (i = i - 1 ; i >= 0; i--, hpdp--)
156 hpdp->pd = 0;
157 kmem_cache_free(cachep, new);
158 }
159#endif
131 spin_unlock(&mm->page_table_lock); 160 spin_unlock(&mm->page_table_lock);
132 return 0; 161 return 0;
133} 162}
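On the 32-bit side of __hugepte_alloc() above, one huge page covers several consecutive directory slots (num_hugepd = 1 << (pshift - pdshift)), so the code fills every slot with the same hugepte table and backs out if another thread already populated one of them. A standalone sketch of that fill-and-backtrack pattern (user-space, illustrative shift values):

#include <stdio.h>

static unsigned long slots[8];          /* stand-in for consecutive pmd/pgd entries */

static int install_hugepd(unsigned long *slot, unsigned int num_hugepd,
			  unsigned long new_entry)
{
	unsigned int i;

	for (i = 0; i < num_hugepd; i++) {
		if (slot[i] != 0)               /* lost the race for this slot */
			break;
		slot[i] = new_entry;
	}
	if (i < num_hugepd) {                   /* bailed early: undo our stores */
		while (i--)
			slot[i] = 0;
		return -1;                      /* caller frees the new table */
	}
	return 0;
}

int main(void)
{
	unsigned int pshift = 22, pdshift = 20;         /* illustrative only */
	unsigned int num_hugepd = 1u << (pshift - pdshift);

	printf("num_hugepd = %u, install = %d\n", num_hugepd,
	       install_hugepd(slots, num_hugepd, 0x1234));
	return 0;
}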
@@ -169,11 +198,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
169 return hugepte_offset(hpdp, addr, pdshift); 198 return hugepte_offset(hpdp, addr, pdshift);
170} 199}
171 200
201#ifdef CONFIG_PPC32
202/* Build list of addresses of gigantic pages. This function is used in early
203 * boot before the buddy or bootmem allocator is setup.
204 */
205void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
206{
207 unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
208 int i;
209
210 if (addr == 0)
211 return;
212
213 gpage_freearray[idx].nr_gpages = number_of_pages;
214
215 for (i = 0; i < number_of_pages; i++) {
216 gpage_freearray[idx].gpage_list[i] = addr;
217 addr += page_size;
218 }
219}
220
221/*
222 * Moves the gigantic page addresses from the temporary list to the
223 * huge_boot_pages list.
224 */
225int alloc_bootmem_huge_page(struct hstate *hstate)
226{
227 struct huge_bootmem_page *m;
228 int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
229 int nr_gpages = gpage_freearray[idx].nr_gpages;
230
231 if (nr_gpages == 0)
232 return 0;
233
234#ifdef CONFIG_HIGHMEM
235 /*
236 * If gpages can be in highmem we can't use the trick of storing the
237 * data structure in the page; allocate space for this
238 */
239 m = alloc_bootmem(sizeof(struct huge_bootmem_page));
240 m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
241#else
242 m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
243#endif
244
245 list_add(&m->list, &huge_boot_pages);
246 gpage_freearray[idx].nr_gpages = nr_gpages;
247 gpage_freearray[idx].gpage_list[nr_gpages] = 0;
248 m->hstate = hstate;
249
250 return 1;
251}
252/*
253 * Scan the command line hugepagesz= options for gigantic pages; store those in
254 * a list that we use to allocate the memory once all options are parsed.
255 */
256
257unsigned long gpage_npages[MMU_PAGE_COUNT];
258
259static int __init do_gpage_early_setup(char *param, char *val)
260{
261 static phys_addr_t size;
262 unsigned long npages;
263
264 /*
265 * The hugepagesz and hugepages cmdline options are interleaved. We
266 * use the size variable to keep track of whether or not this was done
267 * properly and skip over instances where it is incorrect. Other
268 * command-line parsing code will issue warnings, so we don't need to.
269 *
270 */
271 if ((strcmp(param, "default_hugepagesz") == 0) ||
272 (strcmp(param, "hugepagesz") == 0)) {
273 size = memparse(val, NULL);
274 } else if (strcmp(param, "hugepages") == 0) {
275 if (size != 0) {
276 if (sscanf(val, "%lu", &npages) <= 0)
277 npages = 0;
278 gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
279 size = 0;
280 }
281 }
282 return 0;
283}
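do_gpage_early_setup() above pairs each hugepages= count with the hugepagesz= (or default_hugepagesz=) that preceded it by remembering the size in a static variable and clearing it once consumed. A user-space sketch of the same pairing, with a tiny suffix parser as a hypothetical stand-in for memparse():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned long long parse_size(const char *s)    /* poor man's memparse() */
{
	char *end;
	unsigned long long v = strtoull(s, &end, 0);

	switch (*end) {
	case 'G': case 'g': v <<= 30; break;
	case 'M': case 'm': v <<= 20; break;
	case 'K': case 'k': v <<= 10; break;
	}
	return v;
}

int main(void)
{
	const char *params[][2] = {
		{ "hugepagesz", "1G" },   { "hugepages", "2" },
		{ "hugepagesz", "256M" }, { "hugepages", "4" },
	};
	unsigned long long size = 0;    /* plays the role of the static 'size' */
	unsigned int i;

	for (i = 0; i < 4; i++) {
		if (!strcmp(params[i][0], "hugepagesz"))
			size = parse_size(params[i][1]);
		else if (!strcmp(params[i][0], "hugepages") && size) {
			printf("%llu pages of %llu bytes\n",
			       strtoull(params[i][1], NULL, 10), size);
			size = 0;       /* consume the pairing */
		}
	}
	return 0;
}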
284
285
286/*
287 * This function allocates physical space for pages that are larger than the
288 * buddy allocator can handle. We want to allocate these in highmem because
289 * the amount of lowmem is limited. This means that this function MUST be
290 * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
291 * allocate to grab highmem.
292 */
293void __init reserve_hugetlb_gpages(void)
294{
295 static __initdata char cmdline[COMMAND_LINE_SIZE];
296 phys_addr_t size, base;
297 int i;
298
299 strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
300 parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);
301
302 /*
303 * Walk gpage list in reverse, allocating larger page sizes first.
304 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
305 * When we reach the point in the list where pages are no longer
306 * considered gpages, we're done.
307 */
308 for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
309 if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
310 continue;
311 else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
312 break;
313
314 size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
315 base = memblock_alloc_base(size * gpage_npages[i], size,
316 MEMBLOCK_ALLOC_ANYWHERE);
317 add_gpage(base, size, gpage_npages[i]);
318 }
319}
320
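reserve_hugetlb_gpages() stops its reverse walk as soon as it reaches sizes the buddy allocator can serve, i.e. shifts below MAX_ORDER + PAGE_SHIFT. A quick worked check of that cutoff, assuming a 4K base page and the default MAX_ORDER of 11 (both illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int page_shift = 12, max_order = 11;   /* illustrative values */
	unsigned int shifts[] = { 34, 24, 22 };         /* 16G, 16M, 4M */
	unsigned int i;

	/* cutoff = 23 here, so 4M (shift 22) and smaller fall to the buddy allocator */
	for (i = 0; i < 3; i++)
		printf("shift %u: %s\n", shifts[i],
		       shifts[i] < max_order + page_shift ?
		       "buddy-sized, stop walking" : "gpage, reserve via memblock");
	return 0;
}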
321#else /* PPC64 */
322
172/* Build list of addresses of gigantic pages. This function is used in early 323/* Build list of addresses of gigantic pages. This function is used in early
173 * boot before the buddy or bootmem allocator is setup. 324 * boot before the buddy or bootmem allocator is setup.
174 */ 325 */
175void add_gpage(unsigned long addr, unsigned long page_size, 326void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
176 unsigned long number_of_pages)
177{ 327{
178 if (!addr) 328 if (!addr)
179 return; 329 return;
@@ -199,19 +349,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
199 m->hstate = hstate; 349 m->hstate = hstate;
200 return 1; 350 return 1;
201} 351}
352#endif
202 353
203int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) 354int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
204{ 355{
205 return 0; 356 return 0;
206} 357}
207 358
359#ifdef CONFIG_PPC32
360#define HUGEPD_FREELIST_SIZE \
361 ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
362
363struct hugepd_freelist {
364 struct rcu_head rcu;
365 unsigned int index;
366 void *ptes[0];
367};
368
369static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
370
371static void hugepd_free_rcu_callback(struct rcu_head *head)
372{
373 struct hugepd_freelist *batch =
374 container_of(head, struct hugepd_freelist, rcu);
375 unsigned int i;
376
377 for (i = 0; i < batch->index; i++)
378 kmem_cache_free(hugepte_cache, batch->ptes[i]);
379
380 free_page((unsigned long)batch);
381}
382
383static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
384{
385 struct hugepd_freelist **batchp;
386
387 batchp = &__get_cpu_var(hugepd_freelist_cur);
388
389 if (atomic_read(&tlb->mm->mm_users) < 2 ||
390 cpumask_equal(mm_cpumask(tlb->mm),
391 cpumask_of(smp_processor_id()))) {
392 kmem_cache_free(hugepte_cache, hugepte);
393 return;
394 }
395
396 if (*batchp == NULL) {
397 *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
398 (*batchp)->index = 0;
399 }
400
401 (*batchp)->ptes[(*batchp)->index++] = hugepte;
402 if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
403 call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
404 *batchp = NULL;
405 }
406}
407#endif
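hugepd_free() above parks freed hugepte tables in a page-sized per-CPU batch and hands the full batch to call_rcu_sched(), unless the mm is only in use on the current CPU, in which case it frees immediately. A simplified user-space sketch of the batching logic, where flush_batch() is a hypothetical stand-in for the RCU callback:

#include <stdlib.h>

#define BATCH_SIZE 16                   /* kernel: HUGEPD_FREELIST_SIZE */

struct free_batch {
	unsigned int index;
	void *ptes[BATCH_SIZE];
};

static struct free_batch *cur;          /* kernel: per-CPU hugepd_freelist_cur */

static void flush_batch(struct free_batch *b)   /* kernel runs this after a grace period */
{
	unsigned int i;

	for (i = 0; i < b->index; i++)
		free(b->ptes[i]);
	free(b);
}

static void deferred_free(void *hugepte)
{
	if (!cur) {
		cur = calloc(1, sizeof(*cur));
		if (!cur) {                     /* no batch available: free now */
			free(hugepte);
			return;
		}
	}
	cur->ptes[cur->index++] = hugepte;
	if (cur->index == BATCH_SIZE) {
		flush_batch(cur);               /* kernel defers this via call_rcu_sched() */
		cur = NULL;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < BATCH_SIZE + 1; i++)    /* force one batch flush */
		deferred_free(malloc(32));
	if (cur)                                /* drain the leftover entry */
		flush_batch(cur);
	return 0;
}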
408
208static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, 409static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
209 unsigned long start, unsigned long end, 410 unsigned long start, unsigned long end,
210 unsigned long floor, unsigned long ceiling) 411 unsigned long floor, unsigned long ceiling)
211{ 412{
212 pte_t *hugepte = hugepd_page(*hpdp); 413 pte_t *hugepte = hugepd_page(*hpdp);
213 unsigned shift = hugepd_shift(*hpdp); 414 int i;
415
214 unsigned long pdmask = ~((1UL << pdshift) - 1); 416 unsigned long pdmask = ~((1UL << pdshift) - 1);
417 unsigned int num_hugepd = 1;
418
419#ifdef CONFIG_PPC64
420 unsigned int shift = hugepd_shift(*hpdp);
421#else
422 /* Note: On 32-bit the hpdp may be the first of several */
423 num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
424#endif
215 425
216 start &= pdmask; 426 start &= pdmask;
217 if (start < floor) 427 if (start < floor)
@@ -224,9 +434,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
224 if (end - 1 > ceiling - 1) 434 if (end - 1 > ceiling - 1)
225 return; 435 return;
226 436
227 hpdp->pd = 0; 437 for (i = 0; i < num_hugepd; i++, hpdp++)
438 hpdp->pd = 0;
439
228 tlb->need_flush = 1; 440 tlb->need_flush = 1;
441#ifdef CONFIG_PPC64
229 pgtable_free_tlb(tlb, hugepte, pdshift - shift); 442 pgtable_free_tlb(tlb, hugepte, pdshift - shift);
443#else
444 hugepd_free(tlb, hugepte);
445#endif
230} 446}
231 447
232static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, 448static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -331,18 +547,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
331 * too. 547 * too.
332 */ 548 */
333 549
334 pgd = pgd_offset(tlb->mm, addr);
335 do { 550 do {
336 next = pgd_addr_end(addr, end); 551 next = pgd_addr_end(addr, end);
552 pgd = pgd_offset(tlb->mm, addr);
337 if (!is_hugepd(pgd)) { 553 if (!is_hugepd(pgd)) {
338 if (pgd_none_or_clear_bad(pgd)) 554 if (pgd_none_or_clear_bad(pgd))
339 continue; 555 continue;
340 hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); 556 hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
341 } else { 557 } else {
558#ifdef CONFIG_PPC32
559 /*
560 * Increment next by the size of the huge mapping since
561 * on 32-bit there may be more than one entry at the pgd
562 * level for a single hugepage, but all of them point to
563 * the same kmem cache that holds the hugepte.
564 */
565 next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
566#endif
342 free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, 567 free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
343 addr, next, floor, ceiling); 568 addr, next, floor, ceiling);
344 } 569 }
345 } while (pgd++, addr = next, addr != end); 570 } while (addr = next, addr != end);
346} 571}
347 572
348struct page * 573struct page *
@@ -390,7 +615,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
390{ 615{
391 unsigned long mask; 616 unsigned long mask;
392 unsigned long pte_end; 617 unsigned long pte_end;
393 struct page *head, *page; 618 struct page *head, *page, *tail;
394 pte_t pte; 619 pte_t pte;
395 int refs; 620 int refs;
396 621
@@ -413,6 +638,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
413 head = pte_page(pte); 638 head = pte_page(pte);
414 639
415 page = head + ((addr & (sz-1)) >> PAGE_SHIFT); 640 page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
641 tail = page;
416 do { 642 do {
417 VM_BUG_ON(compound_head(page) != head); 643 VM_BUG_ON(compound_head(page) != head);
418 pages[*nr] = page; 644 pages[*nr] = page;
@@ -428,10 +654,20 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
428 654
429 if (unlikely(pte_val(pte) != pte_val(*ptep))) { 655 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
430 /* Could be optimized better */ 656 /* Could be optimized better */
431 while (*nr) { 657 *nr -= refs;
432 put_page(page); 658 while (refs--)
433 (*nr)--; 659 put_page(head);
434 } 660 return 0;
661 }
662
663 /*
664 * Any tail page need their mapcount reference taken before we
665 * return.
666 */
667 while (refs--) {
668 if (PageTail(tail))
669 get_huge_page_tail(tail);
670 tail++;
435 } 671 }
436 672
437 return 1; 673 return 1;
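The gup_hugepte() change above fixes the rollback path: when the pte is rechecked and found changed, the 'refs' references that were just taken on the compound head are dropped and *nr is rewound by refs, rather than dropping references via the loop cursor and draining *nr all the way to zero. A small user-space model of that rollback (struct page and put_page() are stand-ins):

#include <stdio.h>

struct page { int refcount; };

static void put_page(struct page *p)
{
	p->refcount--;
}

static void rollback(struct page *head, int refs, int *nr)
{
	*nr -= refs;            /* forget the entries just appended to pages[] */
	while (refs--)
		put_page(head); /* give back the references taken on the head */
}

int main(void)
{
	struct page head = { .refcount = 8 };
	int nr = 5, refs = 3;   /* 3 subpages were speculatively recorded */

	rollback(&head, refs, &nr);
	printf("nr=%d head.refcount=%d\n", nr, head.refcount);  /* 2 and 5 */
	return 0;
}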
@@ -466,17 +702,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
466 unsigned long len, unsigned long pgoff, 702 unsigned long len, unsigned long pgoff,
467 unsigned long flags) 703 unsigned long flags)
468{ 704{
705#ifdef CONFIG_PPC_MM_SLICES
469 struct hstate *hstate = hstate_file(file); 706 struct hstate *hstate = hstate_file(file);
470 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); 707 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
471 708
472 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); 709 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
710#else
711 return get_unmapped_area(file, addr, len, pgoff, flags);
712#endif
473} 713}
474 714
475unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) 715unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
476{ 716{
717#ifdef CONFIG_PPC_MM_SLICES
477 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); 718 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
478 719
479 return 1UL << mmu_psize_to_shift(psize); 720 return 1UL << mmu_psize_to_shift(psize);
721#else
722 if (!is_vm_hugetlb_page(vma))
723 return PAGE_SIZE;
724
725 return huge_page_size(hstate_vma(vma));
726#endif
727}
728
729static inline bool is_power_of_4(unsigned long x)
730{
731 if (is_power_of_2(x))
732 return (__ilog2(x) % 2) ? false : true;
733 return false;
480} 734}
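is_power_of_4() above exists because the FSL Book3E path in add_huge_page_size() (next hunk) only accepts power-of-4 sizes; a size qualifies when it is a power of two whose log2 is even. A quick user-space check, re-expressed with a compiler builtin instead of __ilog2():

#include <stdbool.h>
#include <stdio.h>

static bool is_power_of_4(unsigned long x)
{
	/* power of two and an even bit position */
	return x && !(x & (x - 1)) && (__builtin_ctzl(x) % 2 == 0);
}

int main(void)
{
	printf("%d %d %d\n", is_power_of_4(4UL << 20),    /* 4M  -> 1 */
			     is_power_of_4(8UL << 20),    /* 8M  -> 0 */
			     is_power_of_4(16UL << 20));  /* 16M -> 1 */
	return 0;
}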
481 735
482static int __init add_huge_page_size(unsigned long long size) 736static int __init add_huge_page_size(unsigned long long size)
@@ -486,9 +740,14 @@ static int __init add_huge_page_size(unsigned long long size)
486 740
487 /* Check that it is a page size supported by the hardware and 741 /* Check that it is a page size supported by the hardware and
488 * that it fits within pagetable and slice limits. */ 742 * that it fits within pagetable and slice limits. */
743#ifdef CONFIG_PPC_FSL_BOOK3E
744 if ((size < PAGE_SIZE) || !is_power_of_4(size))
745 return -EINVAL;
746#else
489 if (!is_power_of_2(size) 747 if (!is_power_of_2(size)
490 || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) 748 || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
491 return -EINVAL; 749 return -EINVAL;
750#endif
492 751
493 if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) 752 if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
494 return -EINVAL; 753 return -EINVAL;
@@ -525,6 +784,46 @@ static int __init hugepage_setup_sz(char *str)
525} 784}
526__setup("hugepagesz=", hugepage_setup_sz); 785__setup("hugepagesz=", hugepage_setup_sz);
527 786
787#ifdef CONFIG_FSL_BOOKE
788struct kmem_cache *hugepte_cache;
789static int __init hugetlbpage_init(void)
790{
791 int psize;
792
793 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
794 unsigned shift;
795
796 if (!mmu_psize_defs[psize].shift)
797 continue;
798
799 shift = mmu_psize_to_shift(psize);
800
801 /* Don't treat normal page sizes as huge... */
802 if (shift != PAGE_SHIFT)
803 if (add_huge_page_size(1ULL << shift) < 0)
804 continue;
805 }
806
807 /*
808 * Create a kmem cache for hugeptes. The bottom bits in the pte have
809 * size information encoded in them, so align them to allow this
810 */
811 hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
812 HUGEPD_SHIFT_MASK + 1, 0, NULL);
813 if (hugepte_cache == NULL)
814 panic("%s: Unable to create kmem cache for hugeptes\n",
815 __func__);
816
817 /* Default hpage size = 4M */
818 if (mmu_psize_defs[MMU_PAGE_4M].shift)
819 HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
820 else
821 panic("%s: Unable to set default huge page size\n", __func__);
822
823
824 return 0;
825}
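The comment above explains that the hugepte cache is aligned to HUGEPD_SHIFT_MASK + 1 so the bottom bits of every table pointer stay zero and can carry the page-size shift, which is what hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift relies on. A minimal sketch of that pointer-tagging trick; the 0x3f mask is illustrative, not necessarily the real HUGEPD_SHIFT_MASK:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SHIFT_MASK 0x3fUL               /* illustrative mask value */

int main(void)
{
	void *table;
	unsigned long pd, pshift = 24;  /* e.g. a 16M huge page */

	/* allocate a table aligned so its low bits are guaranteed zero */
	if (posix_memalign(&table, SHIFT_MASK + 1, 256))
		return 1;

	pd = (unsigned long)(uintptr_t)table | pshift;  /* pack pointer + shift */

	printf("table %p shift %lu\n",
	       (void *)(pd & ~SHIFT_MASK),      /* unpack the pointer */
	       pd & SHIFT_MASK);                /* unpack the shift   */
	free(table);
	return 0;
}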
826#else
528static int __init hugetlbpage_init(void) 827static int __init hugetlbpage_init(void)
529{ 828{
530 int psize; 829 int psize;
@@ -567,15 +866,23 @@ static int __init hugetlbpage_init(void)
567 866
568 return 0; 867 return 0;
569} 868}
570 869#endif
571module_init(hugetlbpage_init); 870module_init(hugetlbpage_init);
572 871
573void flush_dcache_icache_hugepage(struct page *page) 872void flush_dcache_icache_hugepage(struct page *page)
574{ 873{
575 int i; 874 int i;
875 void *start;
576 876
577 BUG_ON(!PageCompound(page)); 877 BUG_ON(!PageCompound(page));
578 878
579 for (i = 0; i < (1UL << compound_order(page)); i++) 879 for (i = 0; i < (1UL << compound_order(page)); i++) {
580 __flush_dcache_icache(page_address(page+i)); 880 if (!PageHighMem(page)) {
881 __flush_dcache_icache(page_address(page+i));
882 } else {
883 start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE);
884 __flush_dcache_icache(start);
885 kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
886 }
887 }
581} 888}