author		Becky Bruce <beckyb@kernel.crashing.org>	2011-06-28 05:54:48 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2011-09-19 19:19:40 -0400
commit		41151e77a4d96ea138cede6d84c955aa4769ce74
tree		2d997b77b9adf406a2fd30326bff688577d2e64f
parent		7df5659eefad9b6d457ccdee016bd78bd064cfc0
powerpc: Hugetlb for BookE
Enable hugepages on Freescale BookE processors.  This allows the kernel to
use huge TLB entries to map pages, which can greatly reduce the number of
TLB misses and the amount of TLB thrashing experienced by applications with
large memory footprints.  Care should be taken when using this on FSL
processors, as the number of large TLB entries supported by the core is low
(16-64) on current processors.

The supported set of hugepage sizes includes 4m, 16m, 64m, 256m, and 1g.
Page sizes larger than the max zone size are called "gigantic" pages and
must be allocated on the command line (and cannot be deallocated).

This is currently only fully implemented for Freescale 32-bit BookE
processors, but there is some infrastructure in the code for 64-bit BookE.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
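Because gigantic pages must be reserved at boot, the patch parses interleaved
hugepagesz=/hugepages= parameters from the kernel command line (see
do_gpage_early_setup() in the diff below).  As an illustration only (the sizes
and counts here are example values, not defaults), a boot line reserving two
1g and four 256m gigantic pages might look like:

    hugepagesz=1g hugepages=2 hugepagesz=256m hugepages=4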
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r--	arch/powerpc/mm/hugetlbpage.c	379
1 file changed, 337 insertions(+), 42 deletions(-)
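A detail worth keeping in mind while reading the diff: a hugepd entry packs
the address of a hugepte table and the huge page shift into one word.  The
table comes from a kmem cache aligned to HUGEPD_SHIFT_MASK + 1, so its low
bits are free to hold the shift, and the PD_HUGE bit is cleared to mark the
entry as a huge-page directory.  The following standalone sketch (not kernel
code; the constant values are illustrative assumptions) mirrors the encode in
__hugepte_alloc() and the decode in the removed hugepd_shift()/hugepd_page()
helpers shown below:

#include <stdio.h>

/* Illustrative stand-ins for the kernel's constants (values assumed). */
#define PD_HUGE			0x80000000UL	/* flag bit cleared to mark a hugepd */
#define HUGEPD_SHIFT_MASK	0x3fUL		/* low bits hold the page shift */

int main(void)
{
	/* Stand-in for a hugepte table from the aligned kmem cache; the
	 * alignment guarantees the HUGEPD_SHIFT_MASK bits start as zero. */
	unsigned long table = 0x12345000UL;
	unsigned int pshift = 24;		/* e.g. a 16M huge page */

	/* Encode, as in __hugepte_alloc(): */
	unsigned long pd = (table & ~PD_HUGE) | pshift;

	/* Decode, as in hugepd_shift() and hugepd_page(): */
	printf("shift = %lu, table = %#lx\n",
	       pd & HUGEPD_SHIFT_MASK, pd & ~HUGEPD_SHIFT_MASK);
	return 0;
}

(On 64-bit the real hugepd_page() also ORs the kernel-address top bits back
in, as the removed helper's 0xc000000000000000 in the diff shows.)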
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0b9a5c1901b9..3a5f59dcbb33 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1,7 +1,8 @@
 /*
- * PPC64 (POWER4) Huge TLB Page Support for Kernel.
+ * PPC Huge TLB Page Support for Kernel.
  *
  * Copyright (C) 2003 David Gibson, IBM Corporation.
+ * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
  *
  * Based on the IA-32 version:
  * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
@@ -11,24 +12,39 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <linux/bootmem.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
+#include <asm/setup.h>
 
 #define PAGE_SHIFT_64K	16
 #define PAGE_SHIFT_16M	24
 #define PAGE_SHIFT_16G	34
 
-#define MAX_NUMBER_GPAGES	1024
+unsigned int HPAGE_SHIFT;
 
-/* Tracks the 16G pages after the device tree is scanned and before the
- * huge_boot_pages list is ready.  */
-static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
+/*
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready.  On 64-bit implementations, this is
+ * just used to track 16G pages and so is a single array.  32-bit
+ * implementations may have more than one gpage size due to limitations
+ * of the memory allocators, so we need multiple arrays
+ */
+#ifdef CONFIG_PPC64
+#define MAX_NUMBER_GPAGES	1024
+static u64 gpage_freearray[MAX_NUMBER_GPAGES];
 static unsigned nr_gpages;
-
-/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
- * will choke on pointers to hugepte tables, which is handy for
- * catching screwups early. */
+#else
+#define MAX_NUMBER_GPAGES	128
+struct psize_gpages {
+	u64 gpage_list[MAX_NUMBER_GPAGES];
+	unsigned int nr_gpages;
+};
+static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
+#endif
 
 static inline int shift_to_mmu_psize(unsigned int shift)
 {
@@ -49,25 +65,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 
 #define hugepd_none(hpd)	((hpd).pd == 0)
 
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
-	BUG_ON(!hugepd_ok(hpd));
-	return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
-	return hpd.pd & HUGEPD_SHIFT_MASK;
-}
-
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
-{
-	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
-	pte_t *dir = hugepd_page(*hpdp);
-
-	return dir + idx;
-}
-
 pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
 {
 	pgd_t *pg;
@@ -93,7 +90,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 			if (is_hugepd(pm))
 				hpdp = (hugepd_t *)pm;
 			else if (!pmd_none(*pm)) {
-				return pte_offset_map(pm, ea);
+				return pte_offset_kernel(pm, ea);
 			}
 		}
 	}
@@ -114,8 +111,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			   unsigned long address, unsigned pdshift, unsigned pshift)
 {
-	pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift),
-				       GFP_KERNEL|__GFP_REPEAT);
+	struct kmem_cache *cachep;
+	pte_t *new;
+
+#ifdef CONFIG_PPC64
+	cachep = PGT_CACHE(pdshift - pshift);
+#else
+	int i;
+	int num_hugepd = 1 << (pshift - pdshift);
+	cachep = hugepte_cache;
+#endif
+
+	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
 
 	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
 	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -124,10 +131,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 		return -ENOMEM;
 
 	spin_lock(&mm->page_table_lock);
+#ifdef CONFIG_PPC64
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(PGT_CACHE(pdshift - pshift), new);
+		kmem_cache_free(cachep, new);
 	else
-		hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift;
+		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+#else
+	/*
+	 * We have multiple higher-level entries that point to the same
+	 * actual pte location.  Fill in each as we go and backtrack on error.
+	 * We need all of these so the DTLB pgtable walk code can find the
+	 * right higher-level entry without knowing if it's a hugepage or not.
+	 */
+	for (i = 0; i < num_hugepd; i++, hpdp++) {
+		if (unlikely(!hugepd_none(*hpdp)))
+			break;
+		else
+			hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+	}
+	/* If we bailed from the for loop early, an error occurred, clean up */
+	if (i < num_hugepd) {
+		for (i = i - 1 ; i >= 0; i--, hpdp--)
+			hpdp->pd = 0;
+		kmem_cache_free(cachep, new);
+	}
+#endif
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
@@ -169,11 +197,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	return hugepte_offset(hpdp, addr, pdshift);
 }
 
+#ifdef CONFIG_PPC32
 /* Build list of addresses of gigantic pages.  This function is used in early
  * boot before the buddy or bootmem allocator is setup.
  */
-void add_gpage(unsigned long addr, unsigned long page_size,
-		unsigned long number_of_pages)
+void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+{
+	unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
+	int i;
+
+	if (addr == 0)
+		return;
+
+	gpage_freearray[idx].nr_gpages = number_of_pages;
+
+	for (i = 0; i < number_of_pages; i++) {
+		gpage_freearray[idx].gpage_list[i] = addr;
+		addr += page_size;
+	}
+}
+
+/*
+ * Moves the gigantic page addresses from the temporary list to the
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
+{
+	struct huge_bootmem_page *m;
+	int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
+	int nr_gpages = gpage_freearray[idx].nr_gpages;
+
+	if (nr_gpages == 0)
+		return 0;
+
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * If gpages can be in highmem we can't use the trick of storing the
+	 * data structure in the page; allocate space for this
+	 */
+	m = alloc_bootmem(sizeof(struct huge_bootmem_page));
+	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
+#else
+	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
+#endif
+
+	list_add(&m->list, &huge_boot_pages);
+	gpage_freearray[idx].nr_gpages = nr_gpages;
+	gpage_freearray[idx].gpage_list[nr_gpages] = 0;
+	m->hstate = hstate;
+
+	return 1;
+}
+/*
+ * Scan the command line hugepagesz= options for gigantic pages; store those in
+ * a list that we use to allocate the memory once all options are parsed.
+ */
+
+unsigned long gpage_npages[MMU_PAGE_COUNT];
+
+static int __init do_gpage_early_setup(char *param, char *val)
+{
+	static phys_addr_t size;
+	unsigned long npages;
+
+	/*
+	 * The hugepagesz and hugepages cmdline options are interleaved.  We
+	 * use the size variable to keep track of whether or not this was done
+	 * properly and skip over instances where it is incorrect.  Other
+	 * command-line parsing code will issue warnings, so we don't need to.
+	 *
+	 */
+	if ((strcmp(param, "default_hugepagesz") == 0) ||
+	    (strcmp(param, "hugepagesz") == 0)) {
+		size = memparse(val, NULL);
+	} else if (strcmp(param, "hugepages") == 0) {
+		if (size != 0) {
+			if (sscanf(val, "%lu", &npages) <= 0)
+				npages = 0;
+			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
+			size = 0;
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * This function allocates physical space for pages that are larger than the
+ * buddy allocator can handle.  We want to allocate these in highmem because
+ * the amount of lowmem is limited.  This means that this function MUST be
+ * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
+ * allocator to grab highmem.
+ */
+void __init reserve_hugetlb_gpages(void)
+{
+	static __initdata char cmdline[COMMAND_LINE_SIZE];
+	phys_addr_t size, base;
+	int i;
+
+	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
+	parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);
+
+	/*
+	 * Walk gpage list in reverse, allocating larger page sizes first.
+	 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
+	 * When we reach the point in the list where pages are no longer
+	 * considered gpages, we're done.
+	 */
+	for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
+		if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
+			continue;
+		else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
+			break;
+
+		size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
+		base = memblock_alloc_base(size * gpage_npages[i], size,
+					   MEMBLOCK_ALLOC_ANYWHERE);
+		add_gpage(base, size, gpage_npages[i]);
+	}
+}
+
+#else /* PPC64 */
+
+/* Build list of addresses of gigantic pages.  This function is used in early
+ * boot before the buddy or bootmem allocator is setup.
+ */
+void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
 {
 	if (!addr)
 		return;
@@ -199,19 +348,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
 	m->hstate = hstate;
 	return 1;
 }
+#endif
 
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
 
+#ifdef CONFIG_PPC32
+#define HUGEPD_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
+
+struct hugepd_freelist {
+	struct rcu_head	rcu;
+	unsigned int index;
+	void *ptes[0];
+};
+
+static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
+
+static void hugepd_free_rcu_callback(struct rcu_head *head)
+{
+	struct hugepd_freelist *batch =
+		container_of(head, struct hugepd_freelist, rcu);
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)
+		kmem_cache_free(hugepte_cache, batch->ptes[i]);
+
+	free_page((unsigned long)batch);
+}
+
+static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
+{
+	struct hugepd_freelist **batchp;
+
+	batchp = &__get_cpu_var(hugepd_freelist_cur);
+
+	if (atomic_read(&tlb->mm->mm_users) < 2 ||
+	    cpumask_equal(mm_cpumask(tlb->mm),
+			  cpumask_of(smp_processor_id()))) {
+		kmem_cache_free(hugepte_cache, hugepte);
+		return;
+	}
+
+	if (*batchp == NULL) {
+		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
+		(*batchp)->index = 0;
+	}
+
+	(*batchp)->ptes[(*batchp)->index++] = hugepte;
+	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
+		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
+		*batchp = NULL;
+	}
+}
+#endif
+
 static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
 			      unsigned long start, unsigned long end,
 			      unsigned long floor, unsigned long ceiling)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
-	unsigned shift = hugepd_shift(*hpdp);
+	int i;
+
 	unsigned long pdmask = ~((1UL << pdshift) - 1);
+	unsigned int num_hugepd = 1;
+
+#ifdef CONFIG_PPC64
+	unsigned int shift = hugepd_shift(*hpdp);
+#else
+	/* Note: On 32-bit the hpdp may be the first of several */
+	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
+#endif
 
 	start &= pdmask;
 	if (start < floor)
@@ -224,9 +433,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 	if (end - 1 > ceiling - 1)
 		return;
 
-	hpdp->pd = 0;
+	for (i = 0; i < num_hugepd; i++, hpdp++)
+		hpdp->pd = 0;
+
 	tlb->need_flush = 1;
+#ifdef CONFIG_PPC64
 	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
+#else
+	hugepd_free(tlb, hugepte);
+#endif
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -331,18 +546,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	 * too.
 	 */
 
-	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset(tlb->mm, addr);
 		if (!is_hugepd(pgd)) {
 			if (pgd_none_or_clear_bad(pgd))
 				continue;
 			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 		} else {
+#ifdef CONFIG_PPC32
+			/*
+			 * Increment next by the size of the huge mapping since
+			 * on 32-bit there may be more than one entry at the pgd
+			 * level for a single hugepage, but all of them point to
+			 * the same kmem cache that holds the hugepte.
+			 */
+			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
+#endif
 			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
 					  addr, next, floor, ceiling);
 		}
-	} while (pgd++, addr = next, addr != end);
+	} while (addr = next, addr != end);
 }
 
 struct page *
@@ -466,17 +690,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
+#ifdef CONFIG_MM_SLICES
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
+#else
+	return get_unmapped_area(file, addr, len, pgoff, flags);
+#endif
 }
 
 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
+#ifdef CONFIG_MM_SLICES
 	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
 
 	return 1UL << mmu_psize_to_shift(psize);
+#else
+	if (!is_vm_hugetlb_page(vma))
+		return PAGE_SIZE;
+
+	return huge_page_size(hstate_vma(vma));
+#endif
+}
+
+static inline bool is_power_of_4(unsigned long x)
+{
+	if (is_power_of_2(x))
+		return (__ilog2(x) % 2) ? false : true;
+	return false;
 }
 
 static int __init add_huge_page_size(unsigned long long size)
@@ -486,9 +728,14 @@ static int __init add_huge_page_size(unsigned long long size)
 
 	/* Check that it is a page size supported by the hardware and
 	 * that it fits within pagetable and slice limits. */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	if ((size < PAGE_SIZE) || !is_power_of_4(size))
+		return -EINVAL;
+#else
 	if (!is_power_of_2(size)
 	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
 		return -EINVAL;
+#endif
 
 	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
 		return -EINVAL;
@@ -525,6 +772,46 @@ static int __init hugepage_setup_sz(char *str)
 }
 __setup("hugepagesz=", hugepage_setup_sz);
 
+#ifdef CONFIG_FSL_BOOKE
+struct kmem_cache *hugepte_cache;
+static int __init hugetlbpage_init(void)
+{
+	int psize;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		unsigned shift;
+
+		if (!mmu_psize_defs[psize].shift)
+			continue;
+
+		shift = mmu_psize_to_shift(psize);
+
+		/* Don't treat normal page sizes as huge... */
+		if (shift != PAGE_SHIFT)
+			if (add_huge_page_size(1ULL << shift) < 0)
+				continue;
+	}
+
+	/*
+	 * Create a kmem cache for hugeptes.  The bottom bits in the pte have
+	 * size information encoded in them, so align them to allow this
+	 */
+	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
+					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
+	if (hugepte_cache == NULL)
+		panic("%s: Unable to create kmem cache for hugeptes\n",
+		      __func__);
+
+	/* Default hpage size = 4M */
+	if (mmu_psize_defs[MMU_PAGE_4M].shift)
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
+	else
+		panic("%s: Unable to set default huge page size\n", __func__);
+
+
+	return 0;
+}
+#else
 static int __init hugetlbpage_init(void)
 {
 	int psize;
@@ -567,15 +854,23 @@ static int __init hugetlbpage_init(void)
 
 	return 0;
 }
-
+#endif
 module_init(hugetlbpage_init);
 
 void flush_dcache_icache_hugepage(struct page *page)
 {
 	int i;
+	void *start;
 
 	BUG_ON(!PageCompound(page));
 
-	for (i = 0; i < (1UL << compound_order(page)); i++)
-		__flush_dcache_icache(page_address(page+i));
+	for (i = 0; i < (1UL << compound_order(page)); i++) {
+		if (!PageHighMem(page)) {
+			__flush_dcache_icache(page_address(page+i));
+		} else {
+			start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE);
+			__flush_dcache_icache(start);
+			kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+		}
+	}
 }