author     Becky Bruce <beckyb@kernel.crashing.org>  2011-06-28 05:54:48 -0400
committer  Benjamin Herrenschmidt <benh@kernel.crashing.org>  2011-09-19 19:19:40 -0400
commit     41151e77a4d96ea138cede6d84c955aa4769ce74 (patch)
tree       2d997b77b9adf406a2fd30326bff688577d2e64f /arch/powerpc
parent     7df5659eefad9b6d457ccdee016bd78bd064cfc0 (diff)
powerpc: Hugetlb for BookE
Enable hugepages on Freescale BookE processors. This allows the kernel to use huge TLB entries to map pages, which can greatly reduce the number of TLB misses and the amount of TLB thrashing experienced by applications with large memory footprints. Care should be taken when using this on FSL processors, as the number of large TLB entries supported by the core is low (16-64) on current processors.

The supported set of hugepage sizes includes 4m, 16m, 64m, 256m, and 1g. Page sizes larger than the max zone size are called "gigantic" pages and must be allocated on the command line (and cannot be deallocated).

This is currently only fully implemented for Freescale 32-bit BookE processors, but there is some infrastructure in the code for 64-bit BookE.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
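A worked example of the command-line reservation described above (the page counts are illustrative, not from this patch): booting with

    hugepagesz=1g hugepages=2 hugepagesz=256m hugepages=4

makes do_gpage_early_setup() record two 1g and four 256m gigantic pages, which reserve_hugetlb_gpages() then carves out of memblock before the buddy allocator is initialized; since the buddy allocator never owns that memory, the pages cannot be deallocated at runtime.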
Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig                   |   3
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h     |  63
-rw-r--r--  arch/powerpc/include/asm/mmu-book3e.h  |   7
-rw-r--r--  arch/powerpc/include/asm/mmu-hash64.h  |   3
-rw-r--r--  arch/powerpc/include/asm/mmu.h         |  18
-rw-r--r--  arch/powerpc/include/asm/page.h        |  31
-rw-r--r--  arch/powerpc/include/asm/page_64.h     |  11
-rw-r--r--  arch/powerpc/include/asm/pte-book3e.h  |   3
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S   | 133
-rw-r--r--  arch/powerpc/mm/Makefile               |   1
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c        |   3
-rw-r--r--  arch/powerpc/mm/hugetlbpage-book3e.c   | 121
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c          | 379
-rw-r--r--  arch/powerpc/mm/init_32.c              |   9
-rw-r--r--  arch/powerpc/mm/mem.c                  |   5
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c   |   5
-rw-r--r--  arch/powerpc/mm/pgtable.c              |   3
-rw-r--r--  arch/powerpc/mm/tlb_low_64e.S          |  24
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c           |  46
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype |   4
20 files changed, 766 insertions(+), 106 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0a3d5560c9be..62711421cd64 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -429,8 +429,7 @@ config ARCH_POPULATES_NODE_MAP
429 def_bool y 429 def_bool y
430 430
431config SYS_SUPPORTS_HUGETLBFS 431config SYS_SUPPORTS_HUGETLBFS
432 def_bool y 432 bool
433 depends on PPC_BOOK3S_64
434 433
435source "mm/Kconfig" 434source "mm/Kconfig"
436 435
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 5856a66ab404..86004930a78e 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -1,15 +1,60 @@
1#ifndef _ASM_POWERPC_HUGETLB_H 1#ifndef _ASM_POWERPC_HUGETLB_H
2#define _ASM_POWERPC_HUGETLB_H 2#define _ASM_POWERPC_HUGETLB_H
3 3
4#ifdef CONFIG_HUGETLB_PAGE
4#include <asm/page.h> 5#include <asm/page.h>
5 6
7extern struct kmem_cache *hugepte_cache;
8extern void __init reserve_hugetlb_gpages(void);
9
10static inline pte_t *hugepd_page(hugepd_t hpd)
11{
12 BUG_ON(!hugepd_ok(hpd));
13 return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
14}
15
16static inline unsigned int hugepd_shift(hugepd_t hpd)
17{
18 return hpd.pd & HUGEPD_SHIFT_MASK;
19}
20
21static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
22 unsigned pdshift)
23{
24 /*
25 * On 32-bit, we have multiple higher-level table entries that point to
26 * the same hugepte. Just use the first one since they're all
27 * identical. So for that case, idx=0.
28 */
29 unsigned long idx = 0;
30
31 pte_t *dir = hugepd_page(*hpdp);
32#ifdef CONFIG_PPC64
33 idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
34#endif
35
36 return dir + idx;
37}
38
6pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, 39pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
7 unsigned long addr, unsigned *shift); 40 unsigned long addr, unsigned *shift);
8 41
9void flush_dcache_icache_hugepage(struct page *page); 42void flush_dcache_icache_hugepage(struct page *page);
10 43
44#if defined(CONFIG_PPC_MM_SLICES) || defined(CONFIG_PPC_SUBPAGE_PROT)
11int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 45int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
12 unsigned long len); 46 unsigned long len);
47#else
48static inline int is_hugepage_only_range(struct mm_struct *mm,
49 unsigned long addr,
50 unsigned long len)
51{
52 return 0;
53}
54#endif
55
56void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte);
57void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
13 58
14void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, 59void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
15 unsigned long end, unsigned long floor, 60 unsigned long end, unsigned long floor,
@@ -50,8 +95,11 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
50static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 95static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
51 unsigned long addr, pte_t *ptep) 96 unsigned long addr, pte_t *ptep)
52{ 97{
53 unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); 98#ifdef CONFIG_PPC64
54 return __pte(old); 99 return __pte(pte_update(mm, addr, ptep, ~0UL, 1));
100#else
101 return __pte(pte_update(ptep, ~0UL, 0));
102#endif
55} 103}
56 104
57static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 105static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
@@ -93,4 +141,15 @@ static inline void arch_release_hugepage(struct page *page)
93{ 141{
94} 142}
95 143
144#else /* ! CONFIG_HUGETLB_PAGE */
145static inline void reserve_hugetlb_gpages(void)
146{
147 pr_err("Cannot reserve gpages without hugetlb enabled\n");
148}
149static inline void flush_hugetlb_page(struct vm_area_struct *vma,
150 unsigned long vmaddr)
151{
152}
153#endif
154
96#endif /* _ASM_POWERPC_HUGETLB_H */ 155#endif /* _ASM_POWERPC_HUGETLB_H */
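The hugepd encode/decode pair above is compact enough to warrant a standalone sketch. The following user-space C program (the table address and shift are made-up values) mirrors what __hugepte_alloc() stores and what hugepd_page()/hugepd_shift() recover; it relies on hugepte tables being aligned to HUGEPD_SHIFT_MASK + 1 bytes, which the hugepte kmem cache created later in this patch guarantees:

#include <stdio.h>

#define PD_HUGE           0x8000000000000000UL /* top address bit; cleared to mark a hugepd */
#define HUGEPD_SHIFT_MASK 0x3fUL               /* low bits carry the page shift */

int main(void)
{
	unsigned long table  = 0xc000000012340000UL; /* hypothetical hugepte table address */
	unsigned long pshift = 24;                   /* 16m pages */

	/* Encode as __hugepte_alloc() does: clear the top bit, OR in the shift. */
	unsigned long pd = (table & ~PD_HUGE) | pshift;

	/* Decode as hugepd_page()/hugepd_shift() do: restore the bit, mask off the shift. */
	unsigned long page  = (pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE;
	unsigned long shift = pd & HUGEPD_SHIFT_MASK;

	printf("table %#lx, shift %lu\n", page, shift); /* prints the original pair */
	return 0;
}

Because the stored value has its top bit cleared, a hugepd entry reads as a non-negative signed word; the tlb_low_64e.S changes further down test for exactly that.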
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 3ea0f9a259d8..0260ea5ec3c2 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -66,6 +66,7 @@
66#define MAS2_M 0x00000004 66#define MAS2_M 0x00000004
67#define MAS2_G 0x00000002 67#define MAS2_G 0x00000002
68#define MAS2_E 0x00000001 68#define MAS2_E 0x00000001
69#define MAS2_WIMGE_MASK 0x0000001f
69#define MAS2_EPN_MASK(size) (~0 << (size + 10)) 70#define MAS2_EPN_MASK(size) (~0 << (size + 10))
70#define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags)) 71#define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags))
71 72
@@ -80,6 +81,7 @@
80#define MAS3_SW 0x00000004 81#define MAS3_SW 0x00000004
81#define MAS3_UR 0x00000002 82#define MAS3_UR 0x00000002
82#define MAS3_SR 0x00000001 83#define MAS3_SR 0x00000001
84#define MAS3_BAP_MASK 0x0000003f
83#define MAS3_SPSIZE 0x0000003e 85#define MAS3_SPSIZE 0x0000003e
84#define MAS3_SPSIZE_SHIFT 1 86#define MAS3_SPSIZE_SHIFT 1
85 87
@@ -212,6 +214,11 @@ typedef struct {
212 unsigned int id; 214 unsigned int id;
213 unsigned int active; 215 unsigned int active;
214 unsigned long vdso_base; 216 unsigned long vdso_base;
217#ifdef CONFIG_PPC_MM_SLICES
218 u64 low_slices_psize; /* SLB page size encodings */
219 u64 high_slices_psize; /* 4 bits per slice for now */
220 u16 user_psize; /* page size index */
221#endif
215} mm_context_t; 222} mm_context_t;
216 223
217/* Page size definitions, common between 32 and 64-bit 224/* Page size definitions, common between 32 and 64-bit
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index b445e0af4c2b..db645ec842bd 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -262,8 +262,7 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access,
262extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, 262extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
263 unsigned long pstart, unsigned long prot, 263 unsigned long pstart, unsigned long prot,
264 int psize, int ssize); 264 int psize, int ssize);
265extern void add_gpage(unsigned long addr, unsigned long page_size, 265extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
266 unsigned long number_of_pages);
267extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); 266extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
268 267
269extern void hpte_init_native(void); 268extern void hpte_init_native(void);
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 698b30638681..f0145522cfba 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -175,14 +175,16 @@ extern u64 ppc64_rma_size;
175#define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ 175#define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */
176#define MMU_PAGE_256K 4 176#define MMU_PAGE_256K 4
177#define MMU_PAGE_1M 5 177#define MMU_PAGE_1M 5
178#define MMU_PAGE_8M 6 178#define MMU_PAGE_4M 6
179#define MMU_PAGE_16M 7 179#define MMU_PAGE_8M 7
180#define MMU_PAGE_256M 8 180#define MMU_PAGE_16M 8
181#define MMU_PAGE_1G 9 181#define MMU_PAGE_64M 9
182#define MMU_PAGE_16G 10 182#define MMU_PAGE_256M 10
183#define MMU_PAGE_64G 11 183#define MMU_PAGE_1G 11
184#define MMU_PAGE_COUNT 12 184#define MMU_PAGE_16G 12
185 185#define MMU_PAGE_64G 13
186
187#define MMU_PAGE_COUNT 14
186 188
187#if defined(CONFIG_PPC_STD_MMU_64) 189#if defined(CONFIG_PPC_STD_MMU_64)
188/* 64-bit classic hash table MMU */ 190/* 64-bit classic hash table MMU */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 2cd664ef0a5e..dd9c4fd038e0 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -36,6 +36,18 @@
36 36
37#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) 37#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
38 38
39#ifndef __ASSEMBLY__
40#ifdef CONFIG_HUGETLB_PAGE
41extern unsigned int HPAGE_SHIFT;
42#else
43#define HPAGE_SHIFT PAGE_SHIFT
44#endif
45#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
46#define HPAGE_MASK (~(HPAGE_SIZE - 1))
47#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
48#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
49#endif
50
39/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ 51/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
40#define __HAVE_ARCH_GATE_AREA 1 52#define __HAVE_ARCH_GATE_AREA 1
41 53
@@ -158,6 +170,24 @@ extern phys_addr_t kernstart_addr;
158#define is_kernel_addr(x) ((x) >= PAGE_OFFSET) 170#define is_kernel_addr(x) ((x) >= PAGE_OFFSET)
159#endif 171#endif
160 172
173/*
174 * Use the top bit of the higher-level page table entries to indicate whether
175 * the entries we point to contain hugepages. This works because we know that
176 * the page tables live in kernel space. If we ever decide to support having
177 * page tables at arbitrary addresses, this breaks and will have to change.
178 */
179#ifdef CONFIG_PPC64
180#define PD_HUGE 0x8000000000000000
181#else
182#define PD_HUGE 0x80000000
183#endif
184
185/*
186 * Some number of bits at the level of the page table that points to
187 * a hugepte are used to encode the size. This masks those bits.
188 */
189#define HUGEPD_SHIFT_MASK 0x3f
190
161#ifndef __ASSEMBLY__ 191#ifndef __ASSEMBLY__
162 192
163#undef STRICT_MM_TYPECHECKS 193#undef STRICT_MM_TYPECHECKS
@@ -243,7 +273,6 @@ typedef unsigned long pgprot_t;
243#endif 273#endif
244 274
245typedef struct { signed long pd; } hugepd_t; 275typedef struct { signed long pd; } hugepd_t;
246#define HUGEPD_SHIFT_MASK 0x3f
247 276
248#ifdef CONFIG_HUGETLB_PAGE 277#ifdef CONFIG_HUGETLB_PAGE
249static inline int hugepd_ok(hugepd_t hpd) 278static inline int hugepd_ok(hugepd_t hpd)
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 9356262fd3cc..fb40ede6bc0d 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -64,17 +64,6 @@ extern void copy_page(void *to, void *from);
64/* Log 2 of page table size */ 64/* Log 2 of page table size */
65extern u64 ppc64_pft_size; 65extern u64 ppc64_pft_size;
66 66
67/* Large pages size */
68#ifdef CONFIG_HUGETLB_PAGE
69extern unsigned int HPAGE_SHIFT;
70#else
71#define HPAGE_SHIFT PAGE_SHIFT
72#endif
73#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
74#define HPAGE_MASK (~(HPAGE_SIZE - 1))
75#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
76#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
77
78#endif /* __ASSEMBLY__ */ 67#endif /* __ASSEMBLY__ */
79 68
80#ifdef CONFIG_PPC_MM_SLICES 69#ifdef CONFIG_PPC_MM_SLICES
diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h
index 082d515930a2..0156702ba24e 100644
--- a/arch/powerpc/include/asm/pte-book3e.h
+++ b/arch/powerpc/include/asm/pte-book3e.h
@@ -72,6 +72,9 @@
72#define PTE_RPN_SHIFT (24) 72#define PTE_RPN_SHIFT (24)
73#endif 73#endif
74 74
75#define PTE_WIMGE_SHIFT (19)
76#define PTE_BAP_SHIFT (2)
77
75/* On 32-bit, we never clear the top part of the PTE */ 78/* On 32-bit, we never clear the top part of the PTE */
76#ifdef CONFIG_PPC32 79#ifdef CONFIG_PPC32
77#define _PTE_NONE_MASK 0xffffffff00000000ULL 80#define _PTE_NONE_MASK 0xffffffff00000000ULL
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 50845924b7d9..4ea9bfbf67e9 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -236,8 +236,24 @@ _ENTRY(__early_start)
236 * if we find the pte (fall through): 236 * if we find the pte (fall through):
237 * r11 is low pte word 237 * r11 is low pte word
238 * r12 is pointer to the pte 238 * r12 is pointer to the pte
239 * r10 is the pshift from the PGD, if we're a hugepage
239 */ 240 */
240#ifdef CONFIG_PTE_64BIT 241#ifdef CONFIG_PTE_64BIT
242#ifdef CONFIG_HUGETLB_PAGE
243#define FIND_PTE \
244 rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
245 lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
246 rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
247 blt 1000f; /* Normal non-huge page */ \
248 beq 2f; /* Bail if no table */ \
249 oris r11, r11, PD_HUGE@h; /* Put back address bit */ \
250 andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \
251 xor r12, r10, r11; /* drop size bits from pointer */ \
252 b 1001f; \
2531000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
254 li r10, 0; /* clear r10 */ \
2551001: lwz r11, 4(r12); /* Get pte entry */
256#else
241#define FIND_PTE \ 257#define FIND_PTE \
242 rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ 258 rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
243 lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ 259 lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
@@ -245,7 +261,8 @@ _ENTRY(__early_start)
245 beq 2f; /* Bail if no table */ \ 261 beq 2f; /* Bail if no table */ \
246 rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ 262 rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
247 lwz r11, 4(r12); /* Get pte entry */ 263 lwz r11, 4(r12); /* Get pte entry */
248#else 264#endif /* HUGEPAGE */
265#else /* !PTE_64BIT */
249#define FIND_PTE \ 266#define FIND_PTE \
250 rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ 267 rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \
251 lwz r11, 0(r11); /* Get L1 entry */ \ 268 lwz r11, 0(r11); /* Get L1 entry */ \
@@ -402,8 +419,8 @@ interrupt_base:
402 419
403#ifdef CONFIG_PTE_64BIT 420#ifdef CONFIG_PTE_64BIT
404#ifdef CONFIG_SMP 421#ifdef CONFIG_SMP
405 subf r10,r11,r12 /* create false data dep */ 422 subf r13,r11,r12 /* create false data dep */
406 lwzx r13,r11,r10 /* Get upper pte bits */ 423 lwzx r13,r11,r13 /* Get upper pte bits */
407#else 424#else
408 lwz r13,0(r12) /* Get upper pte bits */ 425 lwz r13,0(r12) /* Get upper pte bits */
409#endif 426#endif
@@ -483,8 +500,8 @@ interrupt_base:
483 500
484#ifdef CONFIG_PTE_64BIT 501#ifdef CONFIG_PTE_64BIT
485#ifdef CONFIG_SMP 502#ifdef CONFIG_SMP
486 subf r10,r11,r12 /* create false data dep */ 503 subf r13,r11,r12 /* create false data dep */
487 lwzx r13,r11,r10 /* Get upper pte bits */ 504 lwzx r13,r11,r13 /* Get upper pte bits */
488#else 505#else
489 lwz r13,0(r12) /* Get upper pte bits */ 506 lwz r13,0(r12) /* Get upper pte bits */
490#endif 507#endif
@@ -548,7 +565,7 @@ interrupt_base:
548/* 565/*
549 * Both the instruction and data TLB miss get to this 566 * Both the instruction and data TLB miss get to this
550 * point to load the TLB. 567 * point to load the TLB.
551 * r10 - available to use 568 * r10 - tsize encoding (if HUGETLB_PAGE) or available to use
552 * r11 - TLB (info from Linux PTE) 569 * r11 - TLB (info from Linux PTE)
553 * r12 - available to use 570 * r12 - available to use
554 * r13 - upper bits of PTE (if PTE_64BIT) or available to use 571 * r13 - upper bits of PTE (if PTE_64BIT) or available to use
@@ -558,21 +575,73 @@ interrupt_base:
558 * Upon exit, we reload everything and RFI. 575 * Upon exit, we reload everything and RFI.
559 */ 576 */
560finish_tlb_load: 577finish_tlb_load:
578#ifdef CONFIG_HUGETLB_PAGE
579 cmpwi 6, r10, 0 /* check for huge page */
580 beq 6, finish_tlb_load_cont /* !huge */
581
582 /* Alas, we need more scratch registers for hugepages */
583 mfspr r12, SPRN_SPRG_THREAD
584 stw r14, THREAD_NORMSAVE(4)(r12)
585 stw r15, THREAD_NORMSAVE(5)(r12)
586 stw r16, THREAD_NORMSAVE(6)(r12)
587 stw r17, THREAD_NORMSAVE(7)(r12)
588
589 /* Get the next_tlbcam_idx percpu var */
590#ifdef CONFIG_SMP
591 lwz r12, THREAD_INFO-THREAD(r12)
592 lwz r15, TI_CPU(r12)
593 lis r14, __per_cpu_offset@h
594 ori r14, r14, __per_cpu_offset@l
595 rlwinm r15, r15, 2, 0, 29
596 lwzx r16, r14, r15
597#else
598 li r16, 0
599#endif
600 lis r17, next_tlbcam_idx@h
601 ori r17, r17, next_tlbcam_idx@l
602 add r17, r17, r16 /* r17 = *next_tlbcam_idx */
603 lwz r15, 0(r17) /* r15 = next_tlbcam_idx */
604
605 lis r14, MAS0_TLBSEL(1)@h /* select TLB1 (TLBCAM) */
606 rlwimi r14, r15, 16, 4, 15 /* next_tlbcam_idx entry */
607 mtspr SPRN_MAS0, r14
608
609 /* Extract TLB1CFG(NENTRY) */
610 mfspr r16, SPRN_TLB1CFG
611 andi. r16, r16, 0xfff
612
613 /* Update next_tlbcam_idx, wrapping when necessary */
614 addi r15, r15, 1
615 cmpw r15, r16
616 blt 100f
617 lis r14, tlbcam_index@h
618 ori r14, r14, tlbcam_index@l
619 lwz r15, 0(r14)
620100: stw r15, 0(r17)
621
622 /*
623 * Calc MAS1_TSIZE from r10 (which has pshift encoded)
624 * tlb_enc = (pshift - 10).
625 */
626 subi r15, r10, 10
627 mfspr r16, SPRN_MAS1
628 rlwimi r16, r15, 7, 20, 24
629 mtspr SPRN_MAS1, r16
630
631 /* copy the pshift for use later */
632 mr r14, r10
633
634 /* fall through */
635
636#endif /* CONFIG_HUGETLB_PAGE */
637
561 /* 638 /*
562 * We set execute, because we don't have the granularity to 639 * We set execute, because we don't have the granularity to
563 * properly set this at the page level (Linux problem). 640 * properly set this at the page level (Linux problem).
564 * Many of these bits are software only. Bits we don't set 641 * Many of these bits are software only. Bits we don't set
565 * here we (properly should) assume have the appropriate value. 642 * here we (properly should) assume have the appropriate value.
566 */ 643 */
567 644finish_tlb_load_cont:
568 mfspr r12, SPRN_MAS2
569#ifdef CONFIG_PTE_64BIT
570 rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */
571#else
572 rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */
573#endif
574 mtspr SPRN_MAS2, r12
575
576#ifdef CONFIG_PTE_64BIT 645#ifdef CONFIG_PTE_64BIT
577 rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ 646 rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */
578 andi. r10, r11, _PAGE_DIRTY 647 andi. r10, r11, _PAGE_DIRTY
@@ -581,22 +650,40 @@ finish_tlb_load:
581 andc r12, r12, r10 650 andc r12, r12, r10
5821: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */ 6511: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */
583 rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */ 652 rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */
584 mtspr SPRN_MAS3, r12 6532: mtspr SPRN_MAS3, r12
585BEGIN_MMU_FTR_SECTION 654BEGIN_MMU_FTR_SECTION
586 srwi r10, r13, 12 /* grab RPN[12:31] */ 655 srwi r10, r13, 12 /* grab RPN[12:31] */
587 mtspr SPRN_MAS7, r10 656 mtspr SPRN_MAS7, r10
588END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) 657END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
589#else 658#else
590 li r10, (_PAGE_EXEC | _PAGE_PRESENT) 659 li r10, (_PAGE_EXEC | _PAGE_PRESENT)
660 mr r13, r11
591 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ 661 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */
592 and r12, r11, r10 662 and r12, r11, r10
593 andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ 663 andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
594 slwi r10, r12, 1 664 slwi r10, r12, 1
595 or r10, r10, r12 665 or r10, r10, r12
596 iseleq r12, r12, r10 666 iseleq r12, r12, r10
597 rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ 667 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
598 mtspr SPRN_MAS3, r11 668 mtspr SPRN_MAS3, r13
599#endif 669#endif
670
671 mfspr r12, SPRN_MAS2
672#ifdef CONFIG_PTE_64BIT
673 rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */
674#else
675 rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */
676#endif
677#ifdef CONFIG_HUGETLB_PAGE
678 beq 6, 3f /* don't mask if page isn't huge */
679 li r13, 1
680 slw r13, r13, r14
681 subi r13, r13, 1
682 rlwinm r13, r13, 0, 0, 19 /* bottom bits used for WIMGE/etc */
683 andc r12, r12, r13 /* mask off ea bits within the page */
684#endif
6853: mtspr SPRN_MAS2, r12
686
600#ifdef CONFIG_E200 687#ifdef CONFIG_E200
601 /* Round robin TLB1 entries assignment */ 688 /* Round robin TLB1 entries assignment */
602 mfspr r12, SPRN_MAS0 689 mfspr r12, SPRN_MAS0
@@ -622,11 +709,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
622 mtspr SPRN_MAS0,r12 709 mtspr SPRN_MAS0,r12
623#endif /* CONFIG_E200 */ 710#endif /* CONFIG_E200 */
624 711
712tlb_write_entry:
625 tlbwe 713 tlbwe
626 714
627 /* Done...restore registers and get out of here. */ 715 /* Done...restore registers and get out of here. */
628 mfspr r10, SPRN_SPRG_THREAD 716 mfspr r10, SPRN_SPRG_THREAD
629 lwz r11, THREAD_NORMSAVE(3)(r10) 717#ifdef CONFIG_HUGETLB_PAGE
718 beq 6, 8f /* skip restore for 4k page faults */
719 lwz r14, THREAD_NORMSAVE(4)(r10)
720 lwz r15, THREAD_NORMSAVE(5)(r10)
721 lwz r16, THREAD_NORMSAVE(6)(r10)
722 lwz r17, THREAD_NORMSAVE(7)(r10)
723#endif
7248: lwz r11, THREAD_NORMSAVE(3)(r10)
630 mtcr r11 725 mtcr r11
631 lwz r13, THREAD_NORMSAVE(2)(r10) 726 lwz r13, THREAD_NORMSAVE(2)(r10)
632 lwz r12, THREAD_NORMSAVE(1)(r10) 727 lwz r12, THREAD_NORMSAVE(1)(r10)
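A quick check of the MAS1_TSIZE arithmetic in the hugepage path above: with tlb_enc = pshift - 10, a 4m page (pshift 22) encodes to 12 and a 1g page (pshift 30) to 20, which should match BOOK3E_PAGESZ_4M and BOOK3E_PAGESZ_1GB given that the Book3E page-size encodings follow the same shift - 10 rule as the mmu_psize_defs entries added to tlb_nohash.c later in this patch.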
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index bdca46e08382..991ee813d2a8 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o
29ifeq ($(CONFIG_HUGETLB_PAGE),y) 29ifeq ($(CONFIG_HUGETLB_PAGE),y)
30obj-y += hugetlbpage.o 30obj-y += hugetlbpage.o
31obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o 31obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
32obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
32endif 33endif
33obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o 34obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
34obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o 35obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 26b2872b3d00..1f8b2a05e3d0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -105,9 +105,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M;
105int mmu_highuser_ssize = MMU_SEGSIZE_256M; 105int mmu_highuser_ssize = MMU_SEGSIZE_256M;
106u16 mmu_slb_size = 64; 106u16 mmu_slb_size = 64;
107EXPORT_SYMBOL_GPL(mmu_slb_size); 107EXPORT_SYMBOL_GPL(mmu_slb_size);
108#ifdef CONFIG_HUGETLB_PAGE
109unsigned int HPAGE_SHIFT;
110#endif
111#ifdef CONFIG_PPC_64K_PAGES 108#ifdef CONFIG_PPC_64K_PAGES
112int mmu_ci_restrictions; 109int mmu_ci_restrictions;
113#endif 110#endif
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
new file mode 100644
index 000000000000..1295b7c1cdac
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -0,0 +1,121 @@
1/*
2 * PPC Huge TLB Page Support for Book3E MMU
3 *
4 * Copyright (C) 2009 David Gibson, IBM Corporation.
5 * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
6 *
7 */
8#include <linux/mm.h>
9#include <linux/hugetlb.h>
10
11static inline int mmu_get_tsize(int psize)
12{
13 return mmu_psize_defs[psize].enc;
14}
15
16static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
17{
18 int found = 0;
19
20 mtspr(SPRN_MAS6, pid << 16);
21 if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) {
22 asm volatile(
23 "li %0,0\n"
24 "tlbsx. 0,%1\n"
25 "bne 1f\n"
26 "li %0,1\n"
27 "1:\n"
28 : "=&r"(found) : "r"(ea));
29 } else {
30 asm volatile(
31 "tlbsx 0,%1\n"
32 "mfspr %0,0x271\n"
33 "srwi %0,%0,31\n"
34 : "=&r"(found) : "r"(ea));
35 }
36
37 return found;
38}
39
40void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte)
41{
42 unsigned long mas1, mas2;
43 u64 mas7_3;
44 unsigned long psize, tsize, shift;
45 unsigned long flags;
46
47#ifdef CONFIG_PPC_FSL_BOOK3E
48 int index, lz, ncams;
49 struct vm_area_struct *vma;
50#endif
51
52 if (unlikely(is_kernel_addr(ea)))
53 return;
54
55#ifdef CONFIG_MM_SLICES
56 psize = mmu_get_tsize(get_slice_psize(mm, ea));
57 tsize = mmu_get_psize(psize);
58 shift = mmu_psize_defs[psize].shift;
59#else
60 vma = find_vma(mm, ea);
61 psize = vma_mmu_pagesize(vma); /* returns actual size in bytes */
62 asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize));
63 shift = 31 - lz;
64 tsize = 21 - lz;
65#endif
66
67 /*
68 * We can't be interrupted while we're setting up the MAS
69 * registers or after we've confirmed that no tlb exists.
70 */
71 local_irq_save(flags);
72
73 if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
74 local_irq_restore(flags);
75 return;
76 }
77
78#ifdef CONFIG_PPC_FSL_BOOK3E
79 ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
80
81 /* We have to use the CAM(TLB1) on FSL parts for hugepages */
82 index = __get_cpu_var(next_tlbcam_idx);
83 mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
84
85 /* Just round-robin the entries and wrap when we hit the end */
86 if (unlikely(index == ncams - 1))
87 __get_cpu_var(next_tlbcam_idx) = tlbcam_index;
88 else
89 __get_cpu_var(next_tlbcam_idx)++;
90#endif
91 mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
92 mas2 = ea & ~((1UL << shift) - 1);
93 mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
94 mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
95 mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;
96 if (!pte_dirty(pte))
97 mas7_3 &= ~(MAS3_SW|MAS3_UW);
98
99 mtspr(SPRN_MAS1, mas1);
100 mtspr(SPRN_MAS2, mas2);
101
102 if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
103 mtspr(SPRN_MAS7_MAS3, mas7_3);
104 } else {
105 mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
106 mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
107 }
108
109 asm volatile ("tlbwe");
110
111 local_irq_restore(flags);
112}
113
114void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
115{
116 struct hstate *hstate = hstate_file(vma->vm_file);
117 unsigned long tsize = huge_page_shift(hstate) - 10;
118
119 __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, tsize, 0);
120
121}
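The non-slice sizing in book3e_hugetlb_preload() above deserves a worked example: for a hypothetical 4m VMA, vma_mmu_pagesize() returns 0x400000, PPC_CNTLZL (cntlzw on 32-bit) yields lz = 9, so shift = 31 - 9 = 22 and tsize = 21 - 9 = 12. That is the same tsize = shift - 10 encoding that flush_hugetlb_page() computes directly as huge_page_shift(hstate) - 10.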
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0b9a5c1901b9..3a5f59dcbb33 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * PPC64 (POWER4) Huge TLB Page Support for Kernel. 2 * PPC Huge TLB Page Support for Kernel.
3 * 3 *
4 * Copyright (C) 2003 David Gibson, IBM Corporation. 4 * Copyright (C) 2003 David Gibson, IBM Corporation.
5 * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
5 * 6 *
6 * Based on the IA-32 version: 7 * Based on the IA-32 version:
7 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> 8 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
@@ -11,24 +12,39 @@
11#include <linux/io.h> 12#include <linux/io.h>
12#include <linux/slab.h> 13#include <linux/slab.h>
13#include <linux/hugetlb.h> 14#include <linux/hugetlb.h>
15#include <linux/of_fdt.h>
16#include <linux/memblock.h>
17#include <linux/bootmem.h>
14#include <asm/pgtable.h> 18#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 19#include <asm/pgalloc.h>
16#include <asm/tlb.h> 20#include <asm/tlb.h>
21#include <asm/setup.h>
17 22
18#define PAGE_SHIFT_64K 16 23#define PAGE_SHIFT_64K 16
19#define PAGE_SHIFT_16M 24 24#define PAGE_SHIFT_16M 24
20#define PAGE_SHIFT_16G 34 25#define PAGE_SHIFT_16G 34
21 26
22#define MAX_NUMBER_GPAGES 1024 27unsigned int HPAGE_SHIFT;
23 28
24/* Tracks the 16G pages after the device tree is scanned and before the 29/*
25 * huge_boot_pages list is ready. */ 30 * Tracks gpages after the device tree is scanned and before the
26static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; 31 * huge_boot_pages list is ready. On 64-bit implementations, this is
32 * just used to track 16G pages and so is a single array. 32-bit
33 * implementations may have more than one gpage size due to limitations
34 * of the memory allocators, so we need multiple arrays
35 */
36#ifdef CONFIG_PPC64
37#define MAX_NUMBER_GPAGES 1024
38static u64 gpage_freearray[MAX_NUMBER_GPAGES];
27static unsigned nr_gpages; 39static unsigned nr_gpages;
28 40#else
29/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() 41#define MAX_NUMBER_GPAGES 128
30 * will choke on pointers to hugepte tables, which is handy for 42struct psize_gpages {
31 * catching screwups early. */ 43 u64 gpage_list[MAX_NUMBER_GPAGES];
44 unsigned int nr_gpages;
45};
46static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
47#endif
32 48
33static inline int shift_to_mmu_psize(unsigned int shift) 49static inline int shift_to_mmu_psize(unsigned int shift)
34{ 50{
@@ -49,25 +65,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
49 65
50#define hugepd_none(hpd) ((hpd).pd == 0) 66#define hugepd_none(hpd) ((hpd).pd == 0)
51 67
52static inline pte_t *hugepd_page(hugepd_t hpd)
53{
54 BUG_ON(!hugepd_ok(hpd));
55 return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
56}
57
58static inline unsigned int hugepd_shift(hugepd_t hpd)
59{
60 return hpd.pd & HUGEPD_SHIFT_MASK;
61}
62
63static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
64{
65 unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
66 pte_t *dir = hugepd_page(*hpdp);
67
68 return dir + idx;
69}
70
71pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) 68pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
72{ 69{
73 pgd_t *pg; 70 pgd_t *pg;
@@ -93,7 +90,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
93 if (is_hugepd(pm)) 90 if (is_hugepd(pm))
94 hpdp = (hugepd_t *)pm; 91 hpdp = (hugepd_t *)pm;
95 else if (!pmd_none(*pm)) { 92 else if (!pmd_none(*pm)) {
96 return pte_offset_map(pm, ea); 93 return pte_offset_kernel(pm, ea);
97 } 94 }
98 } 95 }
99 } 96 }
@@ -114,8 +111,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
114static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, 111static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
115 unsigned long address, unsigned pdshift, unsigned pshift) 112 unsigned long address, unsigned pdshift, unsigned pshift)
116{ 113{
117 pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), 114 struct kmem_cache *cachep;
118 GFP_KERNEL|__GFP_REPEAT); 115 pte_t *new;
116
117#ifdef CONFIG_PPC64
118 cachep = PGT_CACHE(pdshift - pshift);
119#else
120 int i;
121 int num_hugepd = 1 << (pshift - pdshift);
122 cachep = hugepte_cache;
123#endif
124
125 new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
119 126
120 BUG_ON(pshift > HUGEPD_SHIFT_MASK); 127 BUG_ON(pshift > HUGEPD_SHIFT_MASK);
121 BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); 128 BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -124,10 +131,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
124 return -ENOMEM; 131 return -ENOMEM;
125 132
126 spin_lock(&mm->page_table_lock); 133 spin_lock(&mm->page_table_lock);
134#ifdef CONFIG_PPC64
127 if (!hugepd_none(*hpdp)) 135 if (!hugepd_none(*hpdp))
128 kmem_cache_free(PGT_CACHE(pdshift - pshift), new); 136 kmem_cache_free(cachep, new);
129 else 137 else
130 hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; 138 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
139#else
140 /*
141 * We have multiple higher-level entries that point to the same
142 * actual pte location. Fill in each as we go and backtrack on error.
143 * We need all of these so the DTLB pgtable walk code can find the
144 * right higher-level entry without knowing if it's a hugepage or not.
145 */
146 for (i = 0; i < num_hugepd; i++, hpdp++) {
147 if (unlikely(!hugepd_none(*hpdp)))
148 break;
149 else
150 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
151 }
152 /* If we bailed from the for loop early, an error occurred, clean up */
153 if (i < num_hugepd) {
154 for (i = i - 1 ; i >= 0; i--, hpdp--)
155 hpdp->pd = 0;
156 kmem_cache_free(cachep, new);
157 }
158#endif
131 spin_unlock(&mm->page_table_lock); 159 spin_unlock(&mm->page_table_lock);
132 return 0; 160 return 0;
133} 161}
@@ -169,11 +197,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
169 return hugepte_offset(hpdp, addr, pdshift); 197 return hugepte_offset(hpdp, addr, pdshift);
170} 198}
171 199
200#ifdef CONFIG_PPC32
172/* Build list of addresses of gigantic pages. This function is used in early 201/* Build list of addresses of gigantic pages. This function is used in early
173 * boot before the buddy or bootmem allocator is setup. 202 * boot before the buddy or bootmem allocator is setup.
174 */ 203 */
175void add_gpage(unsigned long addr, unsigned long page_size, 204void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
176 unsigned long number_of_pages) 205{
206 unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
207 int i;
208
209 if (addr == 0)
210 return;
211
212 gpage_freearray[idx].nr_gpages = number_of_pages;
213
214 for (i = 0; i < number_of_pages; i++) {
215 gpage_freearray[idx].gpage_list[i] = addr;
216 addr += page_size;
217 }
218}
219
220/*
221 * Moves the gigantic page addresses from the temporary list to the
222 * huge_boot_pages list.
223 */
224int alloc_bootmem_huge_page(struct hstate *hstate)
225{
226 struct huge_bootmem_page *m;
227 int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
228 int nr_gpages = gpage_freearray[idx].nr_gpages;
229
230 if (nr_gpages == 0)
231 return 0;
232
233#ifdef CONFIG_HIGHMEM
234 /*
235 * If gpages can be in highmem we can't use the trick of storing the
236 * data structure in the page; allocate space for this
237 */
238 m = alloc_bootmem(sizeof(struct huge_bootmem_page));
239 m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
240#else
241 m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
242#endif
243
244 list_add(&m->list, &huge_boot_pages);
245 gpage_freearray[idx].nr_gpages = nr_gpages;
246 gpage_freearray[idx].gpage_list[nr_gpages] = 0;
247 m->hstate = hstate;
248
249 return 1;
250}
251/*
252 * Scan the command line hugepagesz= options for gigantic pages; store those in
253 * a list that we use to allocate the memory once all options are parsed.
254 */
255
256unsigned long gpage_npages[MMU_PAGE_COUNT];
257
258static int __init do_gpage_early_setup(char *param, char *val)
259{
260 static phys_addr_t size;
261 unsigned long npages;
262
263 /*
264 * The hugepagesz and hugepages cmdline options are interleaved. We
265 * use the size variable to keep track of whether or not this was done
266 * properly and skip over instances where it is incorrect. Other
267 * command-line parsing code will issue warnings, so we don't need to.
268 *
269 */
270 if ((strcmp(param, "default_hugepagesz") == 0) ||
271 (strcmp(param, "hugepagesz") == 0)) {
272 size = memparse(val, NULL);
273 } else if (strcmp(param, "hugepages") == 0) {
274 if (size != 0) {
275 if (sscanf(val, "%lu", &npages) <= 0)
276 npages = 0;
277 gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
278 size = 0;
279 }
280 }
281 return 0;
282}
283
284
285/*
286 * This function allocates physical space for pages that are larger than the
287 * buddy allocator can handle. We want to allocate these in highmem because
288 * the amount of lowmem is limited. This means that this function MUST be
289 * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
290 * allocate to grab highmem.
291 */
292void __init reserve_hugetlb_gpages(void)
293{
294 static __initdata char cmdline[COMMAND_LINE_SIZE];
295 phys_addr_t size, base;
296 int i;
297
298 strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
299 parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);
300
301 /*
302 * Walk gpage list in reverse, allocating larger page sizes first.
303 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
304 * When we reach the point in the list where pages are no longer
305 * considered gpages, we're done.
306 */
307 for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
308 if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
309 continue;
310 else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
311 break;
312
313 size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
314 base = memblock_alloc_base(size * gpage_npages[i], size,
315 MEMBLOCK_ALLOC_ANYWHERE);
316 add_gpage(base, size, gpage_npages[i]);
317 }
318}
319
320#else /* PPC64 */
321
322/* Build list of addresses of gigantic pages. This function is used in early
323 * boot before the buddy or bootmem allocator is setup.
324 */
325void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
177{ 326{
178 if (!addr) 327 if (!addr)
179 return; 328 return;
@@ -199,19 +348,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
199 m->hstate = hstate; 348 m->hstate = hstate;
200 return 1; 349 return 1;
201} 350}
351#endif
202 352
203int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) 353int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
204{ 354{
205 return 0; 355 return 0;
206} 356}
207 357
358#ifdef CONFIG_PPC32
359#define HUGEPD_FREELIST_SIZE \
360 ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
361
362struct hugepd_freelist {
363 struct rcu_head rcu;
364 unsigned int index;
365 void *ptes[0];
366};
367
368static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
369
370static void hugepd_free_rcu_callback(struct rcu_head *head)
371{
372 struct hugepd_freelist *batch =
373 container_of(head, struct hugepd_freelist, rcu);
374 unsigned int i;
375
376 for (i = 0; i < batch->index; i++)
377 kmem_cache_free(hugepte_cache, batch->ptes[i]);
378
379 free_page((unsigned long)batch);
380}
381
382static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
383{
384 struct hugepd_freelist **batchp;
385
386 batchp = &__get_cpu_var(hugepd_freelist_cur);
387
388 if (atomic_read(&tlb->mm->mm_users) < 2 ||
389 cpumask_equal(mm_cpumask(tlb->mm),
390 cpumask_of(smp_processor_id()))) {
391 kmem_cache_free(hugepte_cache, hugepte);
392 return;
393 }
394
395 if (*batchp == NULL) {
396 *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
397 (*batchp)->index = 0;
398 }
399
400 (*batchp)->ptes[(*batchp)->index++] = hugepte;
401 if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
402 call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
403 *batchp = NULL;
404 }
405}
406#endif
407
208static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, 408static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
209 unsigned long start, unsigned long end, 409 unsigned long start, unsigned long end,
210 unsigned long floor, unsigned long ceiling) 410 unsigned long floor, unsigned long ceiling)
211{ 411{
212 pte_t *hugepte = hugepd_page(*hpdp); 412 pte_t *hugepte = hugepd_page(*hpdp);
213 unsigned shift = hugepd_shift(*hpdp); 413 int i;
414
214 unsigned long pdmask = ~((1UL << pdshift) - 1); 415 unsigned long pdmask = ~((1UL << pdshift) - 1);
416 unsigned int num_hugepd = 1;
417
418#ifdef CONFIG_PPC64
419 unsigned int shift = hugepd_shift(*hpdp);
420#else
421 /* Note: On 32-bit the hpdp may be the first of several */
422 num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
423#endif
215 424
216 start &= pdmask; 425 start &= pdmask;
217 if (start < floor) 426 if (start < floor)
@@ -224,9 +433,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
224 if (end - 1 > ceiling - 1) 433 if (end - 1 > ceiling - 1)
225 return; 434 return;
226 435
227 hpdp->pd = 0; 436 for (i = 0; i < num_hugepd; i++, hpdp++)
437 hpdp->pd = 0;
438
228 tlb->need_flush = 1; 439 tlb->need_flush = 1;
440#ifdef CONFIG_PPC64
229 pgtable_free_tlb(tlb, hugepte, pdshift - shift); 441 pgtable_free_tlb(tlb, hugepte, pdshift - shift);
442#else
443 hugepd_free(tlb, hugepte);
444#endif
230} 445}
231 446
232static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, 447static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -331,18 +546,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
331 * too. 546 * too.
332 */ 547 */
333 548
334 pgd = pgd_offset(tlb->mm, addr);
335 do { 549 do {
336 next = pgd_addr_end(addr, end); 550 next = pgd_addr_end(addr, end);
551 pgd = pgd_offset(tlb->mm, addr);
337 if (!is_hugepd(pgd)) { 552 if (!is_hugepd(pgd)) {
338 if (pgd_none_or_clear_bad(pgd)) 553 if (pgd_none_or_clear_bad(pgd))
339 continue; 554 continue;
340 hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); 555 hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
341 } else { 556 } else {
557#ifdef CONFIG_PPC32
558 /*
559 * Increment next by the size of the huge mapping since
560 * on 32-bit there may be more than one entry at the pgd
561 * level for a single hugepage, but all of them point to
562 * the same kmem cache that holds the hugepte.
563 */
564 next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
565#endif
342 free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, 566 free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
343 addr, next, floor, ceiling); 567 addr, next, floor, ceiling);
344 } 568 }
345 } while (pgd++, addr = next, addr != end); 569 } while (addr = next, addr != end);
346} 570}
347 571
348struct page * 572struct page *
@@ -466,17 +690,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
466 unsigned long len, unsigned long pgoff, 690 unsigned long len, unsigned long pgoff,
467 unsigned long flags) 691 unsigned long flags)
468{ 692{
693#ifdef CONFIG_MM_SLICES
469 struct hstate *hstate = hstate_file(file); 694 struct hstate *hstate = hstate_file(file);
470 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); 695 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
471 696
472 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); 697 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
698#else
699 return get_unmapped_area(file, addr, len, pgoff, flags);
700#endif
473} 701}
474 702
475unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) 703unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
476{ 704{
705#ifdef CONFIG_MM_SLICES
477 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); 706 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
478 707
479 return 1UL << mmu_psize_to_shift(psize); 708 return 1UL << mmu_psize_to_shift(psize);
709#else
710 if (!is_vm_hugetlb_page(vma))
711 return PAGE_SIZE;
712
713 return huge_page_size(hstate_vma(vma));
714#endif
715}
716
717static inline bool is_power_of_4(unsigned long x)
718{
719 if (is_power_of_2(x))
720 return (__ilog2(x) % 2) ? false : true;
721 return false;
480} 722}
481 723
482static int __init add_huge_page_size(unsigned long long size) 724static int __init add_huge_page_size(unsigned long long size)
@@ -486,9 +728,14 @@ static int __init add_huge_page_size(unsigned long long size)
486 728
487 /* Check that it is a page size supported by the hardware and 729 /* Check that it is a page size supported by the hardware and
488 * that it fits within pagetable and slice limits. */ 730 * that it fits within pagetable and slice limits. */
731#ifdef CONFIG_PPC_FSL_BOOK3E
732 if ((size < PAGE_SIZE) || !is_power_of_4(size))
733 return -EINVAL;
734#else
489 if (!is_power_of_2(size) 735 if (!is_power_of_2(size)
490 || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) 736 || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
491 return -EINVAL; 737 return -EINVAL;
738#endif
492 739
493 if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) 740 if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
494 return -EINVAL; 741 return -EINVAL;
@@ -525,6 +772,46 @@ static int __init hugepage_setup_sz(char *str)
525} 772}
526__setup("hugepagesz=", hugepage_setup_sz); 773__setup("hugepagesz=", hugepage_setup_sz);
527 774
775#ifdef CONFIG_FSL_BOOKE
776struct kmem_cache *hugepte_cache;
777static int __init hugetlbpage_init(void)
778{
779 int psize;
780
781 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
782 unsigned shift;
783
784 if (!mmu_psize_defs[psize].shift)
785 continue;
786
787 shift = mmu_psize_to_shift(psize);
788
789 /* Don't treat normal page sizes as huge... */
790 if (shift != PAGE_SHIFT)
791 if (add_huge_page_size(1ULL << shift) < 0)
792 continue;
793 }
794
795 /*
796 * Create a kmem cache for hugeptes. The bottom bits in the pte have
797 * size information encoded in them, so align them to allow this
798 */
799 hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
800 HUGEPD_SHIFT_MASK + 1, 0, NULL);
801 if (hugepte_cache == NULL)
802 panic("%s: Unable to create kmem cache for hugeptes\n",
803 __func__);
804
805 /* Default hpage size = 4M */
806 if (mmu_psize_defs[MMU_PAGE_4M].shift)
807 HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
808 else
809 panic("%s: Unable to set default huge page size\n", __func__);
810
811
812 return 0;
813}
814#else
528static int __init hugetlbpage_init(void) 815static int __init hugetlbpage_init(void)
529{ 816{
530 int psize; 817 int psize;
@@ -567,15 +854,23 @@ static int __init hugetlbpage_init(void)
567 854
568 return 0; 855 return 0;
569} 856}
570 857#endif
571module_init(hugetlbpage_init); 858module_init(hugetlbpage_init);
572 859
573void flush_dcache_icache_hugepage(struct page *page) 860void flush_dcache_icache_hugepage(struct page *page)
574{ 861{
575 int i; 862 int i;
863 void *start;
576 864
577 BUG_ON(!PageCompound(page)); 865 BUG_ON(!PageCompound(page));
578 866
579 for (i = 0; i < (1UL << compound_order(page)); i++) 867 for (i = 0; i < (1UL << compound_order(page)); i++) {
580 __flush_dcache_icache(page_address(page+i)); 868 if (!PageHighMem(page)) {
869 __flush_dcache_icache(page_address(page+i));
870 } else {
871 start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE);
872 __flush_dcache_icache(start);
873 kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
874 }
875 }
581} 876}
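The is_power_of_4() check above is what pins the FSL_BOOK3E hugepage sizes to the set named in the commit message: 4m (2^22), 16m (2^24), 64m (2^26), 256m (2^28) and 1g (2^30) all have an even log2 and are accepted, while a size such as 8m (2^23) is rejected by add_huge_page_size().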
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index c77fef56dad6..161cefde5c15 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -32,6 +32,8 @@
32#include <linux/pagemap.h> 32#include <linux/pagemap.h>
33#include <linux/memblock.h> 33#include <linux/memblock.h>
34#include <linux/gfp.h> 34#include <linux/gfp.h>
35#include <linux/slab.h>
36#include <linux/hugetlb.h>
35 37
36#include <asm/pgalloc.h> 38#include <asm/pgalloc.h>
37#include <asm/prom.h> 39#include <asm/prom.h>
@@ -44,6 +46,7 @@
44#include <asm/tlb.h> 46#include <asm/tlb.h>
45#include <asm/sections.h> 47#include <asm/sections.h>
46#include <asm/system.h> 48#include <asm/system.h>
49#include <asm/hugetlb.h>
47 50
48#include "mmu_decl.h" 51#include "mmu_decl.h"
49 52
@@ -123,6 +126,12 @@ void __init MMU_init(void)
123 /* parse args from command line */ 126 /* parse args from command line */
124 MMU_setup(); 127 MMU_setup();
125 128
129 /*
130 * Reserve gigantic pages for hugetlb. This MUST occur before
131 * lowmem_end_addr is initialized below.
132 */
133 reserve_hugetlb_gpages();
134
126 if (memblock.memory.cnt > 1) { 135 if (memblock.memory.cnt > 1) {
127#ifndef CONFIG_WII 136#ifndef CONFIG_WII
128 memblock.memory.cnt = 1; 137 memblock.memory.cnt = 1;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c781bbcf7338..ad9cf49dfb89 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -548,4 +548,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
548 return; 548 return;
549 hash_preload(vma->vm_mm, address, access, trap); 549 hash_preload(vma->vm_mm, address, access, trap);
550#endif /* CONFIG_PPC_STD_MMU */ 550#endif /* CONFIG_PPC_STD_MMU */
551#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
552 && defined(CONFIG_HUGETLB_PAGE)
553 if (is_vm_hugetlb_page(vma))
554 book3e_hugetlb_preload(vma->vm_mm, address, *ptep);
555#endif
551} 556}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 336807de550e..5b63bd3da4a9 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -292,6 +292,11 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
292 mm->context.id = MMU_NO_CONTEXT; 292 mm->context.id = MMU_NO_CONTEXT;
293 mm->context.active = 0; 293 mm->context.active = 0;
294 294
295#ifdef CONFIG_PPC_MM_SLICES
296 if (slice_mm_new_context(mm))
297 slice_set_user_psize(mm, mmu_virtual_psize);
298#endif
299
295 return 0; 300 return 0;
296} 301}
297 302
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index af40c8768a78..214130a4edc6 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -27,6 +27,7 @@
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/percpu.h> 28#include <linux/percpu.h>
29#include <linux/hardirq.h> 29#include <linux/hardirq.h>
30#include <linux/hugetlb.h>
30#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
31#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
32#include <asm/tlb.h> 33#include <asm/tlb.h>
@@ -212,7 +213,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
212 entry = set_access_flags_filter(entry, vma, dirty); 213 entry = set_access_flags_filter(entry, vma, dirty);
213 changed = !pte_same(*(ptep), entry); 214 changed = !pte_same(*(ptep), entry);
214 if (changed) { 215 if (changed) {
215 if (!(vma->vm_flags & VM_HUGETLB)) 216 if (!is_vm_hugetlb_page(vma))
216 assert_pte_locked(vma->vm_mm, address); 217 assert_pte_locked(vma->vm_mm, address);
217 __ptep_set_access_flags(ptep, entry); 218 __ptep_set_access_flags(ptep, entry);
218 flush_tlb_page_nohash(vma, address); 219 flush_tlb_page_nohash(vma, address);
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 4ebb34bc01d6..dc4a5f385e41 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -553,24 +553,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
553 rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 553 rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3
554 clrrdi r10,r11,3 554 clrrdi r10,r11,3
555 ldx r15,r10,r15 555 ldx r15,r10,r15
556 cmpldi cr0,r15,0 556 cmpdi cr0,r15,0
557 beq virt_page_table_tlb_miss_fault 557 bge virt_page_table_tlb_miss_fault
558 558
559#ifndef CONFIG_PPC_64K_PAGES 559#ifndef CONFIG_PPC_64K_PAGES
560 /* Get to PUD entry */ 560 /* Get to PUD entry */
561 rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 561 rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
562 clrrdi r10,r11,3 562 clrrdi r10,r11,3
563 ldx r15,r10,r15 563 ldx r15,r10,r15
564 cmpldi cr0,r15,0 564 cmpdi cr0,r15,0
565 beq virt_page_table_tlb_miss_fault 565 bge virt_page_table_tlb_miss_fault
566#endif /* CONFIG_PPC_64K_PAGES */ 566#endif /* CONFIG_PPC_64K_PAGES */
567 567
568 /* Get to PMD entry */ 568 /* Get to PMD entry */
569 rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 569 rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
570 clrrdi r10,r11,3 570 clrrdi r10,r11,3
571 ldx r15,r10,r15 571 ldx r15,r10,r15
572 cmpldi cr0,r15,0 572 cmpdi cr0,r15,0
573 beq virt_page_table_tlb_miss_fault 573 bge virt_page_table_tlb_miss_fault
574 574
575 /* Ok, we're all right, we can now create a kernel translation for 575 /* Ok, we're all right, we can now create a kernel translation for
576 * a 4K or 64K page from r16 -> r15. 576 * a 4K or 64K page from r16 -> r15.
@@ -802,24 +802,24 @@ htw_tlb_miss:
802 rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 802 rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
803 clrrdi r10,r11,3 803 clrrdi r10,r11,3
804 ldx r15,r10,r15 804 ldx r15,r10,r15
805 cmpldi cr0,r15,0 805 cmpdi cr0,r15,0
806 beq htw_tlb_miss_fault 806 bge htw_tlb_miss_fault
807 807
808#ifndef CONFIG_PPC_64K_PAGES 808#ifndef CONFIG_PPC_64K_PAGES
809 /* Get to PUD entry */ 809 /* Get to PUD entry */
810 rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 810 rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
811 clrrdi r10,r11,3 811 clrrdi r10,r11,3
812 ldx r15,r10,r15 812 ldx r15,r10,r15
813 cmpldi cr0,r15,0 813 cmpdi cr0,r15,0
814 beq htw_tlb_miss_fault 814 bge htw_tlb_miss_fault
815#endif /* CONFIG_PPC_64K_PAGES */ 815#endif /* CONFIG_PPC_64K_PAGES */
816 816
817 /* Get to PMD entry */ 817 /* Get to PMD entry */
818 rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 818 rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
819 clrrdi r10,r11,3 819 clrrdi r10,r11,3
820 ldx r15,r10,r15 820 ldx r15,r10,r15
821 cmpldi cr0,r15,0 821 cmpdi cr0,r15,0
822 beq htw_tlb_miss_fault 822 bge htw_tlb_miss_fault
823 823
824 /* Ok, we're all right, we can now create an indirect entry for 824 /* Ok, we're all right, we can now create an indirect entry for
825 * a 1M or 256M page. 825 * a 1M or 256M page.
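One way to read the cmpldi/beq to cmpdi/bge changes above: a hugepd entry is stored with its top address bit cleared (the & ~PD_HUGE in __hugepte_alloc()), so under a signed compare a normal kernel-pointer page-table entry is negative, an empty entry is zero, and a hugepd entry is positive. Branching on bge therefore routes both the empty and the hugepd cases to the miss-fault path, leaving hugepage translation to the generic fault handler rather than to these software walkers.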
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index d32ec643c231..afc95c7304ae 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -36,14 +36,49 @@
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/memblock.h> 37#include <linux/memblock.h>
38#include <linux/of_fdt.h> 38#include <linux/of_fdt.h>
39#include <linux/hugetlb.h>
39 40
40#include <asm/tlbflush.h> 41#include <asm/tlbflush.h>
41#include <asm/tlb.h> 42#include <asm/tlb.h>
42#include <asm/code-patching.h> 43#include <asm/code-patching.h>
44#include <asm/hugetlb.h>
43 45
44#include "mmu_decl.h" 46#include "mmu_decl.h"
45 47
46#ifdef CONFIG_PPC_BOOK3E 48/*
49 * This struct lists the sw-supported page sizes. The hardware MMU may support
50 * other sizes not listed here. The .ind field is only used on MMUs that have
51 * indirect page table entries.
52 */
53#ifdef CONFIG_PPC_BOOK3E_MMU
54#ifdef CONFIG_FSL_BOOKE
55struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
56 [MMU_PAGE_4K] = {
57 .shift = 12,
58 .enc = BOOK3E_PAGESZ_4K,
59 },
60 [MMU_PAGE_4M] = {
61 .shift = 22,
62 .enc = BOOK3E_PAGESZ_4M,
63 },
64 [MMU_PAGE_16M] = {
65 .shift = 24,
66 .enc = BOOK3E_PAGESZ_16M,
67 },
68 [MMU_PAGE_64M] = {
69 .shift = 26,
70 .enc = BOOK3E_PAGESZ_64M,
71 },
72 [MMU_PAGE_256M] = {
73 .shift = 28,
74 .enc = BOOK3E_PAGESZ_256M,
75 },
76 [MMU_PAGE_1G] = {
77 .shift = 30,
78 .enc = BOOK3E_PAGESZ_1GB,
79 },
80};
81#else
47struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { 82struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
48 [MMU_PAGE_4K] = { 83 [MMU_PAGE_4K] = {
49 .shift = 12, 84 .shift = 12,
@@ -77,6 +112,8 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
77 .enc = BOOK3E_PAGESZ_1GB, 112 .enc = BOOK3E_PAGESZ_1GB,
78 }, 113 },
79}; 114};
115#endif /* CONFIG_FSL_BOOKE */
116
80static inline int mmu_get_tsize(int psize) 117static inline int mmu_get_tsize(int psize)
81{ 118{
82 return mmu_psize_defs[psize].enc; 119 return mmu_psize_defs[psize].enc;
@@ -87,7 +124,7 @@ static inline int mmu_get_tsize(int psize)
87 /* This isn't used on !Book3E for now */ 124 /* This isn't used on !Book3E for now */
88 return 0; 125 return 0;
89} 126}
90#endif 127#endif /* CONFIG_PPC_BOOK3E_MMU */
91 128
92/* The variables below are currently only used on 64-bit Book3E 129/* The variables below are currently only used on 64-bit Book3E
93 * though this will probably be made common with other nohash 130 * though this will probably be made common with other nohash
@@ -266,6 +303,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
266 303
267void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 304void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
268{ 305{
306#ifdef CONFIG_HUGETLB_PAGE
307 if (is_vm_hugetlb_page(vma))
308 flush_hugetlb_page(vma, vmaddr);
309#endif
310
269 __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, 311 __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
270 mmu_get_tsize(mmu_virtual_psize), 0); 312 mmu_get_tsize(mmu_virtual_psize), 0);
271} 313}
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e06e39589a09..a85990c886e9 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -69,6 +69,7 @@ config PPC_BOOK3S_64
69 bool "Server processors" 69 bool "Server processors"
70 select PPC_FPU 70 select PPC_FPU
71 select PPC_HAVE_PMU_SUPPORT 71 select PPC_HAVE_PMU_SUPPORT
72 select SYS_SUPPORTS_HUGETLBFS
72 73
73config PPC_BOOK3E_64 74config PPC_BOOK3E_64
74 bool "Embedded processors" 75 bool "Embedded processors"
@@ -173,6 +174,7 @@ config BOOKE
173config FSL_BOOKE 174config FSL_BOOKE
174 bool 175 bool
175 depends on (E200 || E500) && PPC32 176 depends on (E200 || E500) && PPC32
177 select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT
176 default y 178 default y
177 179
178# this is for common code between PPC32 & PPC64 FSL BOOKE 180# this is for common code between PPC32 & PPC64 FSL BOOKE
@@ -296,7 +298,7 @@ config PPC_BOOK3E_MMU
296 298
297config PPC_MM_SLICES 299config PPC_MM_SLICES
298 bool 300 bool
299 default y if HUGETLB_PAGE || (PPC_STD_MMU_64 && PPC_64K_PAGES) 301 default y if (PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
300 default n 302 default n
301 303
302config VIRT_CPU_ACCOUNTING 304config VIRT_CPU_ACCOUNTING