 arch/powerpc/include/asm/hugetlb.h        |   1
 arch/powerpc/include/asm/mmu-hash64.h     |  14
 arch/powerpc/include/asm/page.h           |  14
 arch/powerpc/include/asm/pgtable-ppc64.h  |  13
 arch/powerpc/include/asm/pgtable.h        |   3
 arch/powerpc/kernel/perf_callchain.c      |  20
 arch/powerpc/mm/gup.c                     | 149
 arch/powerpc/mm/hash_utils_64.c           |  26
 arch/powerpc/mm/hugetlbpage.c             | 473
 arch/powerpc/mm/init_64.c                 |  10
 10 files changed, 313 insertions(+), 410 deletions(-)
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index b1dafb6a9743..a4f08f10fe1f 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -3,7 +3,6 @@
 
 #include <asm/page.h>
 
-
 int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len);
 
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index bebe31c2e907..dd50ea15e648 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -173,14 +173,6 @@ extern unsigned long tce_alloc_start, tce_alloc_end;
  */
 extern int mmu_ci_restrictions;
 
-#ifdef CONFIG_HUGETLB_PAGE
-/*
- * The page size indexes of the huge pages for use by hugetlbfs
- */
-extern unsigned int mmu_huge_psizes[MMU_PAGE_COUNT];
-
-#endif /* CONFIG_HUGETLB_PAGE */
-
 /*
  * This function sets the AVPN and L fields of the HPTE appropriately
  * for the page size
@@ -254,9 +246,9 @@
 			  unsigned int local, int ssize);
 struct mm_struct;
 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
-extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
-			  unsigned long ea, unsigned long vsid, int local,
-			  unsigned long trap);
+int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+		     pte_t *ptep, unsigned long trap, int local, int ssize,
+		     unsigned int shift, unsigned int mmu_psize);
 
 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     unsigned long pstart, unsigned long prot,
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index ff24254990e1..e96d52a516ba 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -229,6 +229,20 @@ typedef unsigned long pgprot_t;
 
 #endif
 
+typedef struct { signed long pd; } hugepd_t;
+#define HUGEPD_SHIFT_MASK	0x3f
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	return (hpd.pd > 0);
+}
+
+#define is_hugepd(pdep)	(hugepd_ok(*((hugepd_t *)(pdep))))
+#else /* CONFIG_HUGETLB_PAGE */
+#define is_hugepd(pdep)	0
+#endif /* CONFIG_HUGETLB_PAGE */
+
 struct page;
 extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
 extern void copy_user_page(void *to, void *from, unsigned long vaddr,
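Note: the hugepd_t introduced above packs two values into one signed long: the kernel-virtual address of a hugepte table in the high bits, and the page shift of the hugepages it maps in the low six bits (HUGEPD_SHIFT_MASK). Storing the pointer with its top address bit cleared (see __hugepte_alloc() in the hugetlbpage.c hunks below) makes a hugepd positive, while a normal page-table pointer keeps its 0xc... kernel prefix and reads as negative, which is all hugepd_ok() needs to test. A minimal userspace sketch of the round-trip, with the 0xc000000000000000 prefix hard-coded the same way hugepd_page() does it:

	#include <assert.h>
	#include <stdio.h>

	typedef struct { signed long pd; } hugepd_t;
	#define HUGEPD_SHIFT_MASK 0x3f

	/* Encode: clear the top address bit so the value tests positive,
	 * then stash the hugepage shift in the low bits, which table
	 * alignment guarantees are zero (mirrors __hugepte_alloc()). */
	static hugepd_t hugepd_encode(unsigned long table, unsigned shift)
	{
		hugepd_t hpd;

		assert(shift <= HUGEPD_SHIFT_MASK);
		assert((table & HUGEPD_SHIFT_MASK) == 0);
		hpd.pd = (long)((table & ~0x8000000000000000UL) | shift);
		return hpd;
	}

	/* Decode: mask off the shift and restore the kernel prefix,
	 * as hugepd_page() does. */
	static unsigned long hugepd_table(hugepd_t hpd)
	{
		return ((unsigned long)hpd.pd & ~(unsigned long)HUGEPD_SHIFT_MASK)
			| 0xc000000000000000UL;
	}

	int main(void)
	{
		/* illustrative table address, 64-byte aligned */
		hugepd_t hpd = hugepd_encode(0xc000000012345000UL, 24);

		assert(hpd.pd > 0);				/* hugepd_ok() */
		assert((hpd.pd & HUGEPD_SHIFT_MASK) == 24);	/* hugepd_shift() */
		assert(hugepd_table(hpd) == 0xc000000012345000UL);
		printf("pd = %#lx\n", (unsigned long)hpd.pd);
		return 0;
	}

The sign test is the whole trick: as the comment in hugetlbpage.c notes, pmd_bad()/pud_bad() still choke on these values, so a hugepd cannot be mistaken for a normal lower-level table.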
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 8697d6555090..49865045d56f 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -379,7 +379,18 @@ void pgtable_cache_init(void);
 	return pt;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long address);
+#ifdef CONFIG_HUGETLB_PAGE
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				 unsigned *shift);
+#else
+static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+					       unsigned *shift)
+{
+	if (shift)
+		*shift = 0;
+	return find_linux_pte(pgdir, ea);
+}
+#endif /* !CONFIG_HUGETLB_PAGE */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 2a5da069714e..21207e54825b 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -211,6 +211,9 @@ extern void paging_init(void);
  */
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
+extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
+		      unsigned long end, int write, struct page **pages, int *nr);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index 0a03cf70d247..936f04dbfc6f 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -119,13 +119,6 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 }
 
 #ifdef CONFIG_PPC64
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define is_huge_psize(pagesize)	(HPAGE_SHIFT && mmu_huge_psizes[pagesize])
-#else
-#define is_huge_psize(pagesize)	0
-#endif
-
 /*
  * On 64-bit we don't want to invoke hash_page on user addresses from
  * interrupt context, so if the access faults, we read the page tables
@@ -135,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
 {
 	pgd_t *pgdir;
 	pte_t *ptep, pte;
-	int pagesize;
+	unsigned shift;
 	unsigned long addr = (unsigned long) ptr;
 	unsigned long offset;
 	unsigned long pfn;
@@ -145,17 +138,14 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
 	if (!pgdir)
 		return -EFAULT;
 
-	pagesize = get_slice_psize(current->mm, addr);
+	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+	if (!shift)
+		shift = PAGE_SHIFT;
 
 	/* align address to page boundary */
-	offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1);
+	offset = addr & ((1UL << shift) - 1);
 	addr -= offset;
 
-	if (is_huge_psize(pagesize))
-		ptep = huge_pte_offset(current->mm, addr);
-	else
-		ptep = find_linux_pte(pgdir, addr);
-
 	if (ptep == NULL)
 		return -EFAULT;
 	pte = *ptep;
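Note: the rewritten read_user_stack_slow() above needs only the shift returned by find_linux_pte_or_hugepte() to align the address, whatever size page backs it: masking with ((1UL << shift) - 1) strips the offset within a naturally aligned page of any size. A small runnable illustration of that arithmetic (the addresses and shifts are made up for the example):

	#include <stdio.h>

	/* The alignment step from the patched read_user_stack_slow():
	 * split addr into page base and in-page offset for a page of
	 * 1UL << shift bytes. */
	static void align(unsigned long addr, unsigned shift)
	{
		unsigned long offset = addr & ((1UL << shift) - 1);

		printf("addr %#lx, shift %2u -> base %#lx, offset %#lx\n",
		       addr, shift, addr - offset, offset);
	}

	int main(void)
	{
		align(0x10012345UL, 12);	/* normal 4K page */
		align(0x10012345UL, 24);	/* 16M hugepage */
		return 0;
	}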
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index bc122a120bf0..d7efdbf640c7 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -55,57 +55,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 	return 1;
 }
 
-#ifdef CONFIG_HUGETLB_PAGE
-static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate,
-				 unsigned long *addr, unsigned long end,
-				 int write, struct page **pages, int *nr)
-{
-	unsigned long mask;
-	unsigned long pte_end;
-	struct page *head, *page;
-	pte_t pte;
-	int refs;
-
-	pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate);
-	if (pte_end < end)
-		end = pte_end;
-
-	pte = *ptep;
-	mask = _PAGE_PRESENT|_PAGE_USER;
-	if (write)
-		mask |= _PAGE_RW;
-	if ((pte_val(pte) & mask) != mask)
-		return 0;
-	/* hugepages are never "special" */
-	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
-	refs = 0;
-	head = pte_page(pte);
-	page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT);
-	do {
-		VM_BUG_ON(compound_head(page) != head);
-		pages[*nr] = page;
-		(*nr)++;
-		page++;
-		refs++;
-	} while (*addr += PAGE_SIZE, *addr != end);
-
-	if (!page_cache_add_speculative(head, refs)) {
-		*nr -= refs;
-		return 0;
-	}
-	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-		/* Could be optimized better */
-		while (*nr) {
-			put_page(page);
-			(*nr)--;
-		}
-	}
-
-	return 1;
-}
-#endif /* CONFIG_HUGETLB_PAGE */
-
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		int write, struct page **pages, int *nr)
 {
@@ -119,7 +68,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(pmd))
 			return 0;
-		if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+		if (is_hugepd(pmdp)) {
+			if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
+					addr, next, write, pages, nr))
+				return 0;
+		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
 			return 0;
 	} while (pmdp++, addr = next, addr != end);
 
@@ -139,7 +92,11 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 		next = pud_addr_end(addr, end);
 		if (pud_none(pud))
 			return 0;
-		if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+		if (is_hugepd(pudp)) {
+			if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
+					addr, next, write, pages, nr))
+				return 0;
+		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
 			return 0;
 	} while (pudp++, addr = next, addr != end);
 
@@ -154,10 +111,6 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	unsigned long next;
 	pgd_t *pgdp;
 	int nr = 0;
-#ifdef CONFIG_PPC64
-	unsigned int shift;
-	int psize;
-#endif
 
 	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
 
@@ -172,25 +125,6 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 	pr_devel(" aligned: %lx .. %lx\n", start, end);
 
-#ifdef CONFIG_HUGETLB_PAGE
-	/* We bail out on slice boundary crossing when hugetlb is
-	 * enabled in order to not have to deal with two different
-	 * page table formats
-	 */
-	if (addr < SLICE_LOW_TOP) {
-		if (end > SLICE_LOW_TOP)
-			goto slow_irqon;
-
-		if (unlikely(GET_LOW_SLICE_INDEX(addr) !=
-			     GET_LOW_SLICE_INDEX(end - 1)))
-			goto slow_irqon;
-	} else {
-		if (unlikely(GET_HIGH_SLICE_INDEX(addr) !=
-			     GET_HIGH_SLICE_INDEX(end - 1)))
-			goto slow_irqon;
-	}
-#endif /* CONFIG_HUGETLB_PAGE */
-
 	/*
 	 * XXX: batch / limit 'nr', to avoid large irq off latency
 	 * needs some instrumenting to determine the common sizes used by
@@ -210,54 +144,23 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	 */
 	local_irq_disable();
 
-#ifdef CONFIG_PPC64
-	/* Those bits are related to hugetlbfs implementation and only exist
-	 * on 64-bit for now
-	 */
-	psize = get_slice_psize(mm, addr);
-	shift = mmu_psize_defs[psize].shift;
-#endif /* CONFIG_PPC64 */
-
-#ifdef CONFIG_HUGETLB_PAGE
-	if (unlikely(mmu_huge_psizes[psize])) {
-		pte_t *ptep;
-		unsigned long a = addr;
-		unsigned long sz = ((1UL) << shift);
-		struct hstate *hstate = size_to_hstate(sz);
-
-		BUG_ON(!hstate);
-		/*
-		 * XXX: could be optimized to avoid hstate
-		 * lookup entirely (just use shift)
-		 */
-
-		do {
-			VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift);
-			ptep = huge_pte_offset(mm, a);
-			pr_devel(" %016lx: huge ptep %p\n", a, ptep);
-			if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages,
-						   &nr))
-				goto slow;
-		} while (a != end);
-	} else
-#endif /* CONFIG_HUGETLB_PAGE */
-	{
-		pgdp = pgd_offset(mm, addr);
-		do {
-			pgd_t pgd = *pgdp;
-
-#ifdef CONFIG_PPC64
-			VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift);
-#endif
-			pr_devel(" %016lx: normal pgd %p\n", addr,
-				 (void *)pgd_val(pgd));
-			next = pgd_addr_end(addr, end);
-			if (pgd_none(pgd))
-				goto slow;
-			if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
-				goto slow;
-		} while (pgdp++, addr = next, addr != end);
-	}
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		pr_devel(" %016lx: normal pgd %p\n", addr,
+			 (void *)pgd_val(pgd));
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			goto slow;
+		if (is_hugepd(pgdp)) {
+			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
+					addr, next, write, pages, &nr))
+				goto slow;
+		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			goto slow;
+	} while (pgdp++, addr = next, addr != end);
+
 	local_irq_enable();
 
 	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
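Note: the same branch now appears at every level of the fast-GUP walk — if the entry is absent, fall back to the slow path; if it is a hugepd, hand the whole sub-range to gup_hugepd() with that level's shift; otherwise descend normally. That is what lets a hugepage directory live at the PGD, PUD or PMD level without the walker knowing the layout in advance. A toy, runnable model of the idea (none of the real kernel types appear; this only shows the branching structure):

	#include <stdio.h>

	/* Each directory slot is either empty, a pointer to the next
	 * level, or a "hugepd" leaf declaring that everything under it
	 * is mapped by pages of 1UL << shift bytes. */
	struct slot {
		enum { EMPTY, TABLE, HUGEPD } kind;
		unsigned shift;		/* valid for HUGEPD */
		struct slot *next;	/* valid for TABLE */
	};

	static int walk(struct slot *s, unsigned level)
	{
		if (s->kind == EMPTY)
			return 0;	/* like "goto slow": fall back */
		if (s->kind == HUGEPD) {
			printf("level %u: hugepd, 2^%u-byte pages\n",
			       level, s->shift);
			return 1;	/* handled wholesale, like gup_hugepd() */
		}
		printf("level %u: normal table, descending\n", level);
		return walk(s->next, level + 1);
	}

	int main(void)
	{
		struct slot pmd_leaf = { HUGEPD, 24, NULL };	/* "16M at PMD" */
		struct slot pud      = { TABLE, 0, &pmd_leaf };
		struct slot pgd_leaf = { HUGEPD, 34, NULL };	/* "16G at PGD" */

		walk(&pud, 0);		/* descends once, then hits a hugepd */
		walk(&pgd_leaf, 0);	/* hugepd directly at the top */
		return 0;
	}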
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 1ade7eb6ae00..485dcd197a61 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -891,6 +891,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	unsigned long vsid;
 	struct mm_struct *mm;
 	pte_t *ptep;
+	unsigned hugeshift;
 	const struct cpumask *tmp;
 	int rc, user_region = 0, local = 0;
 	int psize, ssize;
@@ -943,30 +944,31 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
 		local = 1;
 
-#ifdef CONFIG_HUGETLB_PAGE
-	/* Handle hugepage regions */
-	if (HPAGE_SHIFT && mmu_huge_psizes[psize]) {
-		DBG_LOW(" -> huge page !\n");
-		return hash_huge_page(mm, access, ea, vsid, local, trap);
-	}
-#endif /* CONFIG_HUGETLB_PAGE */
-
 #ifndef CONFIG_PPC_64K_PAGES
-	/* If we use 4K pages and our psize is not 4K, then we are hitting
-	 * a special driver mapping, we need to align the address before
-	 * we fetch the PTE
+	/* If we use 4K pages and our psize is not 4K, then we might
+	 * be hitting a special driver mapping, and need to align the
+	 * address before we fetch the PTE.
+	 *
+	 * It could also be a hugepage mapping, in which case this is
+	 * not necessary, but it's not harmful, either.
 	 */
 	if (psize != MMU_PAGE_4K)
 		ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get PTE and page size from page tables */
-	ptep = find_linux_pte(pgdir, ea);
+	ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
 	if (ptep == NULL || !pte_present(*ptep)) {
 		DBG_LOW(" no PTE !\n");
 		return 1;
 	}
 
+#ifdef CONFIG_HUGETLB_PAGE
+	if (hugeshift)
+		return __hash_page_huge(ea, access, vsid, ptep, trap, local,
+					ssize, hugeshift, psize);
+#endif /* CONFIG_HUGETLB_PAGE */
+
 #ifndef CONFIG_PPC_64K_PAGES
 	DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
 #else
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7230d7a4fbd9..95220a5dee58 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -40,25 +40,11 @@ static unsigned nr_gpages;
 /* Array of valid huge page sizes - non-zero value(hugepte_shift) is
  * stored for the huge page sizes that are valid.
  */
-unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
-
-#define hugepte_shift			mmu_huge_psizes
-#define HUGEPTE_INDEX_SIZE(psize)	(mmu_huge_psizes[(psize)])
-#define PTRS_PER_HUGEPTE(psize)		(1 << mmu_huge_psizes[psize])
-
-#define HUGEPD_SHIFT(psize)		(mmu_psize_to_shift(psize) \
-					 + HUGEPTE_INDEX_SIZE(psize))
-#define HUGEPD_SIZE(psize)		(1UL << HUGEPD_SHIFT(psize))
-#define HUGEPD_MASK(psize)		(~(HUGEPD_SIZE(psize)-1))
+static unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
 
 /* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
  * will choke on pointers to hugepte tables, which is handy for
  * catching screwups early. */
-#define HUGEPD_OK	0x1
-
-typedef struct { unsigned long pd; } hugepd_t;
-
-#define hugepd_none(hpd)	((hpd).pd == 0)
 
 static inline int shift_to_mmu_psize(unsigned int shift)
 {
@@ -82,71 +68,126 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 	BUG();
 }
 
+#define hugepd_none(hpd)	((hpd).pd == 0)
+
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
-	BUG_ON(!(hpd.pd & HUGEPD_OK));
-	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
+	BUG_ON(!hugepd_ok(hpd));
+	return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+	return hpd.pd & HUGEPD_SHIFT_MASK;
 }
 
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
-				    struct hstate *hstate)
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
 {
-	unsigned int shift = huge_page_shift(hstate);
-	int psize = shift_to_mmu_psize(shift);
-	unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
+	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
 	pte_t *dir = hugepd_page(*hpdp);
 
 	return dir + idx;
 }
 
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+{
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	hugepd_t *hpdp = NULL;
+	unsigned pdshift = PGDIR_SHIFT;
+
+	if (shift)
+		*shift = 0;
+
+	pg = pgdir + pgd_index(ea);
+	if (is_hugepd(pg)) {
+		hpdp = (hugepd_t *)pg;
+	} else if (!pgd_none(*pg)) {
+		pdshift = PUD_SHIFT;
+		pu = pud_offset(pg, ea);
+		if (is_hugepd(pu))
+			hpdp = (hugepd_t *)pu;
+		else if (!pud_none(*pu)) {
+			pdshift = PMD_SHIFT;
+			pm = pmd_offset(pu, ea);
+			if (is_hugepd(pm))
+				hpdp = (hugepd_t *)pm;
+			else if (!pmd_none(*pm)) {
+				return pte_offset_map(pm, ea);
+			}
+		}
+	}
+
+	if (!hpdp)
+		return NULL;
+
+	if (shift)
+		*shift = hugepd_shift(*hpdp);
+	return hugepte_offset(hpdp, ea, pdshift);
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+}
+
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-			   unsigned long address, unsigned int psize)
+			   unsigned long address, unsigned pdshift, unsigned pshift)
 {
-	pte_t *new = kmem_cache_zalloc(PGT_CACHE(hugepte_shift[psize]),
+	pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift),
 				       GFP_KERNEL|__GFP_REPEAT);
 
+	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
+	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
+
 	if (! new)
 		return -ENOMEM;
 
 	spin_lock(&mm->page_table_lock);
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(PGT_CACHE(hugepte_shift[psize]), new);
+		kmem_cache_free(PGT_CACHE(pdshift - pshift), new);
 	else
-		hpdp->pd = (unsigned long)new | HUGEPD_OK;
+		hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift;
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
 
-
-static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
-	if (huge_page_shift(hstate) < PUD_SHIFT)
-		return pud_offset(pgd, addr);
-	else
-		return (pud_t *) pgd;
-}
-static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr,
-			 struct hstate *hstate)
-{
-	if (huge_page_shift(hstate) < PUD_SHIFT)
-		return pud_alloc(mm, pgd, addr);
-	else
-		return (pud_t *) pgd;
-}
-static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
-{
-	if (huge_page_shift(hstate) < PMD_SHIFT)
-		return pmd_offset(pud, addr);
-	else
-		return (pmd_t *) pud;
-}
-static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
-			 struct hstate *hstate)
-{
-	if (huge_page_shift(hstate) < PMD_SHIFT)
-		return pmd_alloc(mm, pud, addr);
-	else
-		return (pmd_t *) pud;
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	hugepd_t *hpdp = NULL;
+	unsigned pshift = __ffs(sz);
+	unsigned pdshift = PGDIR_SHIFT;
+
+	addr &= ~(sz-1);
+
+	pg = pgd_offset(mm, addr);
+	if (pshift >= PUD_SHIFT) {
+		hpdp = (hugepd_t *)pg;
+	} else {
+		pdshift = PUD_SHIFT;
+		pu = pud_alloc(mm, pg, addr);
+		if (pshift >= PMD_SHIFT) {
+			hpdp = (hugepd_t *)pu;
+		} else {
+			pdshift = PMD_SHIFT;
+			pm = pmd_alloc(mm, pu, addr);
+			hpdp = (hugepd_t *)pm;
+		}
+	}
+
+	if (!hpdp)
+		return NULL;
+
+	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
+
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
+		return NULL;
+
+	return hugepte_offset(hpdp, addr, pdshift);
 }
 
 /* Build list of addresses of gigantic pages.  This function is used in early
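Note: hugepte_offset() above no longer needs an hstate; two shifts determine everything. A directory entry at level pdshift spans 1UL << pdshift bytes, the hugepd says each hugepage under it is 1UL << shift bytes, so the hugepte table has 1 << (pdshift - shift) entries and the index is just the middle bits of the address. A runnable check of that arithmetic (pdshift = 30 and shift = 24 are illustrative stand-ins, not kernel constants):

	#include <assert.h>
	#include <stdio.h>

	/* The index computation from the patched hugepte_offset():
	 * which of the hugepages spanned by this directory entry does
	 * addr fall into? */
	static unsigned long hugepte_idx(unsigned long addr, unsigned pdshift,
					 unsigned shift)
	{
		return (addr & ((1UL << pdshift) - 1)) >> shift;
	}

	int main(void)
	{
		unsigned pdshift = 30, shift = 24;	/* illustrative */
		unsigned long entries = 1UL << (pdshift - shift);

		printf("%lu hugeptes per table\n", entries);	/* 64 */
		assert(hugepte_idx(0x40000000UL, pdshift, shift) == 0);
		assert(hugepte_idx(0x41000000UL, pdshift, shift) == 1);
		assert(hugepte_idx(0x7f000000UL, pdshift, shift) == entries - 1);
		return 0;
	}

The same pair of shifts also names the kmem cache: __hugepte_alloc() above allocates from PGT_CACHE(pdshift - pshift), so tables of equal geometry share a cache regardless of which level they hang from.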
@@ -180,92 +221,38 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
 	return 1;
 }
 
-
-/* Modelled after find_linux_pte() */
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pg;
-	pud_t *pu;
-	pmd_t *pm;
-
-	unsigned int psize;
-	unsigned int shift;
-	unsigned long sz;
-	struct hstate *hstate;
-	psize = get_slice_psize(mm, addr);
-	shift = mmu_psize_to_shift(psize);
-	sz = ((1UL) << shift);
-	hstate = size_to_hstate(sz);
-
-	addr &= hstate->mask;
-
-	pg = pgd_offset(mm, addr);
-	if (!pgd_none(*pg)) {
-		pu = hpud_offset(pg, addr, hstate);
-		if (!pud_none(*pu)) {
-			pm = hpmd_offset(pu, addr, hstate);
-			if (!pmd_none(*pm))
-				return hugepte_offset((hugepd_t *)pm, addr,
-						      hstate);
-		}
-	}
-
-	return NULL;
-}
-
-pte_t *huge_pte_alloc(struct mm_struct *mm,
-			unsigned long addr, unsigned long sz)
-{
-	pgd_t *pg;
-	pud_t *pu;
-	pmd_t *pm;
-	hugepd_t *hpdp = NULL;
-	struct hstate *hstate;
-	unsigned int psize;
-	hstate = size_to_hstate(sz);
-
-	psize = get_slice_psize(mm, addr);
-	BUG_ON(!mmu_huge_psizes[psize]);
-
-	addr &= hstate->mask;
-
-	pg = pgd_offset(mm, addr);
-	pu = hpud_alloc(mm, pg, addr, hstate);
-
-	if (pu) {
-		pm = hpmd_alloc(mm, pu, addr, hstate);
-		if (pm)
-			hpdp = (hugepd_t *)pm;
-	}
-
-	if (! hpdp)
-		return NULL;
-
-	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
-		return NULL;
-
-	return hugepte_offset(hpdp, addr, hstate);
-}
-
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
 
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
-			       unsigned int psize)
+static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
+			      unsigned long start, unsigned long end,
+			      unsigned long floor, unsigned long ceiling)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
+	unsigned shift = hugepd_shift(*hpdp);
+	unsigned long pdmask = ~((1UL << pdshift) - 1);
+
+	start &= pdmask;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= pdmask;
+		if (! ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
 
 	hpdp->pd = 0;
 	tlb->need_flush = 1;
-	pgtable_free_tlb(tlb, hugepte, hugepte_shift[psize]);
+	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling,
-				   unsigned int psize)
+				   unsigned long floor, unsigned long ceiling)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -277,7 +264,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd))
 			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
+		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
+				  addr, next, floor, ceiling);
 	} while (pmd++, addr = next, addr != end);
 
 	start &= PUD_MASK;
@@ -303,23 +291,19 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	pud_t *pud;
 	unsigned long next;
 	unsigned long start;
-	unsigned int shift;
-	unsigned int psize = get_slice_psize(tlb->mm, addr);
-	shift = mmu_psize_to_shift(psize);
 
 	start = addr;
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (shift < PMD_SHIFT) {
+		if (!is_hugepd(pud)) {
 			if (pud_none_or_clear_bad(pud))
 				continue;
 			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
-					       ceiling, psize);
+					       ceiling);
 		} else {
-			if (pud_none(*pud))
-				continue;
-			free_hugepte_range(tlb, (hugepd_t *)pud, psize);
+			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
+					  addr, next, floor, ceiling);
 		}
 	} while (pud++, addr = next, addr != end);
 
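Note: because the old top-level span checks are gone (see the rewritten comment in hugetlb_free_pgd_range() in the next hunk), free_hugepd_range() itself must prove that the region being torn down, bounded by floor and ceiling, covers the directory entry's whole 1UL << pdshift span before it frees the hugepte table. A runnable model of that guard (pdshift = 30 is again only illustrative):

	#include <stdio.h>

	/* The guard added to free_hugepd_range(): free the hugepte
	 * table only if nothing outside [floor, ceiling) can still be
	 * using the span this directory entry covers. */
	static int can_free(unsigned long start, unsigned long end,
			    unsigned long floor, unsigned long ceiling,
			    unsigned pdshift)
	{
		unsigned long pdmask = ~((1UL << pdshift) - 1);

		start &= pdmask;
		if (start < floor)
			return 0;	/* span dips below floor: keep */
		if (ceiling) {
			ceiling &= pdmask;
			if (!ceiling)
				return 0;
		}
		if (end - 1 > ceiling - 1)
			return 0;	/* span pokes above ceiling: keep */
		return 1;
	}

	int main(void)
	{
		unsigned pdshift = 30;	/* illustrative: 1G spans */

		/* whole span lies inside the free region: table can go */
		printf("%d\n", can_free(0x40000000UL, 0x80000000UL,
					0x40000000UL, 0x80000000UL, pdshift));
		/* floor cuts into the span: something below may live on */
		printf("%d\n", can_free(0x48000000UL, 0x50000000UL,
					0x48000000UL, 0x80000000UL, pdshift));
		return 0;
	}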
@@ -350,74 +334,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long start;
 
 	/*
-	 * Comments below take from the normal free_pgd_range().  They
-	 * apply here too.  The tests against HUGEPD_MASK below are
-	 * essential, because we *don't* test for this at the bottom
-	 * level.  Without them we'll attempt to free a hugepte table
-	 * when we unmap just part of it, even if there are other
-	 * active mappings using it.
-	 *
-	 * The next few lines have given us lots of grief...
-	 *
-	 * Why are we testing HUGEPD* at this top level?  Because
-	 * often there will be no work to do at all, and we'd prefer
-	 * not to go all the way down to the bottom just to discover
-	 * that.
-	 *
-	 * Why all these "- 1"s?  Because 0 represents both the bottom
-	 * of the address space and the top of it (using -1 for the
-	 * top wouldn't help much: the masks would do the wrong thing).
-	 * The rule is that addr 0 and floor 0 refer to the bottom of
-	 * the address space, but end 0 and ceiling 0 refer to the top
-	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
-	 * that end 0 case should be mythical).
-	 *
-	 * Wherever addr is brought up or ceiling brought down, we
-	 * must be careful to reject "the opposite 0" before it
-	 * confuses the subsequent tests.  But what about where end is
-	 * brought down by HUGEPD_SIZE below?  no, end can't go down to
-	 * 0 there.
+	 * Because there are a number of different possible pagetable
+	 * layouts for hugepage ranges, we limit knowledge of how
+	 * things should be laid out to the allocation path
+	 * (huge_pte_alloc(), above).  Everything else works out the
+	 * structure as it goes from information in the hugepd
+	 * pointers.  That means that we can't here use the
+	 * optimization used in the normal page free_pgd_range(), of
+	 * checking whether we're actually covering a large enough
+	 * range to have to do anything at the top level of the walk
+	 * instead of at the bottom.
 	 *
-	 * Whereas we round start (addr) and ceiling down, by different
-	 * masks at different levels, in order to test whether a table
-	 * now has no other vmas using it, so can be freed, we don't
-	 * bother to round floor or end up - the tests don't need that.
+	 * To make sense of this, you should probably go read the big
+	 * block comment at the top of the normal free_pgd_range(),
+	 * too.
 	 */
-	unsigned int psize = get_slice_psize(tlb->mm, addr);
-
-	addr &= HUGEPD_MASK(psize);
-	if (addr < floor) {
-		addr += HUGEPD_SIZE(psize);
-		if (!addr)
-			return;
-	}
-	if (ceiling) {
-		ceiling &= HUGEPD_MASK(psize);
-		if (!ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
-		end -= HUGEPD_SIZE(psize);
-	if (addr > end - 1)
-		return;
 
-	start = addr;
 	pgd = pgd_offset(tlb->mm, addr);
 	do {
-		psize = get_slice_psize(tlb->mm, addr);
-		BUG_ON(!mmu_huge_psizes[psize]);
 		next = pgd_addr_end(addr, end);
-		if (mmu_psize_to_shift(psize) < PUD_SHIFT) {
+		if (!is_hugepd(pgd)) {
 			if (pgd_none_or_clear_bad(pgd))
 				continue;
 			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 		} else {
-			if (pgd_none(*pgd))
-				continue;
-			free_hugepte_range(tlb, (hugepd_t *)pgd, psize);
+			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
+					  addr, next, floor, ceiling);
 		}
 	} while (pgd++, addr = next, addr != end);
 }
@@ -448,19 +392,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;
-	unsigned int mmu_psize = get_slice_psize(mm, address);
+	unsigned shift;
+	unsigned long mask;
+
+	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
 
 	/* Verify it is a huge page else bail. */
-	if (!mmu_huge_psizes[mmu_psize])
+	if (!ptep || !shift)
 		return ERR_PTR(-EINVAL);
 
-	ptep = huge_pte_offset(mm, address);
+	mask = (1UL << shift) - 1;
 	page = pte_page(*ptep);
-	if (page) {
-		unsigned int shift = mmu_psize_to_shift(mmu_psize);
-		unsigned long sz = ((1UL) << shift);
-		page += (address % sz) / PAGE_SIZE;
-	}
+	if (page)
+		page += (address & mask) / PAGE_SIZE;
 
 	return page;
 }
@@ -483,6 +427,73 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
+static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+		       unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask;
+	unsigned long pte_end;
+	struct page *head, *page;
+	pte_t pte;
+	int refs;
+
+	pte_end = (addr + sz) & ~(sz-1);
+	if (pte_end < end)
+		end = pte_end;
+
+	pte = *ptep;
+	mask = _PAGE_PRESENT | _PAGE_USER;
+	if (write)
+		mask |= _PAGE_RW;
+
+	if ((pte_val(pte) & mask) != mask)
+		return 0;
+
+	/* hugepages are never "special" */
+	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+	refs = 0;
+	head = pte_page(pte);
+
+	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+		/* Could be optimized better */
+		while (*nr) {
+			put_page(page);
+			(*nr)--;
+		}
+	}
+
+	return 1;
+}
+
+int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
+	       unsigned long addr, unsigned long end,
+	       int write, struct page **pages, int *nr)
+{
+	pte_t *ptep;
+	unsigned long sz = 1UL << hugepd_shift(*hugepd);
+
+	ptep = hugepte_offset(hugepd, addr, pdshift);
+	do {
+		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+			return 0;
+	} while (ptep++, addr += sz, addr != end);
+
+	return 1;
+}
 
 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
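Note: gup_hugepd() above visits one hugepte per iteration, and gup_hugepte() clamps how far each visit may pin: to the end of the current hugepage, or to the caller's end, whichever comes first. The clamp is the only subtle line, so here it is as a runnable snippet (sz = 16M and the addresses are illustrative):

	#include <stdio.h>

	/* The clamp at the top of gup_hugepte(): pte_end is the end of
	 * the hugepage containing addr; never pin past it, nor past the
	 * caller's end. */
	static unsigned long clamp_end(unsigned long addr, unsigned long end,
				       unsigned long sz)
	{
		unsigned long pte_end = (addr + sz) & ~(sz - 1);

		return pte_end < end ? pte_end : end;
	}

	int main(void)
	{
		unsigned long sz = 1UL << 24;	/* illustrative: 16M */

		/* range continues past this hugepage: stop at its end */
		printf("%#lx\n", clamp_end(0x41400000UL, 0x50000000UL, sz));
		/* range ends inside this hugepage: stop at the range end */
		printf("%#lx\n", clamp_end(0x41400000UL, 0x41600000UL, sz));
		return 0;
	}

Between the clamp and the page_cache_add_speculative()/re-check pair, gup_hugepte() keeps the same lockless protocol the removed gup_huge_pte() used; only the hstate lookup is gone.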
@@ -530,34 +541,20 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
 	return rflags;
 }
 
-int hash_huge_page(struct mm_struct *mm, unsigned long access,
-		   unsigned long ea, unsigned long vsid, int local,
-		   unsigned long trap)
+int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+		     pte_t *ptep, unsigned long trap, int local, int ssize,
+		     unsigned int shift, unsigned int mmu_psize)
 {
-	pte_t *ptep;
 	unsigned long old_pte, new_pte;
 	unsigned long va, rflags, pa, sz;
 	long slot;
 	int err = 1;
-	int ssize = user_segment_size(ea);
-	unsigned int mmu_psize;
-	int shift;
-	mmu_psize = get_slice_psize(mm, ea);
 
-	if (!mmu_huge_psizes[mmu_psize])
-		goto out;
-	ptep = huge_pte_offset(mm, ea);
+	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
 	/* Search the Linux page table for a match with va */
 	va = hpt_va(ea, vsid, ssize);
 
-	/*
-	 * If no pte found or not present, send the problem up to
-	 * do_page_fault
-	 */
-	if (unlikely(!ptep || pte_none(*ptep)))
-		goto out;
-
 	/*
 	 * Check the user's access rights to the page.  If access should be
 	 * prevented then send the problem up to do_page_fault.
@@ -588,7 +585,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	rflags = 0x2 | (!(new_pte & _PAGE_RW));
 	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
 	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
-	shift = mmu_psize_to_shift(mmu_psize);
 	sz = ((1UL) << shift);
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
 		/* No CPU has hugepages but lacks no execute, so we
@@ -672,6 +668,8 @@ repeat:
 
 static void __init set_huge_psize(int psize)
 {
+	unsigned pdshift;
+
 	/* Check that it is a page size supported by the hardware and
 	 * that it fits within pagetable limits. */
 	if (mmu_psize_defs[psize].shift &&
@@ -686,29 +684,14 @@ static void __init set_huge_psize(int psize)
 			return;
 		hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
 
-		switch (mmu_psize_defs[psize].shift) {
-		case PAGE_SHIFT_64K:
-		    /* We only allow 64k hpages with 4k base page,
-		     * which was checked above, and always put them
-		     * at the PMD */
-		    hugepte_shift[psize] = PMD_SHIFT;
-		    break;
-		case PAGE_SHIFT_16M:
-		    /* 16M pages can be at two different levels
-		     * of pagestables based on base page size */
-		    if (PAGE_SHIFT == PAGE_SHIFT_64K)
-			    hugepte_shift[psize] = PMD_SHIFT;
-		    else /* 4k base page */
-			    hugepte_shift[psize] = PUD_SHIFT;
-		    break;
-		case PAGE_SHIFT_16G:
-		    /* 16G pages are always at PGD level */
-		    hugepte_shift[psize] = PGDIR_SHIFT;
-		    break;
-		}
-		hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
-	} else
-		hugepte_shift[psize] = 0;
+		if (mmu_psize_defs[psize].shift < PMD_SHIFT)
+			pdshift = PMD_SHIFT;
+		else if (mmu_psize_defs[psize].shift < PUD_SHIFT)
+			pdshift = PUD_SHIFT;
+		else
+			pdshift = PGDIR_SHIFT;
+		mmu_huge_psizes[psize] = pdshift - mmu_psize_defs[psize].shift;
+	}
 }
 
 static int __init hugepage_setup_sz(char *str)
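Note: set_huge_psize() now derives the table geometry from a single comparison chain: a hugepage smaller than a PMD's span hangs its hugepte table off a PMD, one smaller than a PUD's span off a PUD, anything larger off the PGD, and the value cached in mmu_huge_psizes[] is the index size pdshift - shift used to pick the kmem cache. A runnable illustration (the level shifts below are illustrative stand-ins for PMD_SHIFT/PUD_SHIFT/PGDIR_SHIFT, which depend on the base page size):

	#include <stdio.h>

	/* Illustrative stand-ins only; not the real kernel values. */
	#define DEMO_PMD_SHIFT		21
	#define DEMO_PUD_SHIFT		30
	#define DEMO_PGDIR_SHIFT	40

	/* The chain from the patched set_huge_psize(): pick the lowest
	 * level whose span exceeds the hugepage, then return the index
	 * size of the hugepte table hanging there. */
	static unsigned index_size(unsigned shift)
	{
		unsigned pdshift;

		if (shift < DEMO_PMD_SHIFT)
			pdshift = DEMO_PMD_SHIFT;
		else if (shift < DEMO_PUD_SHIFT)
			pdshift = DEMO_PUD_SHIFT;
		else
			pdshift = DEMO_PGDIR_SHIFT;
		return pdshift - shift;
	}

	int main(void)
	{
		printf("64K hugepages: %u-bit index\n", index_size(16));
		printf("16M hugepages: %u-bit index\n", index_size(24));
		printf("16G hugepages: %u-bit index\n", index_size(34));
		return 0;
	}

This is what replaces the old switch on PAGE_SHIFT_64K/16M/16G: the level falls out of the comparison instead of being enumerated per size.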
@@ -732,7 +715,7 @@ __setup("hugepagesz=", hugepage_setup_sz);
 
 static int __init hugetlbpage_init(void)
 {
-	unsigned int psize;
+	int psize;
 
 	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
 		return -ENODEV;
@@ -753,8 +736,8 @@ static int __init hugetlbpage_init(void)
 
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
 		if (mmu_huge_psizes[psize]) {
-			pgtable_cache_add(hugepte_shift[psize], NULL);
-			if (!PGT_CACHE(hugepte_shift[psize]))
+			pgtable_cache_add(mmu_huge_psizes[psize], NULL);
+			if (!PGT_CACHE(mmu_huge_psizes[psize]))
 				panic("hugetlbpage_init(): could not create "
 				      "pgtable cache for %d bit pagesize\n",
 				      mmu_psize_to_shift(psize));
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 82ac61dcd3af..776f28d02b6b 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -41,6 +41,7 @@
 #include <linux/module.h>
 #include <linux/poison.h>
 #include <linux/lmb.h>
+#include <linux/hugetlb.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
@@ -136,8 +137,13 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
 
 	/* When batching pgtable pointers for RCU freeing, we store
 	 * the index size in the low bits.  Table alignment must be
-	 * big enough to fit it */
-	unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
+	 * big enough to fit it.
+	 *
+	 * Likewise, hugepage pagetable pointers contain a (different)
+	 * shift value in the low bits.  All tables must be aligned so
+	 * as to leave enough 0 bits in the address to contain it. */
+	unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
+				     HUGEPD_SHIFT_MASK + 1);
 	struct kmem_cache *new;
 
 	/* It would be nice if this was a BUILD_BUG_ON(), but at the
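Note: the raised minimum alignment is what makes both low-bit encodings safe: every pagetable kmem cache now aligns objects to at least HUGEPD_SHIFT_MASK + 1 = 64 bytes, so the bottom six bits of any table address are guaranteed zero, leaving room for either the RCU batching index size or a hugepd shift. A quick runnable check (MAX_PGTABLE_INDEX_SIZE = 0xf is assumed here for illustration; the real value comes from the ppc64 headers):

	#include <stdio.h>

	#define MAX_PGTABLE_INDEX_SIZE	0xf	/* assumed, see note above */
	#define HUGEPD_SHIFT_MASK	0x3f

	#define max(a, b) ((a) > (b) ? (a) : (b))

	int main(void)
	{
		unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
					     HUGEPD_SHIFT_MASK + 1);

		/* 64-byte alignment zeroes the 6 low address bits --
		 * exactly the field HUGEPD_SHIFT_MASK claims. */
		printf("minalign = %lu bytes\n", minalign);
		printf("spare low bits = %d\n", __builtin_ctzl(minalign));
		return 0;
	}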