diff options
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 473 |
1 files changed, 228 insertions, 245 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7230d7a4fbd9..95220a5dee58 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
@@ -40,25 +40,11 @@ static unsigned nr_gpages; | |||
40 | /* Array of valid huge page sizes - non-zero value(hugepte_shift) is | 40 | /* Array of valid huge page sizes - non-zero value(hugepte_shift) is |
41 | * stored for the huge page sizes that are valid. | 41 | * stored for the huge page sizes that are valid. |
42 | */ | 42 | */ |
43 | unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ | 43 | static unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ |
44 | |||
45 | #define hugepte_shift mmu_huge_psizes | ||
46 | #define HUGEPTE_INDEX_SIZE(psize) (mmu_huge_psizes[(psize)]) | ||
47 | #define PTRS_PER_HUGEPTE(psize) (1 << mmu_huge_psizes[psize]) | ||
48 | |||
49 | #define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ | ||
50 | + HUGEPTE_INDEX_SIZE(psize)) | ||
51 | #define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) | ||
52 | #define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) | ||
53 | 44 | ||
54 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() | 45 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() |
55 | * will choke on pointers to hugepte tables, which is handy for | 46 | * will choke on pointers to hugepte tables, which is handy for |
56 | * catching screwups early. */ | 47 | * catching screwups early. */ |
57 | #define HUGEPD_OK 0x1 | ||
58 | |||
59 | typedef struct { unsigned long pd; } hugepd_t; | ||
60 | |||
61 | #define hugepd_none(hpd) ((hpd).pd == 0) | ||
62 | 48 | ||
63 | static inline int shift_to_mmu_psize(unsigned int shift) | 49 | static inline int shift_to_mmu_psize(unsigned int shift) |
64 | { | 50 | { |
@@ -82,71 +68,126 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) | |||
82 | BUG(); | 68 | BUG(); |
83 | } | 69 | } |
84 | 70 | ||
71 | #define hugepd_none(hpd) ((hpd).pd == 0) | ||
72 | |||
85 | static inline pte_t *hugepd_page(hugepd_t hpd) | 73 | static inline pte_t *hugepd_page(hugepd_t hpd) |
86 | { | 74 | { |
87 | BUG_ON(!(hpd.pd & HUGEPD_OK)); | 75 | BUG_ON(!hugepd_ok(hpd)); |
88 | return (pte_t *)(hpd.pd & ~HUGEPD_OK); | 76 | return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000); |
77 | } | ||
78 | |||
79 | static inline unsigned int hugepd_shift(hugepd_t hpd) | ||
80 | { | ||
81 | return hpd.pd & HUGEPD_SHIFT_MASK; | ||
89 | } | 82 | } |
90 | 83 | ||
91 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, | 84 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift) |
92 | struct hstate *hstate) | ||
93 | { | 85 | { |
94 | unsigned int shift = huge_page_shift(hstate); | 86 | unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); |
95 | int psize = shift_to_mmu_psize(shift); | ||
96 | unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); | ||
97 | pte_t *dir = hugepd_page(*hpdp); | 87 | pte_t *dir = hugepd_page(*hpdp); |
98 | 88 | ||
99 | return dir + idx; | 89 | return dir + idx; |
100 | } | 90 | } |
101 | 91 | ||
92 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) | ||
93 | { | ||
94 | pgd_t *pg; | ||
95 | pud_t *pu; | ||
96 | pmd_t *pm; | ||
97 | hugepd_t *hpdp = NULL; | ||
98 | unsigned pdshift = PGDIR_SHIFT; | ||
99 | |||
100 | if (shift) | ||
101 | *shift = 0; | ||
102 | |||
103 | pg = pgdir + pgd_index(ea); | ||
104 | if (is_hugepd(pg)) { | ||
105 | hpdp = (hugepd_t *)pg; | ||
106 | } else if (!pgd_none(*pg)) { | ||
107 | pdshift = PUD_SHIFT; | ||
108 | pu = pud_offset(pg, ea); | ||
109 | if (is_hugepd(pu)) | ||
110 | hpdp = (hugepd_t *)pu; | ||
111 | else if (!pud_none(*pu)) { | ||
112 | pdshift = PMD_SHIFT; | ||
113 | pm = pmd_offset(pu, ea); | ||
114 | if (is_hugepd(pm)) | ||
115 | hpdp = (hugepd_t *)pm; | ||
116 | else if (!pmd_none(*pm)) { | ||
117 | return pte_offset_map(pm, ea); | ||
118 | } | ||
119 | } | ||
120 | } | ||
121 | |||
122 | if (!hpdp) | ||
123 | return NULL; | ||
124 | |||
125 | if (shift) | ||
126 | *shift = hugepd_shift(*hpdp); | ||
127 | return hugepte_offset(hpdp, ea, pdshift); | ||
128 | } | ||
129 | |||
130 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
131 | { | ||
132 | return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); | ||
133 | } | ||
134 | |||
102 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, | 135 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, |
103 | unsigned long address, unsigned int psize) | 136 | unsigned long address, unsigned pdshift, unsigned pshift) |
104 | { | 137 | { |
105 | pte_t *new = kmem_cache_zalloc(PGT_CACHE(hugepte_shift[psize]), | 138 | pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), |
106 | GFP_KERNEL|__GFP_REPEAT); | 139 | GFP_KERNEL|__GFP_REPEAT); |
107 | 140 | ||
141 | BUG_ON(pshift > HUGEPD_SHIFT_MASK); | ||
142 | BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); | ||
143 | |||
108 | if (! new) | 144 | if (! new) |
109 | return -ENOMEM; | 145 | return -ENOMEM; |
110 | 146 | ||
111 | spin_lock(&mm->page_table_lock); | 147 | spin_lock(&mm->page_table_lock); |
112 | if (!hugepd_none(*hpdp)) | 148 | if (!hugepd_none(*hpdp)) |
113 | kmem_cache_free(PGT_CACHE(hugepte_shift[psize]), new); | 149 | kmem_cache_free(PGT_CACHE(pdshift - pshift), new); |
114 | else | 150 | else |
115 | hpdp->pd = (unsigned long)new | HUGEPD_OK; | 151 | hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; |
116 | spin_unlock(&mm->page_table_lock); | 152 | spin_unlock(&mm->page_table_lock); |
117 | return 0; | 153 | return 0; |
118 | } | 154 | } |
119 | 155 | ||
120 | 156 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) | |
121 | static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate) | ||
122 | { | 157 | { |
123 | if (huge_page_shift(hstate) < PUD_SHIFT) | 158 | pgd_t *pg; |
124 | return pud_offset(pgd, addr); | 159 | pud_t *pu; |
125 | else | 160 | pmd_t *pm; |
126 | return (pud_t *) pgd; | 161 | hugepd_t *hpdp = NULL; |
127 | } | 162 | unsigned pshift = __ffs(sz); |
128 | static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, | 163 | unsigned pdshift = PGDIR_SHIFT; |
129 | struct hstate *hstate) | 164 | |
130 | { | 165 | addr &= ~(sz-1); |
131 | if (huge_page_shift(hstate) < PUD_SHIFT) | 166 | |
132 | return pud_alloc(mm, pgd, addr); | 167 | pg = pgd_offset(mm, addr); |
133 | else | 168 | if (pshift >= PUD_SHIFT) { |
134 | return (pud_t *) pgd; | 169 | hpdp = (hugepd_t *)pg; |
135 | } | 170 | } else { |
136 | static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) | 171 | pdshift = PUD_SHIFT; |
137 | { | 172 | pu = pud_alloc(mm, pg, addr); |
138 | if (huge_page_shift(hstate) < PMD_SHIFT) | 173 | if (pshift >= PMD_SHIFT) { |
139 | return pmd_offset(pud, addr); | 174 | hpdp = (hugepd_t *)pu; |
140 | else | 175 | } else { |
141 | return (pmd_t *) pud; | 176 | pdshift = PMD_SHIFT; |
142 | } | 177 | pm = pmd_alloc(mm, pu, addr); |
143 | static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, | 178 | hpdp = (hugepd_t *)pm; |
144 | struct hstate *hstate) | 179 | } |
145 | { | 180 | } |
146 | if (huge_page_shift(hstate) < PMD_SHIFT) | 181 | |
147 | return pmd_alloc(mm, pud, addr); | 182 | if (!hpdp) |
148 | else | 183 | return NULL; |
149 | return (pmd_t *) pud; | 184 | |
185 | BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); | ||
186 | |||
187 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) | ||
188 | return NULL; | ||
189 | |||
190 | return hugepte_offset(hpdp, addr, pdshift); | ||
150 | } | 191 | } |
151 | 192 | ||
152 | /* Build list of addresses of gigantic pages. This function is used in early | 193 | /* Build list of addresses of gigantic pages. This function is used in early |
@@ -180,92 +221,38 @@ int alloc_bootmem_huge_page(struct hstate *hstate) | |||
180 | return 1; | 221 | return 1; |
181 | } | 222 | } |
182 | 223 | ||
183 | |||
184 | /* Modelled after find_linux_pte() */ | ||
185 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
186 | { | ||
187 | pgd_t *pg; | ||
188 | pud_t *pu; | ||
189 | pmd_t *pm; | ||
190 | |||
191 | unsigned int psize; | ||
192 | unsigned int shift; | ||
193 | unsigned long sz; | ||
194 | struct hstate *hstate; | ||
195 | psize = get_slice_psize(mm, addr); | ||
196 | shift = mmu_psize_to_shift(psize); | ||
197 | sz = ((1UL) << shift); | ||
198 | hstate = size_to_hstate(sz); | ||
199 | |||
200 | addr &= hstate->mask; | ||
201 | |||
202 | pg = pgd_offset(mm, addr); | ||
203 | if (!pgd_none(*pg)) { | ||
204 | pu = hpud_offset(pg, addr, hstate); | ||
205 | if (!pud_none(*pu)) { | ||
206 | pm = hpmd_offset(pu, addr, hstate); | ||
207 | if (!pmd_none(*pm)) | ||
208 | return hugepte_offset((hugepd_t *)pm, addr, | ||
209 | hstate); | ||
210 | } | ||
211 | } | ||
212 | |||
213 | return NULL; | ||
214 | } | ||
215 | |||
216 | pte_t *huge_pte_alloc(struct mm_struct *mm, | ||
217 | unsigned long addr, unsigned long sz) | ||
218 | { | ||
219 | pgd_t *pg; | ||
220 | pud_t *pu; | ||
221 | pmd_t *pm; | ||
222 | hugepd_t *hpdp = NULL; | ||
223 | struct hstate *hstate; | ||
224 | unsigned int psize; | ||
225 | hstate = size_to_hstate(sz); | ||
226 | |||
227 | psize = get_slice_psize(mm, addr); | ||
228 | BUG_ON(!mmu_huge_psizes[psize]); | ||
229 | |||
230 | addr &= hstate->mask; | ||
231 | |||
232 | pg = pgd_offset(mm, addr); | ||
233 | pu = hpud_alloc(mm, pg, addr, hstate); | ||
234 | |||
235 | if (pu) { | ||
236 | pm = hpmd_alloc(mm, pu, addr, hstate); | ||
237 | if (pm) | ||
238 | hpdp = (hugepd_t *)pm; | ||
239 | } | ||
240 | |||
241 | if (! hpdp) | ||
242 | return NULL; | ||
243 | |||
244 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) | ||
245 | return NULL; | ||
246 | |||
247 | return hugepte_offset(hpdp, addr, hstate); | ||
248 | } | ||
249 | |||
250 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | 224 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) |
251 | { | 225 | { |
252 | return 0; | 226 | return 0; |
253 | } | 227 | } |
254 | 228 | ||
255 | static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, | 229 | static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, |
256 | unsigned int psize) | 230 | unsigned long start, unsigned long end, |
231 | unsigned long floor, unsigned long ceiling) | ||
257 | { | 232 | { |
258 | pte_t *hugepte = hugepd_page(*hpdp); | 233 | pte_t *hugepte = hugepd_page(*hpdp); |
234 | unsigned shift = hugepd_shift(*hpdp); | ||
235 | unsigned long pdmask = ~((1UL << pdshift) - 1); | ||
236 | |||
237 | start &= pdmask; | ||
238 | if (start < floor) | ||
239 | return; | ||
240 | if (ceiling) { | ||
241 | ceiling &= pdmask; | ||
242 | if (! ceiling) | ||
243 | return; | ||
244 | } | ||
245 | if (end - 1 > ceiling - 1) | ||
246 | return; | ||
259 | 247 | ||
260 | hpdp->pd = 0; | 248 | hpdp->pd = 0; |
261 | tlb->need_flush = 1; | 249 | tlb->need_flush = 1; |
262 | pgtable_free_tlb(tlb, hugepte, hugepte_shift[psize]); | 250 | pgtable_free_tlb(tlb, hugepte, pdshift - shift); |
263 | } | 251 | } |
264 | 252 | ||
265 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | 253 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, |
266 | unsigned long addr, unsigned long end, | 254 | unsigned long addr, unsigned long end, |
267 | unsigned long floor, unsigned long ceiling, | 255 | unsigned long floor, unsigned long ceiling) |
268 | unsigned int psize) | ||
269 | { | 256 | { |
270 | pmd_t *pmd; | 257 | pmd_t *pmd; |
271 | unsigned long next; | 258 | unsigned long next; |
@@ -277,7 +264,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | |||
277 | next = pmd_addr_end(addr, end); | 264 | next = pmd_addr_end(addr, end); |
278 | if (pmd_none(*pmd)) | 265 | if (pmd_none(*pmd)) |
279 | continue; | 266 | continue; |
280 | free_hugepte_range(tlb, (hugepd_t *)pmd, psize); | 267 | free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT, |
268 | addr, next, floor, ceiling); | ||
281 | } while (pmd++, addr = next, addr != end); | 269 | } while (pmd++, addr = next, addr != end); |
282 | 270 | ||
283 | start &= PUD_MASK; | 271 | start &= PUD_MASK; |
@@ -303,23 +291,19 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |||
303 | pud_t *pud; | 291 | pud_t *pud; |
304 | unsigned long next; | 292 | unsigned long next; |
305 | unsigned long start; | 293 | unsigned long start; |
306 | unsigned int shift; | ||
307 | unsigned int psize = get_slice_psize(tlb->mm, addr); | ||
308 | shift = mmu_psize_to_shift(psize); | ||
309 | 294 | ||
310 | start = addr; | 295 | start = addr; |
311 | pud = pud_offset(pgd, addr); | 296 | pud = pud_offset(pgd, addr); |
312 | do { | 297 | do { |
313 | next = pud_addr_end(addr, end); | 298 | next = pud_addr_end(addr, end); |
314 | if (shift < PMD_SHIFT) { | 299 | if (!is_hugepd(pud)) { |
315 | if (pud_none_or_clear_bad(pud)) | 300 | if (pud_none_or_clear_bad(pud)) |
316 | continue; | 301 | continue; |
317 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, | 302 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, |
318 | ceiling, psize); | 303 | ceiling); |
319 | } else { | 304 | } else { |
320 | if (pud_none(*pud)) | 305 | free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT, |
321 | continue; | 306 | addr, next, floor, ceiling); |
322 | free_hugepte_range(tlb, (hugepd_t *)pud, psize); | ||
323 | } | 307 | } |
324 | } while (pud++, addr = next, addr != end); | 308 | } while (pud++, addr = next, addr != end); |
325 | 309 | ||
@@ -350,74 +334,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, | |||
350 | { | 334 | { |
351 | pgd_t *pgd; | 335 | pgd_t *pgd; |
352 | unsigned long next; | 336 | unsigned long next; |
353 | unsigned long start; | ||
354 | 337 | ||
355 | /* | 338 | /* |
356 | * Comments below take from the normal free_pgd_range(). They | 339 | * Because there are a number of different possible pagetable |
357 | * apply here too. The tests against HUGEPD_MASK below are | 340 | * layouts for hugepage ranges, we limit knowledge of how |
358 | * essential, because we *don't* test for this at the bottom | 341 | * things should be laid out to the allocation path |
359 | * level. Without them we'll attempt to free a hugepte table | 342 | * (huge_pte_alloc(), above). Everything else works out the |
360 | * when we unmap just part of it, even if there are other | 343 | * structure as it goes from information in the hugepd |
361 | * active mappings using it. | 344 | * pointers. That means that we can't here use the |
362 | * | 345 | * optimization used in the normal page free_pgd_range(), of |
363 | * The next few lines have given us lots of grief... | 346 | * checking whether we're actually covering a large enough |
364 | * | 347 | * range to have to do anything at the top level of the walk |
365 | * Why are we testing HUGEPD* at this top level? Because | 348 | * instead of at the bottom. |
366 | * often there will be no work to do at all, and we'd prefer | ||
367 | * not to go all the way down to the bottom just to discover | ||
368 | * that. | ||
369 | * | ||
370 | * Why all these "- 1"s? Because 0 represents both the bottom | ||
371 | * of the address space and the top of it (using -1 for the | ||
372 | * top wouldn't help much: the masks would do the wrong thing). | ||
373 | * The rule is that addr 0 and floor 0 refer to the bottom of | ||
374 | * the address space, but end 0 and ceiling 0 refer to the top | ||
375 | * Comparisons need to use "end - 1" and "ceiling - 1" (though | ||
376 | * that end 0 case should be mythical). | ||
377 | * | ||
378 | * Wherever addr is brought up or ceiling brought down, we | ||
379 | * must be careful to reject "the opposite 0" before it | ||
380 | * confuses the subsequent tests. But what about where end is | ||
381 | * brought down by HUGEPD_SIZE below? no, end can't go down to | ||
382 | * 0 there. | ||
383 | * | 349 | * |
384 | * Whereas we round start (addr) and ceiling down, by different | 350 | * To make sense of this, you should probably go read the big |
385 | * masks at different levels, in order to test whether a table | 351 | * block comment at the top of the normal free_pgd_range(), |
386 | * now has no other vmas using it, so can be freed, we don't | 352 | * too. |
387 | * bother to round floor or end up - the tests don't need that. | ||
388 | */ | 353 | */ |
389 | unsigned int psize = get_slice_psize(tlb->mm, addr); | ||
390 | |||
391 | addr &= HUGEPD_MASK(psize); | ||
392 | if (addr < floor) { | ||
393 | addr += HUGEPD_SIZE(psize); | ||
394 | if (!addr) | ||
395 | return; | ||
396 | } | ||
397 | if (ceiling) { | ||
398 | ceiling &= HUGEPD_MASK(psize); | ||
399 | if (!ceiling) | ||
400 | return; | ||
401 | } | ||
402 | if (end - 1 > ceiling - 1) | ||
403 | end -= HUGEPD_SIZE(psize); | ||
404 | if (addr > end - 1) | ||
405 | return; | ||
406 | 354 | ||
407 | start = addr; | ||
408 | pgd = pgd_offset(tlb->mm, addr); | 355 | pgd = pgd_offset(tlb->mm, addr); |
409 | do { | 356 | do { |
410 | psize = get_slice_psize(tlb->mm, addr); | ||
411 | BUG_ON(!mmu_huge_psizes[psize]); | ||
412 | next = pgd_addr_end(addr, end); | 357 | next = pgd_addr_end(addr, end); |
413 | if (mmu_psize_to_shift(psize) < PUD_SHIFT) { | 358 | if (!is_hugepd(pgd)) { |
414 | if (pgd_none_or_clear_bad(pgd)) | 359 | if (pgd_none_or_clear_bad(pgd)) |
415 | continue; | 360 | continue; |
416 | hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); | 361 | hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); |
417 | } else { | 362 | } else { |
418 | if (pgd_none(*pgd)) | 363 | free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, |
419 | continue; | 364 | addr, next, floor, ceiling); |
420 | free_hugepte_range(tlb, (hugepd_t *)pgd, psize); | ||
421 | } | 365 | } |
422 | } while (pgd++, addr = next, addr != end); | 366 | } while (pgd++, addr = next, addr != end); |
423 | } | 367 | } |
@@ -448,19 +392,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | |||
448 | { | 392 | { |
449 | pte_t *ptep; | 393 | pte_t *ptep; |
450 | struct page *page; | 394 | struct page *page; |
451 | unsigned int mmu_psize = get_slice_psize(mm, address); | 395 | unsigned shift; |
396 | unsigned long mask; | ||
397 | |||
398 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); | ||
452 | 399 | ||
453 | /* Verify it is a huge page else bail. */ | 400 | /* Verify it is a huge page else bail. */ |
454 | if (!mmu_huge_psizes[mmu_psize]) | 401 | if (!ptep || !shift) |
455 | return ERR_PTR(-EINVAL); | 402 | return ERR_PTR(-EINVAL); |
456 | 403 | ||
457 | ptep = huge_pte_offset(mm, address); | 404 | mask = (1UL << shift) - 1; |
458 | page = pte_page(*ptep); | 405 | page = pte_page(*ptep); |
459 | if (page) { | 406 | if (page) |
460 | unsigned int shift = mmu_psize_to_shift(mmu_psize); | 407 | page += (address & mask) / PAGE_SIZE; |
461 | unsigned long sz = ((1UL) << shift); | ||
462 | page += (address % sz) / PAGE_SIZE; | ||
463 | } | ||
464 | 408 | ||
465 | return page; | 409 | return page; |
466 | } | 410 | } |
@@ -483,6 +427,73 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
483 | return NULL; | 427 | return NULL; |
484 | } | 428 | } |
485 | 429 | ||
430 | static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | ||
431 | unsigned long end, int write, struct page **pages, int *nr) | ||
432 | { | ||
433 | unsigned long mask; | ||
434 | unsigned long pte_end; | ||
435 | struct page *head, *page; | ||
436 | pte_t pte; | ||
437 | int refs; | ||
438 | |||
439 | pte_end = (addr + sz) & ~(sz-1); | ||
440 | if (pte_end < end) | ||
441 | end = pte_end; | ||
442 | |||
443 | pte = *ptep; | ||
444 | mask = _PAGE_PRESENT | _PAGE_USER; | ||
445 | if (write) | ||
446 | mask |= _PAGE_RW; | ||
447 | |||
448 | if ((pte_val(pte) & mask) != mask) | ||
449 | return 0; | ||
450 | |||
451 | /* hugepages are never "special" */ | ||
452 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
453 | |||
454 | refs = 0; | ||
455 | head = pte_page(pte); | ||
456 | |||
457 | page = head + ((addr & (sz-1)) >> PAGE_SHIFT); | ||
458 | do { | ||
459 | VM_BUG_ON(compound_head(page) != head); | ||
460 | pages[*nr] = page; | ||
461 | (*nr)++; | ||
462 | page++; | ||
463 | refs++; | ||
464 | } while (addr += PAGE_SIZE, addr != end); | ||
465 | |||
466 | if (!page_cache_add_speculative(head, refs)) { | ||
467 | *nr -= refs; | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
472 | /* Could be optimized better */ | ||
473 | while (*nr) { | ||
474 | put_page(page); | ||
475 | (*nr)--; | ||
476 | } | ||
477 | } | ||
478 | |||
479 | return 1; | ||
480 | } | ||
481 | |||
482 | int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, | ||
483 | unsigned long addr, unsigned long end, | ||
484 | int write, struct page **pages, int *nr) | ||
485 | { | ||
486 | pte_t *ptep; | ||
487 | unsigned long sz = 1UL << hugepd_shift(*hugepd); | ||
488 | |||
489 | ptep = hugepte_offset(hugepd, addr, pdshift); | ||
490 | do { | ||
491 | if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) | ||
492 | return 0; | ||
493 | } while (ptep++, addr += sz, addr != end); | ||
494 | |||
495 | return 1; | ||
496 | } | ||
486 | 497 | ||
487 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | 498 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
488 | unsigned long len, unsigned long pgoff, | 499 | unsigned long len, unsigned long pgoff, |
@@ -530,34 +541,20 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, | |||
530 | return rflags; | 541 | return rflags; |
531 | } | 542 | } |
532 | 543 | ||
533 | int hash_huge_page(struct mm_struct *mm, unsigned long access, | 544 | int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, |
534 | unsigned long ea, unsigned long vsid, int local, | 545 | pte_t *ptep, unsigned long trap, int local, int ssize, |
535 | unsigned long trap) | 546 | unsigned int shift, unsigned int mmu_psize) |
536 | { | 547 | { |
537 | pte_t *ptep; | ||
538 | unsigned long old_pte, new_pte; | 548 | unsigned long old_pte, new_pte; |
539 | unsigned long va, rflags, pa, sz; | 549 | unsigned long va, rflags, pa, sz; |
540 | long slot; | 550 | long slot; |
541 | int err = 1; | 551 | int err = 1; |
542 | int ssize = user_segment_size(ea); | ||
543 | unsigned int mmu_psize; | ||
544 | int shift; | ||
545 | mmu_psize = get_slice_psize(mm, ea); | ||
546 | 552 | ||
547 | if (!mmu_huge_psizes[mmu_psize]) | 553 | BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); |
548 | goto out; | ||
549 | ptep = huge_pte_offset(mm, ea); | ||
550 | 554 | ||
551 | /* Search the Linux page table for a match with va */ | 555 | /* Search the Linux page table for a match with va */ |
552 | va = hpt_va(ea, vsid, ssize); | 556 | va = hpt_va(ea, vsid, ssize); |
553 | 557 | ||
554 | /* | ||
555 | * If no pte found or not present, send the problem up to | ||
556 | * do_page_fault | ||
557 | */ | ||
558 | if (unlikely(!ptep || pte_none(*ptep))) | ||
559 | goto out; | ||
560 | |||
561 | /* | 558 | /* |
562 | * Check the user's access rights to the page. If access should be | 559 | * Check the user's access rights to the page. If access should be |
563 | * prevented then send the problem up to do_page_fault. | 560 | * prevented then send the problem up to do_page_fault. |
@@ -588,7 +585,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, | |||
588 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); | 585 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); |
589 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ | 586 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ |
590 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); | 587 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); |
591 | shift = mmu_psize_to_shift(mmu_psize); | ||
592 | sz = ((1UL) << shift); | 588 | sz = ((1UL) << shift); |
593 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | 589 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) |
594 | /* No CPU has hugepages but lacks no execute, so we | 590 | /* No CPU has hugepages but lacks no execute, so we |
@@ -672,6 +668,8 @@ repeat: | |||
672 | 668 | ||
673 | static void __init set_huge_psize(int psize) | 669 | static void __init set_huge_psize(int psize) |
674 | { | 670 | { |
671 | unsigned pdshift; | ||
672 | |||
675 | /* Check that it is a page size supported by the hardware and | 673 | /* Check that it is a page size supported by the hardware and |
676 | * that it fits within pagetable limits. */ | 674 | * that it fits within pagetable limits. */ |
677 | if (mmu_psize_defs[psize].shift && | 675 | if (mmu_psize_defs[psize].shift && |
@@ -686,29 +684,14 @@ static void __init set_huge_psize(int psize) | |||
686 | return; | 684 | return; |
687 | hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); | 685 | hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); |
688 | 686 | ||
689 | switch (mmu_psize_defs[psize].shift) { | 687 | if (mmu_psize_defs[psize].shift < PMD_SHIFT) |
690 | case PAGE_SHIFT_64K: | 688 | pdshift = PMD_SHIFT; |
691 | /* We only allow 64k hpages with 4k base page, | 689 | else if (mmu_psize_defs[psize].shift < PUD_SHIFT) |
692 | * which was checked above, and always put them | 690 | pdshift = PUD_SHIFT; |
693 | * at the PMD */ | 691 | else |
694 | hugepte_shift[psize] = PMD_SHIFT; | 692 | pdshift = PGDIR_SHIFT; |
695 | break; | 693 | mmu_huge_psizes[psize] = pdshift - mmu_psize_defs[psize].shift; |
696 | case PAGE_SHIFT_16M: | 694 | } |
697 | /* 16M pages can be at two different levels | ||
698 | * of pagestables based on base page size */ | ||
699 | if (PAGE_SHIFT == PAGE_SHIFT_64K) | ||
700 | hugepte_shift[psize] = PMD_SHIFT; | ||
701 | else /* 4k base page */ | ||
702 | hugepte_shift[psize] = PUD_SHIFT; | ||
703 | break; | ||
704 | case PAGE_SHIFT_16G: | ||
705 | /* 16G pages are always at PGD level */ | ||
706 | hugepte_shift[psize] = PGDIR_SHIFT; | ||
707 | break; | ||
708 | } | ||
709 | hugepte_shift[psize] -= mmu_psize_defs[psize].shift; | ||
710 | } else | ||
711 | hugepte_shift[psize] = 0; | ||
712 | } | 695 | } |
713 | 696 | ||
714 | static int __init hugepage_setup_sz(char *str) | 697 | static int __init hugepage_setup_sz(char *str) |
@@ -732,7 +715,7 @@ __setup("hugepagesz=", hugepage_setup_sz); | |||
732 | 715 | ||
733 | static int __init hugetlbpage_init(void) | 716 | static int __init hugetlbpage_init(void) |
734 | { | 717 | { |
735 | unsigned int psize; | 718 | int psize; |
736 | 719 | ||
737 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | 720 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) |
738 | return -ENODEV; | 721 | return -ENODEV; |
@@ -753,8 +736,8 @@ static int __init hugetlbpage_init(void) | |||
753 | 736 | ||
754 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { | 737 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { |
755 | if (mmu_huge_psizes[psize]) { | 738 | if (mmu_huge_psizes[psize]) { |
756 | pgtable_cache_add(hugepte_shift[psize], NULL); | 739 | pgtable_cache_add(mmu_huge_psizes[psize], NULL); |
757 | if (!PGT_CACHE(hugepte_shift[psize])) | 740 | if (!PGT_CACHE(mmu_huge_psizes[psize])) |
758 | panic("hugetlbpage_init(): could not create " | 741 | panic("hugetlbpage_init(): could not create " |
759 | "pgtable cache for %d bit pagesize\n", | 742 | "pgtable cache for %d bit pagesize\n", |
760 | mmu_psize_to_shift(psize)); | 743 | mmu_psize_to_shift(psize)); |