Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
 -rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 745
 1 files changed, 745 insertions, 0 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
new file mode 100644
index 000000000000..0ea0994ed974
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -0,0 +1,745 @@
/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/tlb.h>

#include <linux/sysctl.h>

#define NUM_LOW_AREAS   (0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS  (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
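
/*
 * Each bit in the low/high area masks stands for one chunk of user address
 * space that can be switched over to hugepage use: below 4GB the chunks are
 * SID_SHIFT-sized (256MB) MMU segments; above 4GB they are larger
 * HTLB_AREA_SHIFT-sized areas.  The masks are kept in
 * mm->context.low_htlb_areas / high_htlb_areas and each fits in a u16.
 */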

/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        pte_t *pt;

        BUG_ON(! in_hugepage_area(mm->context, addr));

        addr &= HPAGE_MASK;

        pg = pgd_offset(mm, addr);
        if (!pgd_none(*pg)) {
                pu = pud_offset(pg, addr);
                if (!pud_none(*pu)) {
                        pm = pmd_offset(pu, addr);
                        pt = (pte_t *)pm;
                        BUG_ON(!pmd_none(*pm)
                               && !(pte_present(*pt) && pte_huge(*pt)));
                        return pt;
                }
        }

        return NULL;
}

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        pte_t *pt;

        BUG_ON(! in_hugepage_area(mm->context, addr));

        addr &= HPAGE_MASK;

        pg = pgd_offset(mm, addr);
        pu = pud_alloc(mm, pg, addr);

        if (pu) {
                pm = pmd_alloc(mm, pu, addr);
                if (pm) {
                        pt = (pte_t *)pm;
                        BUG_ON(!pmd_none(*pm)
                               && !(pte_present(*pt) && pte_huge(*pt)));
                        return pt;
                }
        }

        return NULL;
}

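/*
 * One huge page covers HPAGE_SIZE / PMD_SIZE consecutive pmd slots, so the
 * set/clear helpers below replicate the pte value (or the clear) across the
 * whole batch.
 */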
#define HUGEPTE_BATCH_SIZE      (HPAGE_SIZE / PMD_SIZE)

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        int i;

        if (pte_present(*ptep)) {
                pte_clear(mm, addr, ptep);
                flush_tlb_pending();
        }

        for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
                *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
                ptep++;
        }
}

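/*
 * Clear a huge pte and return the old value; if the pte had a hash table
 * entry, notify the hash flush code via hpte_update().
 */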
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned long old = pte_update(ptep, ~0UL);
        int i;

        if (old & _PAGE_HASHPTE)
                hpte_update(mm, addr, old, 0);

        for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
                ptep[i] = __pte(0);

        return __pte(old);
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
        if (! (within_hugepage_low_range(addr, len)
               || within_hugepage_high_range(addr, len)) )
                return -EINVAL;
        return 0;
}

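/*
 * IPI handlers, run on every CPU via on_each_cpu() below: throw out any SLB
 * entries covering the segments that have just been converted to hugepage
 * use, so that subsequent SLB misses pick up the new page size.
 */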
static void flush_low_segments(void *parm)
{
        u16 areas = (unsigned long) parm;
        unsigned long i;

        asm volatile("isync" : : : "memory");

        BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);

        for (i = 0; i < NUM_LOW_AREAS; i++) {
                if (! (areas & (1U << i)))
                        continue;
                asm volatile("slbie %0"
                             : : "r" ((i << SID_SHIFT) | SLBIE_C));
        }

        asm volatile("isync" : : : "memory");
}

static void flush_high_segments(void *parm)
{
        u16 areas = (unsigned long) parm;
        unsigned long i, j;

        asm volatile("isync" : : : "memory");

        BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);

        for (i = 0; i < NUM_HIGH_AREAS; i++) {
                if (! (areas & (1U << i)))
                        continue;
                for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
                        asm volatile("slbie %0"
                                     :: "r" (((i << HTLB_AREA_SHIFT)
                                              + (j << SID_SHIFT)) | SLBIE_C));
        }

        asm volatile("isync" : : : "memory");
}

static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
        unsigned long start = area << SID_SHIFT;
        unsigned long end = (area+1) << SID_SHIFT;
        struct vm_area_struct *vma;

        BUG_ON(area >= NUM_LOW_AREAS);

        /* Check no VMAs are in the region */
        vma = find_vma(mm, start);
        if (vma && (vma->vm_start < end))
                return -EBUSY;

        return 0;
}

static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
        unsigned long start = area << HTLB_AREA_SHIFT;
        unsigned long end = (area+1) << HTLB_AREA_SHIFT;
        struct vm_area_struct *vma;

        BUG_ON(area >= NUM_HIGH_AREAS);

        /* Check no VMAs are in the region */
        vma = find_vma(mm, start);
        if (vma && (vma->vm_start < end))
                return -EBUSY;

        return 0;
}

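/*
 * Open a set of areas for hugepage use: check that they contain no
 * normal-page VMAs, set the corresponding bits in the mm context, mirror the
 * context into the paca, then flush any stale SLB entries on all CPUs.
 */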
static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
        unsigned long i;

        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

        newareas &= ~(mm->context.low_htlb_areas);
        if (! newareas)
                return 0; /* The segments we want are already open */

        for (i = 0; i < NUM_LOW_AREAS; i++)
                if ((1 << i) & newareas)
                        if (prepare_low_area_for_htlb(mm, i) != 0)
                                return -EBUSY;

        mm->context.low_htlb_areas |= newareas;

        /* update the paca copy of the context struct */
        get_paca()->context = mm->context;

        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();
        on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);

        return 0;
}

static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
        unsigned long i;

        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
                     != NUM_HIGH_AREAS);

        newareas &= ~(mm->context.high_htlb_areas);
        if (! newareas)
                return 0; /* The areas we want are already open */

        for (i = 0; i < NUM_HIGH_AREAS; i++)
                if ((1 << i) & newareas)
                        if (prepare_high_area_for_htlb(mm, i) != 0)
                                return -EBUSY;

        mm->context.high_htlb_areas |= newareas;

        /* update the paca copy of the context struct */
        get_paca()->context = mm->context;

        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();
        on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);

        return 0;
}

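/*
 * Open every low or high area touched by a requested hugepage mapping of
 * [addr, addr+len), before the mapping itself is set up.
 */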
int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
        int err;

        if ( (addr+len) < addr )
                return -EINVAL;

        if ((addr + len) < 0x100000000UL)
                err = open_low_hpage_areas(current->mm,
                                           LOW_ESID_MASK(addr, len));
        else
                err = open_high_hpage_areas(current->mm,
                                            HTLB_AREA_MASK(addr, len));
        if (err) {
                printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
                       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
                       addr, len,
                       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
                return err;
        }

        return 0;
}

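/*
 * Hugepage lookups are handled entirely through follow_huge_addr() here, so
 * pmd_huge() reports false and follow_huge_pmd() should never be reached
 * (hence the BUG()).
 */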
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        pte_t *ptep;
        struct page *page;

        if (! in_hugepage_area(mm->context, address))
                return ERR_PTR(-EINVAL);

        ptep = huge_pte_offset(mm, address);
        page = pte_page(*ptep);
        if (page)
                page += (address % HPAGE_SIZE) / PAGE_SIZE;

        return page;
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        BUG();
        return NULL;
}

/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
                                     unsigned long len, unsigned long pgoff,
                                     unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;

        if (len > TASK_SIZE)
                return -ENOMEM;

        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (((TASK_SIZE - len) >= addr)
                    && (!vma || (addr+len) <= vma->vm_start)
                    && !is_hugepage_only_range(mm, addr, len))
                        return addr;
        }
        if (len > mm->cached_hole_size) {
                start_addr = addr = mm->free_area_cache;
        } else {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
        }

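        /*
         * Bottom-up scan: walk the VMA list looking for a large enough hole.
         * Any candidate that would touch a hugepage-reserved segment or area
         * is skipped by rounding addr up to the next segment/area boundary.
         */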
full_search:
        vma = find_vma(mm, addr);
        while (TASK_SIZE - len >= addr) {
                BUG_ON(vma && (addr >= vma->vm_end));

                if (touches_hugepage_low_range(mm, addr, len)) {
                        addr = ALIGN(addr+1, 1<<SID_SHIFT);
                        vma = find_vma(mm, addr);
                        continue;
                }
                if (touches_hugepage_high_range(mm, addr, len)) {
                        addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
                        vma = find_vma(mm, addr);
                        continue;
                }
                if (!vma || addr + len <= vma->vm_start) {
                        /*
                         * Remember the place where we stopped the search:
                         */
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;
                addr = vma->vm_end;
                vma = vma->vm_next;
        }

        /* Make sure we didn't miss any holes */
        if (start_addr != TASK_UNMAPPED_BASE) {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
                goto full_search;
        }
        return -ENOMEM;
}

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 *
 * Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions.
 */
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                               const unsigned long len, const unsigned long pgoff,
                               const unsigned long flags)
{
        struct vm_area_struct *vma, *prev_vma;
        struct mm_struct *mm = current->mm;
        unsigned long base = mm->mmap_base, addr = addr0;
        unsigned long largest_hole = mm->cached_hole_size;
        int first_time = 1;

        /* requested length too big for entire address space */
        if (len > TASK_SIZE)
                return -ENOMEM;

        /* don't allow allocations above current base */
        if (mm->free_area_cache > base)
                mm->free_area_cache = base;

        /* requesting a specific address */
        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start)
                    && !is_hugepage_only_range(mm, addr, len))
                        return addr;
        }

        if (len <= largest_hole) {
                largest_hole = 0;
                mm->free_area_cache = base;
        }
try_again:
        /* make sure it can fit in the remaining address space */
        if (mm->free_area_cache < len)
                goto fail;

        /* either no address requested or can't fit in requested address hole */
        addr = (mm->free_area_cache - len) & PAGE_MASK;
        do {
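                /*
                 * If the candidate range would overlap a hugepage-reserved
                 * segment or area, drop addr to just below that reserved
                 * chunk and recheck, since the move may expose another one.
                 */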
hugepage_recheck:
                if (touches_hugepage_low_range(mm, addr, len)) {
                        addr = (addr & ((~0) << SID_SHIFT)) - len;
                        goto hugepage_recheck;
                } else if (touches_hugepage_high_range(mm, addr, len)) {
                        addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
                        goto hugepage_recheck;
                }

                /*
                 * Lookup failure means no vma is above this address,
                 * i.e. return with success:
                 */
                if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
                        return addr;

                /*
                 * new region fits between prev_vma->vm_end and
                 * vma->vm_start, use it:
                 */
                if (addr+len <= vma->vm_start &&
                    (!prev_vma || (addr >= prev_vma->vm_end))) {
                        /* remember the address as a hint for next time */
                        mm->cached_hole_size = largest_hole;
                        return (mm->free_area_cache = addr);
                } else {
                        /* pull free_area_cache down to the first hole */
                        if (mm->free_area_cache == vma->vm_end) {
                                mm->free_area_cache = vma->vm_start;
                                mm->cached_hole_size = largest_hole;
                        }
                }

                /* remember the largest hole we saw so far */
                if (addr + largest_hole < vma->vm_start)
                        largest_hole = vma->vm_start - addr;

                /* try just below the current vma->vm_start */
                addr = vma->vm_start-len;
        } while (len <= vma->vm_start);

fail:
        /*
         * if hint left us with no space for the requested
         * mapping then try again:
         */
        if (first_time) {
                mm->free_area_cache = base;
                largest_hole = 0;
                first_time = 0;
                goto try_again;
        }
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
        addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = base;
        mm->cached_hole_size = ~0UL;

        return addr;
}

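/*
 * Scan bottom-up through the low (below 4GB) address range for a free,
 * hugepage-aligned range of the requested length lying entirely within the
 * segments named by segmask.  The high-area variant below does the same
 * above 4GB using areamask.
 */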
static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
{
        unsigned long addr = 0;
        struct vm_area_struct *vma;

        vma = find_vma(current->mm, addr);
        while (addr + len <= 0x100000000UL) {
                BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

                if (! __within_hugepage_low_range(addr, len, segmask)) {
                        addr = ALIGN(addr+1, 1<<SID_SHIFT);
                        vma = find_vma(current->mm, addr);
                        continue;
                }

                if (!vma || (addr + len) <= vma->vm_start)
                        return addr;
                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
                /* Depending on segmask this might not be a confirmed
                 * hugepage region, so the ALIGN could have skipped
                 * some VMAs */
                vma = find_vma(current->mm, addr);
        }

        return -ENOMEM;
}

static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
{
        unsigned long addr = 0x100000000UL;
        struct vm_area_struct *vma;

        vma = find_vma(current->mm, addr);
        while (addr + len <= TASK_SIZE_USER64) {
                BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

                if (! __within_hugepage_high_range(addr, len, areamask)) {
                        addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
                        vma = find_vma(current->mm, addr);
                        continue;
                }

                if (!vma || (addr + len) <= vma->vm_start)
                        return addr;
                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
                /* Depending on areamask this might not be a confirmed
                 * hugepage region, so the ALIGN could have skipped
                 * some VMAs */
                vma = find_vma(current->mm, addr);
        }

        return -ENOMEM;
}

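/*
 * Pick an address for a hugepage mapping: first try the areas already open
 * for this mm; failing that, progressively widen the candidate mask from the
 * top of the (low or high) address range downward, opening new areas as they
 * are needed.
 */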
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                        unsigned long len, unsigned long pgoff,
                                        unsigned long flags)
{
        int lastshift;
        u16 areamask, curareas;

        if (len & ~HPAGE_MASK)
                return -EINVAL;

        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                return -EINVAL;

        if (test_thread_flag(TIF_32BIT)) {
                curareas = current->mm->context.low_htlb_areas;

                /* First see if we can do the mapping in the existing
                 * low areas */
                addr = htlb_get_low_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;

                lastshift = 0;
                for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
                     ! lastshift; areamask >>=1) {
                        if (areamask & 1)
                                lastshift = 1;

                        addr = htlb_get_low_area(len, curareas | areamask);
                        if ((addr != -ENOMEM)
                            && open_low_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }
        } else {
                curareas = current->mm->context.high_htlb_areas;

                /* First see if we can do the mapping in the existing
                 * high areas */
                addr = htlb_get_high_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;

                lastshift = 0;
                for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
                     ! lastshift; areamask >>=1) {
                        if (areamask & 1)
                                lastshift = 1;

                        addr = htlb_get_high_area(len, curareas | areamask);
                        if ((addr != -ENOMEM)
                            && open_high_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }
        }
        printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
               " enough areas\n");
        return -ENOMEM;
}

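/*
 * Hash-fault handler for hugepages: look up the huge pte, check access
 * permissions, then either update the existing hash PTE or insert a new one
 * (HPTE_V_LARGE), falling back to the secondary hash group and evicting an
 * entry when both groups are full.  Returns 0 on success, 1 to send the
 * fault up to do_page_fault().
 */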
int hash_huge_page(struct mm_struct *mm, unsigned long access,
                   unsigned long ea, unsigned long vsid, int local)
{
        pte_t *ptep;
        unsigned long va, vpn;
        pte_t old_pte, new_pte;
        unsigned long rflags, prpn;
        long slot;
        int err = 1;

        spin_lock(&mm->page_table_lock);

        ptep = huge_pte_offset(mm, ea);

        /* Search the Linux page table for a match with va */
        va = (vsid << 28) | (ea & 0x0fffffff);
        vpn = va >> HPAGE_SHIFT;

        /*
         * If no pte found or not present, send the problem up to
         * do_page_fault
         */
        if (unlikely(!ptep || pte_none(*ptep)))
                goto out;

        /* BUG_ON(pte_bad(*ptep)); */

        /*
         * Check the user's access rights to the page.  If access should be
         * prevented then send the problem up to do_page_fault.
         */
        if (unlikely(access & ~pte_val(*ptep)))
                goto out;
        /*
         * At this point, we have a pte (old_pte) which can be used to build
         * or update an HPTE. There are 2 cases:
         *
         * 1. There is a valid (present) pte with no associated HPTE (this is
         *      the most common case)
         * 2. There is a valid (present) pte with an associated HPTE. The
         *      current values of the pp bits in the HPTE prevent access
         *      because we are doing software DIRTY bit management and the
         *      page is currently not DIRTY.
         */


        old_pte = *ptep;
        new_pte = old_pte;

        rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
        /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
        rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);

        /* Check if pte already has an hpte (case 2) */
        if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
                /* There MIGHT be an HPTE for this pte */
                unsigned long hash, slot;

                hash = hpt_hash(vpn, 1);
                if (pte_val(old_pte) & _PAGE_SECONDARY)
                        hash = ~hash;
                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;

                if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
                        pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
        }

        if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
                unsigned long hash = hpt_hash(vpn, 1);
                unsigned long hpte_group;

                prpn = pte_pfn(old_pte);

repeat:
                hpte_group = ((hash & htab_hash_mask) *
                              HPTES_PER_GROUP) & ~0x7UL;

                /* Update the linux pte with the HPTE slot */
                pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
                pte_val(new_pte) |= _PAGE_HASHPTE;

                /* Add in WIMG bits */
                /* XXX We should store these in the pte */
                rflags |= _PAGE_COHERENT;

                slot = ppc_md.hpte_insert(hpte_group, va, prpn,
                                          HPTE_V_LARGE, rflags);

                /* Primary is full, try the secondary */
                if (unlikely(slot == -1)) {
                        pte_val(new_pte) |= _PAGE_SECONDARY;
                        hpte_group = ((~hash & htab_hash_mask) *
                                      HPTES_PER_GROUP) & ~0x7UL;
                        slot = ppc_md.hpte_insert(hpte_group, va, prpn,
                                                  HPTE_V_LARGE |
                                                  HPTE_V_SECONDARY,
                                                  rflags);
                        if (slot == -1) {
                                if (mftb() & 0x1)
                                        hpte_group = ((hash & htab_hash_mask) *
                                                      HPTES_PER_GROUP) & ~0x7UL;

                                ppc_md.hpte_remove(hpte_group);
                                goto repeat;
                        }
                }

                if (unlikely(slot == -2))
                        panic("hash_huge_page: pte_insert failed\n");

                pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;

                /*
                 * No need to use ldarx/stdcx here because all who
                 * might be updating the pte will hold the
                 * page_table_lock
                 */
                *ptep = new_pte;
        }

        err = 0;

out:
        spin_unlock(&mm->page_table_lock);

        return err;
}