diff options
author | David Gibson <david@gibson.dropbear.id.au> | 2005-08-11 02:55:21 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2005-08-28 20:53:38 -0400 |
commit | c594adad5653491813959277fb87a2fef54c4e05 (patch) | |
tree | baefdce10a0b490017b0a5acbe9dbeb8cb14c003 /arch/ppc64/mm/hugetlbpage.c | |
parent | 9a5573e378c5c8976c6000a7643b52e2a0481688 (diff) |
[PATCH] Dynamic hugepage addresses for ppc64
Paulus, I think this is now a reasonable candidate for the post-2.6.13
queue.
Relax address restrictions for hugepages on ppc64
Presently, 64-bit applications on ppc64 may only use hugepages in the
address region from 1TB to 1.5TB. Furthermore, if hugepages are enabled in
the kernel config, they may only use hugepages and never normal pages
in this area. This patch relaxes this restriction, allowing any
address to be used with hugepages, but with a 1TB granularity. That
is, if you map a hugepage anywhere in the region 1TB-2TB, that entire
area will be reserved exclusively for hugepages for the remainder of
the process's lifetime. This works analogously to hugepages in 32-bit
applications, where hugepages can be mapped anywhere, but with 256MB
(mmu segment) granularity.
This patch applies on top of the four level pagetable patch
(http://patchwork.ozlabs.org/linuxppc64/patch?id=1936).
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/ppc64/mm/hugetlbpage.c')
-rw-r--r-- | arch/ppc64/mm/hugetlbpage.c | 211 |
1 files changed, 158 insertions, 53 deletions
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index a13e44230a6f..e7833c80eb68 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c | |||
@@ -27,6 +27,9 @@ | |||
27 | 27 | ||
28 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
29 | 29 | ||
30 | #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) | ||
31 | #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) | ||
32 | |||
30 | /* Modelled after find_linux_pte() */ | 33 | /* Modelled after find_linux_pte() */ |
31 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 34 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
32 | { | 35 | { |
@@ -129,15 +132,17 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | |||
129 | return 0; | 132 | return 0; |
130 | } | 133 | } |
131 | 134 | ||
132 | static void flush_segments(void *parm) | 135 | static void flush_low_segments(void *parm) |
133 | { | 136 | { |
134 | u16 segs = (unsigned long) parm; | 137 | u16 areas = (unsigned long) parm; |
135 | unsigned long i; | 138 | unsigned long i; |
136 | 139 | ||
137 | asm volatile("isync" : : : "memory"); | 140 | asm volatile("isync" : : : "memory"); |
138 | 141 | ||
139 | for (i = 0; i < 16; i++) { | 142 | BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); |
140 | if (! (segs & (1U << i))) | 143 | |
144 | for (i = 0; i < NUM_LOW_AREAS; i++) { | ||
145 | if (! (areas & (1U << i))) | ||
141 | continue; | 146 | continue; |
142 | asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); | 147 | asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); |
143 | } | 148 | } |
@@ -145,13 +150,33 @@ static void flush_segments(void *parm) | |||
145 | asm volatile("isync" : : : "memory"); | 150 | asm volatile("isync" : : : "memory"); |
146 | } | 151 | } |
147 | 152 | ||
148 | static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) | 153 | static void flush_high_segments(void *parm) |
154 | { | ||
155 | u16 areas = (unsigned long) parm; | ||
156 | unsigned long i, j; | ||
157 | |||
158 | asm volatile("isync" : : : "memory"); | ||
159 | |||
160 | BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); | ||
161 | |||
162 | for (i = 0; i < NUM_HIGH_AREAS; i++) { | ||
163 | if (! (areas & (1U << i))) | ||
164 | continue; | ||
165 | for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) | ||
166 | asm volatile("slbie %0" | ||
167 | :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT))); | ||
168 | } | ||
169 | |||
170 | asm volatile("isync" : : : "memory"); | ||
171 | } | ||
172 | |||
173 | static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) | ||
149 | { | 174 | { |
150 | unsigned long start = seg << SID_SHIFT; | 175 | unsigned long start = area << SID_SHIFT; |
151 | unsigned long end = (seg+1) << SID_SHIFT; | 176 | unsigned long end = (area+1) << SID_SHIFT; |
152 | struct vm_area_struct *vma; | 177 | struct vm_area_struct *vma; |
153 | 178 | ||
154 | BUG_ON(seg >= 16); | 179 | BUG_ON(area >= NUM_LOW_AREAS); |
155 | 180 | ||
156 | /* Check no VMAs are in the region */ | 181 | /* Check no VMAs are in the region */ |
157 | vma = find_vma(mm, start); | 182 | vma = find_vma(mm, start); |
@@ -161,20 +186,69 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) | |||
161 | return 0; | 186 | return 0; |
162 | } | 187 | } |
163 | 188 | ||
164 | static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) | 189 | static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) |
190 | { | ||
191 | unsigned long start = area << HTLB_AREA_SHIFT; | ||
192 | unsigned long end = (area+1) << HTLB_AREA_SHIFT; | ||
193 | struct vm_area_struct *vma; | ||
194 | |||
195 | BUG_ON(area >= NUM_HIGH_AREAS); | ||
196 | |||
197 | /* Check no VMAs are in the region */ | ||
198 | vma = find_vma(mm, start); | ||
199 | if (vma && (vma->vm_start < end)) | ||
200 | return -EBUSY; | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) | ||
165 | { | 206 | { |
166 | unsigned long i; | 207 | unsigned long i; |
167 | 208 | ||
168 | newsegs &= ~(mm->context.htlb_segs); | 209 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); |
169 | if (! newsegs) | 210 | BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); |
211 | |||
212 | newareas &= ~(mm->context.low_htlb_areas); | ||
213 | if (! newareas) | ||
170 | return 0; /* The segments we want are already open */ | 214 | return 0; /* The segments we want are already open */ |
171 | 215 | ||
172 | for (i = 0; i < 16; i++) | 216 | for (i = 0; i < NUM_LOW_AREAS; i++) |
173 | if ((1 << i) & newsegs) | 217 | if ((1 << i) & newareas) |
174 | if (prepare_low_seg_for_htlb(mm, i) != 0) | 218 | if (prepare_low_area_for_htlb(mm, i) != 0) |
219 | return -EBUSY; | ||
220 | |||
221 | mm->context.low_htlb_areas |= newareas; | ||
222 | |||
223 | /* update the paca copy of the context struct */ | ||
224 | get_paca()->context = mm->context; | ||
225 | |||
226 | /* the context change must make it to memory before the flush, | ||
227 | * so that further SLB misses do the right thing. */ | ||
228 | mb(); | ||
229 | on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); | ||
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) | ||
235 | { | ||
236 | unsigned long i; | ||
237 | |||
238 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); | ||
239 | BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) | ||
240 | != NUM_HIGH_AREAS); | ||
241 | |||
242 | newareas &= ~(mm->context.high_htlb_areas); | ||
243 | if (! newareas) | ||
244 | return 0; /* The areas we want are already open */ | ||
245 | |||
246 | for (i = 0; i < NUM_HIGH_AREAS; i++) | ||
247 | if ((1 << i) & newareas) | ||
248 | if (prepare_high_area_for_htlb(mm, i) != 0) | ||
175 | return -EBUSY; | 249 | return -EBUSY; |
176 | 250 | ||
177 | mm->context.htlb_segs |= newsegs; | 251 | mm->context.high_htlb_areas |= newareas; |
178 | 252 | ||
179 | /* update the paca copy of the context struct */ | 253 | /* update the paca copy of the context struct */ |
180 | get_paca()->context = mm->context; | 254 | get_paca()->context = mm->context; |
@@ -182,29 +256,33 @@ static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) | |||
182 | /* the context change must make it to memory before the flush, | 256 | /* the context change must make it to memory before the flush, |
183 | * so that further SLB misses do the right thing. */ | 257 | * so that further SLB misses do the right thing. */ |
184 | mb(); | 258 | mb(); |
185 | on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1); | 259 | on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); |
186 | 260 | ||
187 | return 0; | 261 | return 0; |
188 | } | 262 | } |
189 | 263 | ||
190 | int prepare_hugepage_range(unsigned long addr, unsigned long len) | 264 | int prepare_hugepage_range(unsigned long addr, unsigned long len) |
191 | { | 265 | { |
192 | if (within_hugepage_high_range(addr, len)) | 266 | int err; |
193 | return 0; | 267 | |
194 | else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) { | 268 | if ( (addr+len) < addr ) |
195 | int err; | 269 | return -EINVAL; |
196 | /* Yes, we need both tests, in case addr+len overflows | 270 | |
197 | * 64-bit arithmetic */ | 271 | if ((addr + len) < 0x100000000UL) |
198 | err = open_low_hpage_segs(current->mm, | 272 | err = open_low_hpage_areas(current->mm, |
199 | LOW_ESID_MASK(addr, len)); | 273 | LOW_ESID_MASK(addr, len)); |
200 | if (err) | 274 | else |
201 | printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" | 275 | err = open_high_hpage_areas(current->mm, |
202 | " failed (segs: 0x%04hx)\n", addr, len, | 276 | HTLB_AREA_MASK(addr, len)); |
203 | LOW_ESID_MASK(addr, len)); | 277 | if (err) { |
278 | printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" | ||
279 | " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", | ||
280 | addr, len, | ||
281 | LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); | ||
204 | return err; | 282 | return err; |
205 | } | 283 | } |
206 | 284 | ||
207 | return -EINVAL; | 285 | return 0; |
208 | } | 286 | } |
209 | 287 | ||
210 | struct page * | 288 | struct page * |
@@ -276,8 +354,8 @@ full_search: | |||
276 | vma = find_vma(mm, addr); | 354 | vma = find_vma(mm, addr); |
277 | continue; | 355 | continue; |
278 | } | 356 | } |
279 | if (touches_hugepage_high_range(addr, len)) { | 357 | if (touches_hugepage_high_range(mm, addr, len)) { |
280 | addr = TASK_HPAGE_END; | 358 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); |
281 | vma = find_vma(mm, addr); | 359 | vma = find_vma(mm, addr); |
282 | continue; | 360 | continue; |
283 | } | 361 | } |
@@ -356,8 +434,9 @@ hugepage_recheck: | |||
356 | if (touches_hugepage_low_range(mm, addr, len)) { | 434 | if (touches_hugepage_low_range(mm, addr, len)) { |
357 | addr = (addr & ((~0) << SID_SHIFT)) - len; | 435 | addr = (addr & ((~0) << SID_SHIFT)) - len; |
358 | goto hugepage_recheck; | 436 | goto hugepage_recheck; |
359 | } else if (touches_hugepage_high_range(addr, len)) { | 437 | } else if (touches_hugepage_high_range(mm, addr, len)) { |
360 | addr = TASK_HPAGE_BASE - len; | 438 | addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; |
439 | goto hugepage_recheck; | ||
361 | } | 440 | } |
362 | 441 | ||
363 | /* | 442 | /* |
@@ -448,23 +527,28 @@ static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) | |||
448 | return -ENOMEM; | 527 | return -ENOMEM; |
449 | } | 528 | } |
450 | 529 | ||
451 | static unsigned long htlb_get_high_area(unsigned long len) | 530 | static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) |
452 | { | 531 | { |
453 | unsigned long addr = TASK_HPAGE_BASE; | 532 | unsigned long addr = 0x100000000UL; |
454 | struct vm_area_struct *vma; | 533 | struct vm_area_struct *vma; |
455 | 534 | ||
456 | vma = find_vma(current->mm, addr); | 535 | vma = find_vma(current->mm, addr); |
457 | for (vma = find_vma(current->mm, addr); | 536 | while (addr + len <= TASK_SIZE_USER64) { |
458 | addr + len <= TASK_HPAGE_END; | ||
459 | vma = vma->vm_next) { | ||
460 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ | 537 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ |
461 | BUG_ON(! within_hugepage_high_range(addr, len)); | 538 | |
539 | if (! __within_hugepage_high_range(addr, len, areamask)) { | ||
540 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); | ||
541 | vma = find_vma(current->mm, addr); | ||
542 | continue; | ||
543 | } | ||
462 | 544 | ||
463 | if (!vma || (addr + len) <= vma->vm_start) | 545 | if (!vma || (addr + len) <= vma->vm_start) |
464 | return addr; | 546 | return addr; |
465 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | 547 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); |
466 | /* Because we're in a hugepage region, this alignment | 548 | /* Depending on segmask this might not be a confirmed |
467 | * should not skip us over any VMAs */ | 549 | * hugepage region, so the ALIGN could have skipped |
550 | * some VMAs */ | ||
551 | vma = find_vma(current->mm, addr); | ||
468 | } | 552 | } |
469 | 553 | ||
470 | return -ENOMEM; | 554 | return -ENOMEM; |
@@ -474,6 +558,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
474 | unsigned long len, unsigned long pgoff, | 558 | unsigned long len, unsigned long pgoff, |
475 | unsigned long flags) | 559 | unsigned long flags) |
476 | { | 560 | { |
561 | int lastshift; | ||
562 | u16 areamask, curareas; | ||
563 | |||
477 | if (len & ~HPAGE_MASK) | 564 | if (len & ~HPAGE_MASK) |
478 | return -EINVAL; | 565 | return -EINVAL; |
479 | 566 | ||
@@ -481,31 +568,49 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
481 | return -EINVAL; | 568 | return -EINVAL; |
482 | 569 | ||
483 | if (test_thread_flag(TIF_32BIT)) { | 570 | if (test_thread_flag(TIF_32BIT)) { |
484 | int lastshift = 0; | 571 | curareas = current->mm->context.low_htlb_areas; |
485 | u16 segmask, cursegs = current->mm->context.htlb_segs; | ||
486 | 572 | ||
487 | /* First see if we can do the mapping in the existing | 573 | /* First see if we can do the mapping in the existing |
488 | * low hpage segments */ | 574 | * low areas */ |
489 | addr = htlb_get_low_area(len, cursegs); | 575 | addr = htlb_get_low_area(len, curareas); |
490 | if (addr != -ENOMEM) | 576 | if (addr != -ENOMEM) |
491 | return addr; | 577 | return addr; |
492 | 578 | ||
493 | for (segmask = LOW_ESID_MASK(0x100000000UL-len, len); | 579 | lastshift = 0; |
494 | ! lastshift; segmask >>=1) { | 580 | for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); |
495 | if (segmask & 1) | 581 | ! lastshift; areamask >>=1) { |
582 | if (areamask & 1) | ||
496 | lastshift = 1; | 583 | lastshift = 1; |
497 | 584 | ||
498 | addr = htlb_get_low_area(len, cursegs | segmask); | 585 | addr = htlb_get_low_area(len, curareas | areamask); |
499 | if ((addr != -ENOMEM) | 586 | if ((addr != -ENOMEM) |
500 | && open_low_hpage_segs(current->mm, segmask) == 0) | 587 | && open_low_hpage_areas(current->mm, areamask) == 0) |
501 | return addr; | 588 | return addr; |
502 | } | 589 | } |
503 | printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" | ||
504 | " enough segments\n"); | ||
505 | return -ENOMEM; | ||
506 | } else { | 590 | } else { |
507 | return htlb_get_high_area(len); | 591 | curareas = current->mm->context.high_htlb_areas; |
592 | |||
593 | /* First see if we can do the mapping in the existing | ||
594 | * high areas */ | ||
595 | addr = htlb_get_high_area(len, curareas); | ||
596 | if (addr != -ENOMEM) | ||
597 | return addr; | ||
598 | |||
599 | lastshift = 0; | ||
600 | for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); | ||
601 | ! lastshift; areamask >>=1) { | ||
602 | if (areamask & 1) | ||
603 | lastshift = 1; | ||
604 | |||
605 | addr = htlb_get_high_area(len, curareas | areamask); | ||
606 | if ((addr != -ENOMEM) | ||
607 | && open_high_hpage_areas(current->mm, areamask) == 0) | ||
608 | return addr; | ||
609 | } | ||
508 | } | 610 | } |
611 | printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" | ||
612 | " enough areas\n"); | ||
613 | return -ENOMEM; | ||
509 | } | 614 | } |
510 | 615 | ||
511 | int hash_huge_page(struct mm_struct *mm, unsigned long access, | 616 | int hash_huge_page(struct mm_struct *mm, unsigned long access, |