aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ppc64
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2005-08-11 02:55:21 -0400
committerPaul Mackerras <paulus@samba.org>2005-08-28 20:53:38 -0400
commitc594adad5653491813959277fb87a2fef54c4e05 (patch)
treebaefdce10a0b490017b0a5acbe9dbeb8cb14c003 /arch/ppc64
parent9a5573e378c5c8976c6000a7643b52e2a0481688 (diff)
[PATCH] Dynamic hugepage addresses for ppc64
Paulus, I think this is now a reasonable candidate for the post-2.6.13 queue. Relax address restrictions for hugepages on ppc64. Presently, 64-bit applications on ppc64 may only use hugepages in the address region from 1-1.5T. Furthermore, if hugepages are enabled in the kernel config, they may only use hugepages and never normal pages in this area. This patch relaxes this restriction, allowing any address to be used with hugepages, but with a 1TB granularity. That is, if you map a hugepage anywhere in the region 1TB-2TB, that entire area will be reserved exclusively for hugepages for the remainder of the process's lifetime. This works analogously to hugepages in 32-bit applications, where hugepages can be mapped anywhere, but with 256MB (mmu segment) granularity. This patch applies on top of the four level pagetable patch (http://patchwork.ozlabs.org/linuxppc64/patch?id=1936). Signed-off-by: David Gibson <dwg@au1.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/ppc64')
-rw-r--r--arch/ppc64/kernel/asm-offsets.c3
-rw-r--r--arch/ppc64/mm/hugetlbpage.c211
-rw-r--r--arch/ppc64/mm/slb_low.S23
3 files changed, 172 insertions, 65 deletions
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c
index abb9e5b5da03..17e35d0fed09 100644
--- a/arch/ppc64/kernel/asm-offsets.c
+++ b/arch/ppc64/kernel/asm-offsets.c
@@ -94,7 +94,8 @@ int main(void)
94 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); 94 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
95 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 95 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
96#ifdef CONFIG_HUGETLB_PAGE 96#ifdef CONFIG_HUGETLB_PAGE
97 DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs)); 97 DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
98 DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
98#endif /* CONFIG_HUGETLB_PAGE */ 99#endif /* CONFIG_HUGETLB_PAGE */
99 DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); 100 DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr));
100 DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); 101 DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index a13e44230a6f..e7833c80eb68 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -27,6 +27,9 @@
27 27
28#include <linux/sysctl.h> 28#include <linux/sysctl.h>
29 29
30#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT)
31#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
32
30/* Modelled after find_linux_pte() */ 33/* Modelled after find_linux_pte() */
31pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 34pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
32{ 35{
@@ -129,15 +132,17 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
129 return 0; 132 return 0;
130} 133}
131 134
132static void flush_segments(void *parm) 135static void flush_low_segments(void *parm)
133{ 136{
134 u16 segs = (unsigned long) parm; 137 u16 areas = (unsigned long) parm;
135 unsigned long i; 138 unsigned long i;
136 139
137 asm volatile("isync" : : : "memory"); 140 asm volatile("isync" : : : "memory");
138 141
139 for (i = 0; i < 16; i++) { 142 BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);
140 if (! (segs & (1U << i))) 143
144 for (i = 0; i < NUM_LOW_AREAS; i++) {
145 if (! (areas & (1U << i)))
141 continue; 146 continue;
142 asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); 147 asm volatile("slbie %0" : : "r" (i << SID_SHIFT));
143 } 148 }
@@ -145,13 +150,33 @@ static void flush_segments(void *parm)
145 asm volatile("isync" : : : "memory"); 150 asm volatile("isync" : : : "memory");
146} 151}
147 152
148static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) 153static void flush_high_segments(void *parm)
154{
155 u16 areas = (unsigned long) parm;
156 unsigned long i, j;
157
158 asm volatile("isync" : : : "memory");
159
160 BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);
161
162 for (i = 0; i < NUM_HIGH_AREAS; i++) {
163 if (! (areas & (1U << i)))
164 continue;
165 for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
166 asm volatile("slbie %0"
167 :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT)));
168 }
169
170 asm volatile("isync" : : : "memory");
171}
172
173static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
149{ 174{
150 unsigned long start = seg << SID_SHIFT; 175 unsigned long start = area << SID_SHIFT;
151 unsigned long end = (seg+1) << SID_SHIFT; 176 unsigned long end = (area+1) << SID_SHIFT;
152 struct vm_area_struct *vma; 177 struct vm_area_struct *vma;
153 178
154 BUG_ON(seg >= 16); 179 BUG_ON(area >= NUM_LOW_AREAS);
155 180
156 /* Check no VMAs are in the region */ 181 /* Check no VMAs are in the region */
157 vma = find_vma(mm, start); 182 vma = find_vma(mm, start);
@@ -161,20 +186,69 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
161 return 0; 186 return 0;
162} 187}
163 188
164static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) 189static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
190{
191 unsigned long start = area << HTLB_AREA_SHIFT;
192 unsigned long end = (area+1) << HTLB_AREA_SHIFT;
193 struct vm_area_struct *vma;
194
195 BUG_ON(area >= NUM_HIGH_AREAS);
196
197 /* Check no VMAs are in the region */
198 vma = find_vma(mm, start);
199 if (vma && (vma->vm_start < end))
200 return -EBUSY;
201
202 return 0;
203}
204
205static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
165{ 206{
166 unsigned long i; 207 unsigned long i;
167 208
168 newsegs &= ~(mm->context.htlb_segs); 209 BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
169 if (! newsegs) 210 BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
211
212 newareas &= ~(mm->context.low_htlb_areas);
213 if (! newareas)
170 return 0; /* The segments we want are already open */ 214 return 0; /* The segments we want are already open */
171 215
172 for (i = 0; i < 16; i++) 216 for (i = 0; i < NUM_LOW_AREAS; i++)
173 if ((1 << i) & newsegs) 217 if ((1 << i) & newareas)
174 if (prepare_low_seg_for_htlb(mm, i) != 0) 218 if (prepare_low_area_for_htlb(mm, i) != 0)
219 return -EBUSY;
220
221 mm->context.low_htlb_areas |= newareas;
222
223 /* update the paca copy of the context struct */
224 get_paca()->context = mm->context;
225
226 /* the context change must make it to memory before the flush,
227 * so that further SLB misses do the right thing. */
228 mb();
229 on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);
230
231 return 0;
232}
233
234static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
235{
236 unsigned long i;
237
238 BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
239 BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
240 != NUM_HIGH_AREAS);
241
242 newareas &= ~(mm->context.high_htlb_areas);
243 if (! newareas)
244 return 0; /* The areas we want are already open */
245
246 for (i = 0; i < NUM_HIGH_AREAS; i++)
247 if ((1 << i) & newareas)
248 if (prepare_high_area_for_htlb(mm, i) != 0)
175 return -EBUSY; 249 return -EBUSY;
176 250
177 mm->context.htlb_segs |= newsegs; 251 mm->context.high_htlb_areas |= newareas;
178 252
179 /* update the paca copy of the context struct */ 253 /* update the paca copy of the context struct */
180 get_paca()->context = mm->context; 254 get_paca()->context = mm->context;
@@ -182,29 +256,33 @@ static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
182 /* the context change must make it to memory before the flush, 256 /* the context change must make it to memory before the flush,
183 * so that further SLB misses do the right thing. */ 257 * so that further SLB misses do the right thing. */
184 mb(); 258 mb();
185 on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1); 259 on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);
186 260
187 return 0; 261 return 0;
188} 262}
189 263
190int prepare_hugepage_range(unsigned long addr, unsigned long len) 264int prepare_hugepage_range(unsigned long addr, unsigned long len)
191{ 265{
192 if (within_hugepage_high_range(addr, len)) 266 int err;
193 return 0; 267
194 else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) { 268 if ( (addr+len) < addr )
195 int err; 269 return -EINVAL;
196 /* Yes, we need both tests, in case addr+len overflows 270
197 * 64-bit arithmetic */ 271 if ((addr + len) < 0x100000000UL)
198 err = open_low_hpage_segs(current->mm, 272 err = open_low_hpage_areas(current->mm,
199 LOW_ESID_MASK(addr, len)); 273 LOW_ESID_MASK(addr, len));
200 if (err) 274 else
201 printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" 275 err = open_high_hpage_areas(current->mm,
202 " failed (segs: 0x%04hx)\n", addr, len, 276 HTLB_AREA_MASK(addr, len));
203 LOW_ESID_MASK(addr, len)); 277 if (err) {
278 printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
279 " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
280 addr, len,
281 LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
204 return err; 282 return err;
205 } 283 }
206 284
207 return -EINVAL; 285 return 0;
208} 286}
209 287
210struct page * 288struct page *
@@ -276,8 +354,8 @@ full_search:
276 vma = find_vma(mm, addr); 354 vma = find_vma(mm, addr);
277 continue; 355 continue;
278 } 356 }
279 if (touches_hugepage_high_range(addr, len)) { 357 if (touches_hugepage_high_range(mm, addr, len)) {
280 addr = TASK_HPAGE_END; 358 addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
281 vma = find_vma(mm, addr); 359 vma = find_vma(mm, addr);
282 continue; 360 continue;
283 } 361 }
@@ -356,8 +434,9 @@ hugepage_recheck:
356 if (touches_hugepage_low_range(mm, addr, len)) { 434 if (touches_hugepage_low_range(mm, addr, len)) {
357 addr = (addr & ((~0) << SID_SHIFT)) - len; 435 addr = (addr & ((~0) << SID_SHIFT)) - len;
358 goto hugepage_recheck; 436 goto hugepage_recheck;
359 } else if (touches_hugepage_high_range(addr, len)) { 437 } else if (touches_hugepage_high_range(mm, addr, len)) {
360 addr = TASK_HPAGE_BASE - len; 438 addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
439 goto hugepage_recheck;
361 } 440 }
362 441
363 /* 442 /*
@@ -448,23 +527,28 @@ static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
448 return -ENOMEM; 527 return -ENOMEM;
449} 528}
450 529
451static unsigned long htlb_get_high_area(unsigned long len) 530static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
452{ 531{
453 unsigned long addr = TASK_HPAGE_BASE; 532 unsigned long addr = 0x100000000UL;
454 struct vm_area_struct *vma; 533 struct vm_area_struct *vma;
455 534
456 vma = find_vma(current->mm, addr); 535 vma = find_vma(current->mm, addr);
457 for (vma = find_vma(current->mm, addr); 536 while (addr + len <= TASK_SIZE_USER64) {
458 addr + len <= TASK_HPAGE_END;
459 vma = vma->vm_next) {
460 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ 537 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
461 BUG_ON(! within_hugepage_high_range(addr, len)); 538
539 if (! __within_hugepage_high_range(addr, len, areamask)) {
540 addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
541 vma = find_vma(current->mm, addr);
542 continue;
543 }
462 544
463 if (!vma || (addr + len) <= vma->vm_start) 545 if (!vma || (addr + len) <= vma->vm_start)
464 return addr; 546 return addr;
465 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 547 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
466 /* Because we're in a hugepage region, this alignment 548 /* Depending on segmask this might not be a confirmed
467 * should not skip us over any VMAs */ 549 * hugepage region, so the ALIGN could have skipped
550 * some VMAs */
551 vma = find_vma(current->mm, addr);
468 } 552 }
469 553
470 return -ENOMEM; 554 return -ENOMEM;
@@ -474,6 +558,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
474 unsigned long len, unsigned long pgoff, 558 unsigned long len, unsigned long pgoff,
475 unsigned long flags) 559 unsigned long flags)
476{ 560{
561 int lastshift;
562 u16 areamask, curareas;
563
477 if (len & ~HPAGE_MASK) 564 if (len & ~HPAGE_MASK)
478 return -EINVAL; 565 return -EINVAL;
479 566
@@ -481,31 +568,49 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
481 return -EINVAL; 568 return -EINVAL;
482 569
483 if (test_thread_flag(TIF_32BIT)) { 570 if (test_thread_flag(TIF_32BIT)) {
484 int lastshift = 0; 571 curareas = current->mm->context.low_htlb_areas;
485 u16 segmask, cursegs = current->mm->context.htlb_segs;
486 572
487 /* First see if we can do the mapping in the existing 573 /* First see if we can do the mapping in the existing
488 * low hpage segments */ 574 * low areas */
489 addr = htlb_get_low_area(len, cursegs); 575 addr = htlb_get_low_area(len, curareas);
490 if (addr != -ENOMEM) 576 if (addr != -ENOMEM)
491 return addr; 577 return addr;
492 578
493 for (segmask = LOW_ESID_MASK(0x100000000UL-len, len); 579 lastshift = 0;
494 ! lastshift; segmask >>=1) { 580 for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
495 if (segmask & 1) 581 ! lastshift; areamask >>=1) {
582 if (areamask & 1)
496 lastshift = 1; 583 lastshift = 1;
497 584
498 addr = htlb_get_low_area(len, cursegs | segmask); 585 addr = htlb_get_low_area(len, curareas | areamask);
499 if ((addr != -ENOMEM) 586 if ((addr != -ENOMEM)
500 && open_low_hpage_segs(current->mm, segmask) == 0) 587 && open_low_hpage_areas(current->mm, areamask) == 0)
501 return addr; 588 return addr;
502 } 589 }
503 printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
504 " enough segments\n");
505 return -ENOMEM;
506 } else { 590 } else {
507 return htlb_get_high_area(len); 591 curareas = current->mm->context.high_htlb_areas;
592
593 /* First see if we can do the mapping in the existing
594 * high areas */
595 addr = htlb_get_high_area(len, curareas);
596 if (addr != -ENOMEM)
597 return addr;
598
599 lastshift = 0;
600 for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
601 ! lastshift; areamask >>=1) {
602 if (areamask & 1)
603 lastshift = 1;
604
605 addr = htlb_get_high_area(len, curareas | areamask);
606 if ((addr != -ENOMEM)
607 && open_high_hpage_areas(current->mm, areamask) == 0)
608 return addr;
609 }
508 } 610 }
611 printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
612 " enough areas\n");
613 return -ENOMEM;
509} 614}
510 615
511int hash_huge_page(struct mm_struct *mm, unsigned long access, 616int hash_huge_page(struct mm_struct *mm, unsigned long access,
diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S
index f20fc52483a7..bab255889c58 100644
--- a/arch/ppc64/mm/slb_low.S
+++ b/arch/ppc64/mm/slb_low.S
@@ -89,28 +89,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
89 b 9f 89 b 9f
90 90
910: /* user address: proto-VSID = context<<15 | ESID */ 910: /* user address: proto-VSID = context<<15 | ESID */
92 li r11,SLB_VSID_USER
93
94 srdi. r9,r3,USER_ESID_BITS 92 srdi. r9,r3,USER_ESID_BITS
95 bne- 8f /* invalid ea bits set */ 93 bne- 8f /* invalid ea bits set */
96 94
97#ifdef CONFIG_HUGETLB_PAGE 95#ifdef CONFIG_HUGETLB_PAGE
98BEGIN_FTR_SECTION 96BEGIN_FTR_SECTION
99 /* check against the hugepage ranges */ 97 lhz r9,PACAHIGHHTLBAREAS(r13)
100 cmpldi r3,(TASK_HPAGE_END>>SID_SHIFT) 98 srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
101 bge 6f /* >= TASK_HPAGE_END */ 99 srd r9,r9,r11
102 cmpldi r3,(TASK_HPAGE_BASE>>SID_SHIFT) 100 andi. r9,r9,1
103 bge 5f /* TASK_HPAGE_BASE..TASK_HPAGE_END */ 101 bne 5f
102
103 li r11,SLB_VSID_USER
104
104 cmpldi r3,16 105 cmpldi r3,16
105 bge 6f /* 4GB..TASK_HPAGE_BASE */ 106 bge 6f
106 107
107 lhz r9,PACAHTLBSEGS(r13) 108 lhz r9,PACALOWHTLBAREAS(r13)
108 srd r9,r9,r3 109 srd r9,r9,r3
109 andi. r9,r9,1 110 andi. r9,r9,1
111
110 beq 6f 112 beq 6f
111 113
1125: /* this is a hugepage user address */ 1145: li r11,SLB_VSID_USER|SLB_VSID_L
113 li r11,(SLB_VSID_USER|SLB_VSID_L)
114END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 115END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
115#endif /* CONFIG_HUGETLB_PAGE */ 116#endif /* CONFIG_HUGETLB_PAGE */
116 117