aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Whitcroft <apw@shadowen.org>2008-11-06 15:53:26 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2008-11-06 18:41:18 -0500
commit69d177c2fc702d402b17fdca2190d5a7e3ca55c5 (patch)
tree2040e0a84b7c07c29ac6fb6e51e125de52256f5d
parent22bece00dc1f28dd3374c55e464c9f02eb642876 (diff)
hugetlbfs: handle pages higher order than MAX_ORDER
When working with hugepages, hugetlbfs assumes that those hugepages are smaller than MAX_ORDER. Specifically it assumes that the mem_map is contiguous and uses that to optimise access to the elements of the mem_map that represent the hugepage. Gigantic pages (such as 16GB pages on powerpc) by definition are of greater order than MAX_ORDER (larger than MAX_ORDER_NR_PAGES in size). This means that we can no longer make use of the buddy allocator guarantees for the contiguity of the mem_map, which ensure that the mem_map is at least contiguous for maximally aligned areas of MAX_ORDER_NR_PAGES pages. This patch adds new mem_map accessors and iterator helpers which handle any discontiguity at MAX_ORDER_NR_PAGES boundaries. It then uses these to implement gigantic page versions of copy_huge_page and clear_huge_page, and to allow follow_hugetlb_page to handle gigantic pages. Signed-off-by: Andy Whitcroft <apw@shadowen.org> Cc: Jon Tollefson <kniht@linux.vnet.ibm.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Christoph Lameter <cl@linux-foundation.org> Cc: <stable@kernel.org> [2.6.27.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/hugetlb.c37
-rw-r--r--mm/internal.h28
2 files changed, 64 insertions, 1 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 421aee99b84a..e6afe527bd09 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -354,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma)
354 return 0; 354 return 0;
355} 355}
356 356
/*
 * Clear a huge page one subpage at a time, walking its struct pages with
 * mem_map_next() rather than indexing page[i], so that a mem_map which is
 * discontiguous at MAX_ORDER_NR_PAGES boundaries is still traversed
 * correctly.
 *
 * page: struct page of the first subpage; also used as the base for
 *       mem_map_next()
 * addr: address handed to clear_user_highpage() for subpage 0; subpage i
 *       is given addr + i * PAGE_SIZE
 * sz:   size of the huge page; sz/PAGE_SIZE subpages are cleared
 */
357static void clear_gigantic_page(struct page *page,
358 unsigned long addr, unsigned long sz)
359{
360 int i;
361 struct page *p = page;
362
363 might_sleep();
/*
 * The comma expression increments i first, so mem_map_next() is asked
 * for subpage i (the next one to clear). It is also evaluated once after
 * the final subpage; that result is never used.
 *
 * NOTE(review): mem_map_next() returns NULL when the computed pfn is not
 * valid, and p is not checked before being passed on.
 */
364 for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
365 cond_resched();
366 clear_user_highpage(p, addr + i * PAGE_SIZE);
367 }
368}
357static void clear_huge_page(struct page *page, 369static void clear_huge_page(struct page *page,
358 unsigned long addr, unsigned long sz) 370 unsigned long addr, unsigned long sz)
359{ 371{
360 int i; 372 int i;
361 373
374 if (unlikely(sz > MAX_ORDER_NR_PAGES))
375 return clear_gigantic_page(page, addr, sz);
376
362 might_sleep(); 377 might_sleep();
363 for (i = 0; i < sz/PAGE_SIZE; i++) { 378 for (i = 0; i < sz/PAGE_SIZE; i++) {
364 cond_resched(); 379 cond_resched();
@@ -366,12 +381,32 @@ static void clear_huge_page(struct page *page,
366 } 381 }
367} 382}
368 383
/*
 * Copy a huge page from src to dst one subpage at a time. Both struct page
 * cursors are advanced with mem_map_next() instead of dst + i / src + i, so
 * that a mem_map which is discontiguous at MAX_ORDER_NR_PAGES boundaries is
 * still traversed correctly.
 *
 * dst:  struct page of the first destination subpage
 * src:  struct page of the first source subpage
 * addr: address handed to copy_user_highpage() for subpage 0; subpage i
 *       is given addr + i*PAGE_SIZE
 * vma:  mapping the page belongs to; selects the hstate (and therefore the
 *       number of subpages) and is passed through to copy_user_highpage()
 */
384static void copy_gigantic_page(struct page *dst, struct page *src,
385 unsigned long addr, struct vm_area_struct *vma)
386{
387 int i;
388 struct hstate *h = hstate_vma(vma);
/* Remember the first subpages: mem_map_next() recomputes from the base. */
389 struct page *dst_base = dst;
390 struct page *src_base = src;
391 might_sleep();
/* i is advanced inside the body, before the cursors are stepped. */
392 for (i = 0; i < pages_per_huge_page(h); ) {
393 cond_resched();
394 copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
395
396 i++;
/*
 * Step both cursors to subpage i.
 * NOTE(review): mem_map_next() returns NULL when the computed pfn is not
 * valid; neither result is checked here.
 */
397 dst = mem_map_next(dst, dst_base, i);
398 src = mem_map_next(src, src_base, i);
399 }
400}
369static void copy_huge_page(struct page *dst, struct page *src, 401static void copy_huge_page(struct page *dst, struct page *src,
370 unsigned long addr, struct vm_area_struct *vma) 402 unsigned long addr, struct vm_area_struct *vma)
371{ 403{
372 int i; 404 int i;
373 struct hstate *h = hstate_vma(vma); 405 struct hstate *h = hstate_vma(vma);
374 406
407 if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
408 return copy_gigantic_page(dst, src, addr, vma);
409
375 might_sleep(); 410 might_sleep();
376 for (i = 0; i < pages_per_huge_page(h); i++) { 411 for (i = 0; i < pages_per_huge_page(h); i++) {
377 cond_resched(); 412 cond_resched();
@@ -2130,7 +2165,7 @@ same_page:
2130 if (zeropage_ok) 2165 if (zeropage_ok)
2131 pages[i] = ZERO_PAGE(0); 2166 pages[i] = ZERO_PAGE(0);
2132 else 2167 else
2133 pages[i] = page + pfn_offset; 2168 pages[i] = mem_map_offset(page, pfn_offset);
2134 get_page(pages[i]); 2169 get_page(pages[i]);
2135 } 2170 }
2136 2171
diff --git a/mm/internal.h b/mm/internal.h
index e4e728bdf324..f482460de3e6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -176,6 +176,34 @@ static inline void free_page_mlock(struct page *page) { }
176#endif /* CONFIG_UNEVICTABLE_LRU */ 176#endif /* CONFIG_UNEVICTABLE_LRU */
177 177
178/* 178/*
179 * Return the mem_map entry representing the 'offset' subpage within
180 * the maximally aligned gigantic page 'base'. Handle any discontiguity
181 * in the mem_map at MAX_ORDER_NR_PAGES boundaries.
182 */
183static inline struct page *mem_map_offset(struct page *base, int offset)
184{
/*
 * An offset of MAX_ORDER_NR_PAGES or more is resolved through the pfn
 * (page_to_pfn() + offset, then pfn_to_page()) instead of pointer
 * arithmetic on base.
 */
185 if (unlikely(offset >= MAX_ORDER_NR_PAGES))
186 return pfn_to_page(page_to_pfn(base) + offset);
/* Smaller offsets use plain pointer arithmetic on base. */
187 return base + offset;
188}
189
190/*
191 * Iterator over all subpages within the maximally aligned gigantic
192 * page 'base'. Handle any discontiguity in the mem_map.
193 */
194static inline struct page *mem_map_next(struct page *iter,
195 struct page *base, int offset)
196{
/*
 * The low bits of offset are all zero, i.e. offset is a multiple of
 * MAX_ORDER_NR_PAGES (assuming MAX_ORDER_NR_PAGES is a power of two --
 * TODO confirm). Here the struct page is looked up again from base's pfn
 * instead of stepping iter.
 */
197 if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
198 unsigned long pfn = page_to_pfn(base) + offset;
/* Returns NULL when the computed pfn is not valid. */
199 if (!pfn_valid(pfn))
200 return NULL;
201 return pfn_to_page(pfn);
202 }
/*
 * Otherwise the next entry is simply the one after iter; this relies on
 * iter being the struct page for subpage offset - 1.
 */
203 return iter + 1;
204}
205
206/*
179 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, 207 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
180 * so all functions starting at paging_init should be marked __init 208 * so all functions starting at paging_init should be marked __init
181 * in those cases. SPARSEMEM, however, allows for memory hotplug, 209 * in those cases. SPARSEMEM, however, allows for memory hotplug,