aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2006-03-22 03:08:57 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-22 10:54:03 -0500
commit9da61aef0fd5b17dd4bf4baf33db12c470def774 (patch)
treecb4bb0b63c36a0b303ceadc0cec0fae00c49ecf3
parent27a85ef1b81300cfff06b4c8037e9914dfb09acc (diff)
[PATCH] hugepage: Fix hugepage logic in free_pgtables()
free_pgtables() has special logic to call hugetlb_free_pgd_range() instead of the normal free_pgd_range() on hugepage VMAs. However, the test it uses to do so is incorrect: it calls is_hugepage_only_range on a hugepage sized range at the start of the vma. is_hugepage_only_range() will return true if the given range has any intersection with a hugepage address region, and in this case the given region need not be hugepage aligned. So, for example, this test can return true if called on, say, a 4k VMA immediately preceding a (nicely aligned) hugepage VMA. At present we get away with this because the powerpc version of hugetlb_free_pgd_range() is just a call to free_pgd_range(). On ia64 (the only other arch with a non-trivial is_hugepage_only_range()) we get away with it for a different reason; the hugepage area is not contiguous with the rest of the user address space, and VMAs are not permitted in between, so the test can't return a false positive there. Nonetheless this should be fixed. We do that in the patch below by replacing the is_hugepage_only_range() test with an explicit test of the VMA using is_vm_hugetlb_page(). This in turn changes behaviour for platforms where is_hugepage_only_range() returns false always (everything except powerpc and ia64). We address this by ensuring that hugetlb_free_pgd_range() is defined to be identical to free_pgd_range() (instead of a no-op) on everything except ia64. Even so, it will prevent some otherwise possible coalescing of calls down to free_pgd_range(). Since this only happens for hugepage VMAs, removing this small optimization seems unlikely to cause any trouble. This patch causes no regressions on the libhugetlbfs testsuite - ppc64 POWER5 (8-way), ppc64 G5 (2-way) and i386 Pentium M (UP). Signed-off-by: David Gibson <dwg@au1.ibm.com> Cc: William Lee Irwin III <wli@holomorphy.com> Acked-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/asm-ia64/page.h1
-rw-r--r--include/asm-powerpc/pgtable.h5
-rw-r--r--include/linux/hugetlb.h9
-rw-r--r--mm/memory.c5
4 files changed, 8 insertions, 12 deletions
diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h
index 5e6362a786b7..732cf3086741 100644
--- a/include/asm-ia64/page.h
+++ b/include/asm-ia64/page.h
@@ -57,6 +57,7 @@
57 57
58# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 58# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
59# define ARCH_HAS_HUGEPAGE_ONLY_RANGE 59# define ARCH_HAS_HUGEPAGE_ONLY_RANGE
60# define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
60#endif /* CONFIG_HUGETLB_PAGE */ 61#endif /* CONFIG_HUGETLB_PAGE */
61 62
62#ifdef __ASSEMBLY__ 63#ifdef __ASSEMBLY__
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h
index e38931379a72..185ee15963a1 100644
--- a/include/asm-powerpc/pgtable.h
+++ b/include/asm-powerpc/pgtable.h
@@ -468,11 +468,6 @@ extern pgd_t swapper_pg_dir[];
468 468
469extern void paging_init(void); 469extern void paging_init(void);
470 470
471#ifdef CONFIG_HUGETLB_PAGE
472#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
473 free_pgd_range(tlb, addr, end, floor, ceiling)
474#endif
475
476/* 471/*
477 * This gets called at the end of handling a page fault, when 472 * This gets called at the end of handling a page fault, when
478 * the kernel has put a new PTE into the page table for the process. 473 * the kernel has put a new PTE into the page table for the process.
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5d84c368ffe4..e465fbf1ef5f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -43,8 +43,10 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
43 43
44#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE 44#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
45#define is_hugepage_only_range(mm, addr, len) 0 45#define is_hugepage_only_range(mm, addr, len) 0
46#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ 46#endif
47 do { } while (0) 47
48#ifndef ARCH_HAS_HUGETLB_FREE_PGD_RANGE
49#define hugetlb_free_pgd_range free_pgd_range
48#endif 50#endif
49 51
50#ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE 52#ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE
@@ -93,8 +95,7 @@ static inline unsigned long hugetlb_total_pages(void)
93#define prepare_hugepage_range(addr, len) (-EINVAL) 95#define prepare_hugepage_range(addr, len) (-EINVAL)
94#define pmd_huge(x) 0 96#define pmd_huge(x) 0
95#define is_hugepage_only_range(mm, addr, len) 0 97#define is_hugepage_only_range(mm, addr, len) 0
96#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ 98#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
97 do { } while (0)
98#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) 99#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; })
99 100
100#define hugetlb_change_protection(vma, address, end, newprot) 101#define hugetlb_change_protection(vma, address, end, newprot)
diff --git a/mm/memory.c b/mm/memory.c
index 71bc664efed5..f6e3be9cbf5a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -277,7 +277,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
277 anon_vma_unlink(vma); 277 anon_vma_unlink(vma);
278 unlink_file_vma(vma); 278 unlink_file_vma(vma);
279 279
280 if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) { 280 if (is_vm_hugetlb_page(vma)) {
281 hugetlb_free_pgd_range(tlb, addr, vma->vm_end, 281 hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
282 floor, next? next->vm_start: ceiling); 282 floor, next? next->vm_start: ceiling);
283 } else { 283 } else {
@@ -285,8 +285,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
285 * Optimization: gather nearby vmas into one call down 285 * Optimization: gather nearby vmas into one call down
286 */ 286 */
287 while (next && next->vm_start <= vma->vm_end + PMD_SIZE 287 while (next && next->vm_start <= vma->vm_end + PMD_SIZE
288 && !is_hugepage_only_range(vma->vm_mm, next->vm_start, 288 && !is_vm_hugetlb_page(vma)) {
289 HPAGE_SIZE)) {
290 vma = next; 289 vma = next;
291 next = vma->vm_next; 290 next = vma->vm_next;
292 anon_vma_unlink(vma); 291 anon_vma_unlink(vma);