aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2006-11-14 05:03:32 -0500
committerLinus Torvalds <torvalds@woody.osdl.org>2006-11-14 12:09:27 -0500
commit68589bc353037f233fe510ad9ff432338c95db66 (patch)
treededc58ff66134f54796642917e2a2a26ac6802b0
parent69ae9e3ee4ce99140a7db424bebf55d8d180da2f (diff)
[PATCH] hugetlb: prepare_hugepage_range check offset too
(David:) If hugetlbfs_file_mmap() returns a failure to do_mmap_pgoff() - for example, because the given file offset is not hugepage aligned - then do_mmap_pgoff will go to the unmap_and_free_vma backout path. But at this stage the vma hasn't been marked as hugepage, and the backout path will call unmap_region() on it. That will eventually call down to the non-hugepage version of unmap_page_range(). On ppc64, at least, that will cause serious problems if there are any existing hugepage pagetable entries in the vicinity - for example if there are any other hugepage mappings under the same PUD. unmap_page_range() will trigger a bad_pud() on the hugepage pud entries. I suspect this will also cause bad problems on ia64, though I don't have a machine to test it on. (Hugh:) prepare_hugepage_range() should check file offset alignment when it checks virtual address and length, to stop MAP_FIXED with a bad huge offset from unmapping before it fails further down. PowerPC should apply the same prepare_hugepage_range alignment checks as ia64 and all the others do. Then none of the alignment checks in hugetlbfs_file_mmap are required (nor is the check for too small a mapping); but even so, move up setting of VM_HUGETLB and add a comment to warn of what David Gibson discovered - if hugetlbfs_file_mmap fails before setting it, do_mmap_pgoff's unmap_region when unwinding from error will go the non-huge way, which may cause bad behaviour on architectures (powerpc and ia64) which segregate their huge mappings into a separate region of the address space. Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: "David S. Miller" <davem@davemloft.net> Acked-by: Adam Litke <agl@us.ibm.com> Acked-by: David Gibson <david@gibson.dropbear.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/ia64/mm/hugetlbpage.c4
-rw-r--r--arch/powerpc/mm/hugetlbpage.c8
-rw-r--r--fs/hugetlbfs/inode.c21
-rw-r--r--include/linux/hugetlb.h10
-rw-r--r--mm/mmap.c2
5 files changed, 25 insertions, 20 deletions
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index eee5c1cfbe3..f3a9585e98a 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -70,8 +70,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
70 * Don't actually need to do any preparation, but need to make sure 70 * Don't actually need to do any preparation, but need to make sure
71 * the address is in the right region. 71 * the address is in the right region.
72 */ 72 */
73int prepare_hugepage_range(unsigned long addr, unsigned long len) 73int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
74{ 74{
75 if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
76 return -EINVAL;
75 if (len & ~HPAGE_MASK) 77 if (len & ~HPAGE_MASK)
76 return -EINVAL; 78 return -EINVAL;
77 if (addr & ~HPAGE_MASK) 79 if (addr & ~HPAGE_MASK)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fd68b74c07c..506d89768d4 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -491,11 +491,15 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
491 return 0; 491 return 0;
492} 492}
493 493
494int prepare_hugepage_range(unsigned long addr, unsigned long len) 494int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
495{ 495{
496 int err = 0; 496 int err = 0;
497 497
498 if ( (addr+len) < addr ) 498 if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
499 return -EINVAL;
500 if (len & ~HPAGE_MASK)
501 return -EINVAL;
502 if (addr & ~HPAGE_MASK)
499 return -EINVAL; 503 return -EINVAL;
500 504
501 if (addr < 0x100000000UL) 505 if (addr < 0x100000000UL)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0bea6a619e1..7f4756963d0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -62,24 +62,19 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
62 loff_t len, vma_len; 62 loff_t len, vma_len;
63 int ret; 63 int ret;
64 64
65 if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1)) 65 /*
66 return -EINVAL; 66 * vma alignment has already been checked by prepare_hugepage_range.
67 67 * If you add any error returns here, do so after setting VM_HUGETLB,
68 if (vma->vm_start & ~HPAGE_MASK) 68 * so is_vm_hugetlb_page tests below unmap_region go the right way
69 return -EINVAL; 69 * when do_mmap_pgoff unwinds (may be important on powerpc and ia64).
70 70 */
71 if (vma->vm_end & ~HPAGE_MASK) 71 vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
72 return -EINVAL; 72 vma->vm_ops = &hugetlb_vm_ops;
73
74 if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
75 return -EINVAL;
76 73
77 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 74 vma_len = (loff_t)(vma->vm_end - vma->vm_start);
78 75
79 mutex_lock(&inode->i_mutex); 76 mutex_lock(&inode->i_mutex);
80 file_accessed(file); 77 file_accessed(file);
81 vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
82 vma->vm_ops = &hugetlb_vm_ops;
83 78
84 ret = -ENOMEM; 79 ret = -ENOMEM;
85 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 80 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5081d27bfa2..ace64e57e17 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -60,8 +60,11 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
60 * If the arch doesn't supply something else, assume that hugepage 60 * If the arch doesn't supply something else, assume that hugepage
61 * size aligned regions are ok without further preparation. 61 * size aligned regions are ok without further preparation.
62 */ 62 */
63static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) 63static inline int prepare_hugepage_range(unsigned long addr, unsigned long len,
64 pgoff_t pgoff)
64{ 65{
66 if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
67 return -EINVAL;
65 if (len & ~HPAGE_MASK) 68 if (len & ~HPAGE_MASK)
66 return -EINVAL; 69 return -EINVAL;
67 if (addr & ~HPAGE_MASK) 70 if (addr & ~HPAGE_MASK)
@@ -69,7 +72,8 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
69 return 0; 72 return 0;
70} 73}
71#else 74#else
72int prepare_hugepage_range(unsigned long addr, unsigned long len); 75int prepare_hugepage_range(unsigned long addr, unsigned long len,
76 pgoff_t pgoff);
73#endif 77#endif
74 78
75#ifndef ARCH_HAS_SETCLEAR_HUGE_PTE 79#ifndef ARCH_HAS_SETCLEAR_HUGE_PTE
@@ -107,7 +111,7 @@ static inline unsigned long hugetlb_total_pages(void)
107#define hugetlb_report_meminfo(buf) 0 111#define hugetlb_report_meminfo(buf) 0
108#define hugetlb_report_node_meminfo(n, buf) 0 112#define hugetlb_report_node_meminfo(n, buf) 0
109#define follow_huge_pmd(mm, addr, pmd, write) NULL 113#define follow_huge_pmd(mm, addr, pmd, write) NULL
110#define prepare_hugepage_range(addr, len) (-EINVAL) 114#define prepare_hugepage_range(addr,len,pgoff) (-EINVAL)
111#define pmd_huge(x) 0 115#define pmd_huge(x) 0
112#define is_hugepage_only_range(mm, addr, len) 0 116#define is_hugepage_only_range(mm, addr, len) 0
113#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) 117#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
diff --git a/mm/mmap.c b/mm/mmap.c
index 497e502dfd6..bdace87d7c0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1379,7 +1379,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1379 * Check if the given range is hugepage aligned, and 1379 * Check if the given range is hugepage aligned, and
1380 * can be made suitable for hugepages. 1380 * can be made suitable for hugepages.
1381 */ 1381 */
1382 ret = prepare_hugepage_range(addr, len); 1382 ret = prepare_hugepage_range(addr, len, pgoff);
1383 } else { 1383 } else {
1384 /* 1384 /*
1385 * Ensure that a normal request is not falling in a 1385 * Ensure that a normal request is not falling in a