aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Hugh Dickins <hugh@veritas.com> 2006-10-28 13:38:43 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-10-28 14:30:53 -0400
commit: 856fc29505556cf263f3dcda2533cf3766c14ab6 (patch)
tree: 986b301f967487043cac92c7381e6e92bbcfe146
parent: b9d7e6ae82da124dc9c579fe1061264ef2a69407 (diff)
[PATCH] hugetlb: fix prio_tree unit
hugetlb_vmtruncate_list was misconverted to prio_tree: its prio_tree is in units of PAGE_SIZE (PAGE_CACHE_SIZE) like any other, not HPAGE_SIZE (whereas its radix_tree is kept in units of HPAGE_SIZE, otherwise slots would be absurdly sparse).

At first I thought the error benign, just calling __unmap_hugepage_range on more vmas than necessary; but on 32-bit machines, when the prio_tree is searched correctly, it happens to ensure the v_offset calculation won't overflow. As it stood, when truncating at or beyond 4GB, it was liable to discard pages COWed from lower offsets; or even to clear pmd entries of preceding vmas, triggering exit_mmap's BUG_ON(nr_ptes).

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/hugetlbfs/inode.c 24
1 files changed, 11 insertions, 13 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0b23b963bb44..0bea6a619e10 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -271,26 +271,24 @@ static void hugetlbfs_drop_inode(struct inode *inode)
271 hugetlbfs_forget_inode(inode); 271 hugetlbfs_forget_inode(inode);
272} 272}
273 273
274/*
275 * h_pgoff is in HPAGE_SIZE units.
276 * vma->vm_pgoff is in PAGE_SIZE units.
277 */
278static inline void 274static inline void
279hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) 275hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
280{ 276{
281 struct vm_area_struct *vma; 277 struct vm_area_struct *vma;
282 struct prio_tree_iter iter; 278 struct prio_tree_iter iter;
283 279
284 vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) { 280 vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
285 unsigned long h_vm_pgoff;
286 unsigned long v_offset; 281 unsigned long v_offset;
287 282
288 h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
289 v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT;
290 /* 283 /*
291 * Is this VMA fully outside the truncation point? 284 * Can the expression below overflow on 32-bit arches?
285 * No, because the prio_tree returns us only those vmas
286 * which overlap the truncated area starting at pgoff,
287 * and no vma on a 32-bit arch can span beyond the 4GB.
292 */ 288 */
293 if (h_vm_pgoff >= h_pgoff) 289 if (vma->vm_pgoff < pgoff)
290 v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT;
291 else
294 v_offset = 0; 292 v_offset = 0;
295 293
296 __unmap_hugepage_range(vma, 294 __unmap_hugepage_range(vma,
@@ -303,14 +301,14 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
303 */ 301 */
304static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) 302static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
305{ 303{
306 unsigned long pgoff; 304 pgoff_t pgoff;
307 struct address_space *mapping = inode->i_mapping; 305 struct address_space *mapping = inode->i_mapping;
308 306
309 if (offset > inode->i_size) 307 if (offset > inode->i_size)
310 return -EINVAL; 308 return -EINVAL;
311 309
312 BUG_ON(offset & ~HPAGE_MASK); 310 BUG_ON(offset & ~HPAGE_MASK);
313 pgoff = offset >> HPAGE_SHIFT; 311 pgoff = offset >> PAGE_SHIFT;
314 312
315 inode->i_size = offset; 313 inode->i_size = offset;
316 spin_lock(&mapping->i_mmap_lock); 314 spin_lock(&mapping->i_mmap_lock);