diff options
author | Hugh Dickins <hugh@veritas.com> | 2006-10-28 13:38:43 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-28 14:30:53 -0400 |
commit | 856fc29505556cf263f3dcda2533cf3766c14ab6 (patch) | |
tree | 986b301f967487043cac92c7381e6e92bbcfe146 | |
parent | b9d7e6ae82da124dc9c579fe1061264ef2a69407 (diff) |
[PATCH] hugetlb: fix prio_tree unit
hugetlb_vmtruncate_list was misconverted to prio_tree: its prio_tree is in
units of PAGE_SIZE (PAGE_CACHE_SIZE) like any other, not HPAGE_SIZE (whereas
its radix_tree is kept in units of HPAGE_SIZE, otherwise slots would be
absurdly sparse).
At first I thought the error benign, just calling __unmap_hugepage_range on
more vmas than necessary; but on 32-bit machines, when the prio_tree is
searched correctly, it happens to ensure the v_offset calculation won't
overflow. As it stood, when truncating at or beyond 4GB, it was liable to
discard pages COWed from lower offsets; or even to clear pmd entries of
preceding vmas, triggering exit_mmap's BUG_ON(nr_ptes).
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/hugetlbfs/inode.c | 24 |
1 file changed, 11 insertions, 13 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 0b23b963bb44..0bea6a619e10 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -271,26 +271,24 @@ static void hugetlbfs_drop_inode(struct inode *inode) | |||
271 | hugetlbfs_forget_inode(inode); | 271 | hugetlbfs_forget_inode(inode); |
272 | } | 272 | } |
273 | 273 | ||
274 | /* | ||
275 | * h_pgoff is in HPAGE_SIZE units. | ||
276 | * vma->vm_pgoff is in PAGE_SIZE units. | ||
277 | */ | ||
278 | static inline void | 274 | static inline void |
279 | hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) | 275 | hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) |
280 | { | 276 | { |
281 | struct vm_area_struct *vma; | 277 | struct vm_area_struct *vma; |
282 | struct prio_tree_iter iter; | 278 | struct prio_tree_iter iter; |
283 | 279 | ||
284 | vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) { | 280 | vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { |
285 | unsigned long h_vm_pgoff; | ||
286 | unsigned long v_offset; | 281 | unsigned long v_offset; |
287 | 282 | ||
288 | h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); | ||
289 | v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT; | ||
290 | /* | 283 | /* |
291 | * Is this VMA fully outside the truncation point? | 284 | * Can the expression below overflow on 32-bit arches? |
285 | * No, because the prio_tree returns us only those vmas | ||
286 | * which overlap the truncated area starting at pgoff, | ||
287 | * and no vma on a 32-bit arch can span beyond the 4GB. | ||
292 | */ | 288 | */ |
293 | if (h_vm_pgoff >= h_pgoff) | 289 | if (vma->vm_pgoff < pgoff) |
290 | v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT; | ||
291 | else | ||
294 | v_offset = 0; | 292 | v_offset = 0; |
295 | 293 | ||
296 | __unmap_hugepage_range(vma, | 294 | __unmap_hugepage_range(vma, |
@@ -303,14 +301,14 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) | |||
303 | */ | 301 | */ |
304 | static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) | 302 | static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) |
305 | { | 303 | { |
306 | unsigned long pgoff; | 304 | pgoff_t pgoff; |
307 | struct address_space *mapping = inode->i_mapping; | 305 | struct address_space *mapping = inode->i_mapping; |
308 | 306 | ||
309 | if (offset > inode->i_size) | 307 | if (offset > inode->i_size) |
310 | return -EINVAL; | 308 | return -EINVAL; |
311 | 309 | ||
312 | BUG_ON(offset & ~HPAGE_MASK); | 310 | BUG_ON(offset & ~HPAGE_MASK); |
313 | pgoff = offset >> HPAGE_SHIFT; | 311 | pgoff = offset >> PAGE_SHIFT; |
314 | 312 | ||
315 | inode->i_size = offset; | 313 | inode->i_size = offset; |
316 | spin_lock(&mapping->i_mmap_lock); | 314 | spin_lock(&mapping->i_mmap_lock); |