aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChen, Kenneth W <kenneth.w.chen@intel.com>2006-10-11 04:20:46 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-10-11 14:14:15 -0400
commit502717f4e112b18d9c37753a32f675bec9f2838b (patch)
tree90e674229bbd7caa05e740dfe719cf8749d0eb27
parent97c7801cd5b0bb6a38c16108a496235474dc6310 (diff)
[PATCH] hugetlb: fix linked list corruption in unmap_hugepage_range()
commit fe1668ae5bf0145014c71797febd9ad5670d5d05 causes kernel to oops with libhugetlbfs test suite. The problem is that hugetlb pages can be shared by multiple mappings. Multiple threads can fight over page->lru in the unmap path and bad things happen. We now serialize __unmap_hugepage_range to void concurrent linked list manipulation. Such serialization is also needed for shared page table page on hugetlb area. This patch will fixed the bug and also serve as a prepatch for shared page table. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Cc: Hugh Dickins <hugh@veritas.com> Cc: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--include/linux/hugetlb.h1
-rw-r--r--mm/hugetlb.c22
3 files changed, 22 insertions, 3 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 5e03b2f67b93..4ee3f006b861 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -293,7 +293,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
293 if (h_vm_pgoff >= h_pgoff) 293 if (h_vm_pgoff >= h_pgoff)
294 v_offset = 0; 294 v_offset = 0;
295 295
296 unmap_hugepage_range(vma, 296 __unmap_hugepage_range(vma,
297 vma->vm_start + v_offset, vma->vm_end); 297 vma->vm_start + v_offset, vma->vm_end);
298 } 298 }
299} 299}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c25a38d8f600..5081d27bfa27 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -17,6 +17,7 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
17int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); 17int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
18int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); 18int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
19void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); 19void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
20void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
20int hugetlb_prefault(struct address_space *, struct vm_area_struct *); 21int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
21int hugetlb_report_meminfo(char *); 22int hugetlb_report_meminfo(char *);
22int hugetlb_report_node_meminfo(int, char *); 23int hugetlb_report_node_meminfo(int, char *);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1d709ff528e1..2dbec90dc3ba 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -356,8 +356,8 @@ nomem:
356 return -ENOMEM; 356 return -ENOMEM;
357} 357}
358 358
359void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, 359void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
360 unsigned long end) 360 unsigned long end)
361{ 361{
362 struct mm_struct *mm = vma->vm_mm; 362 struct mm_struct *mm = vma->vm_mm;
363 unsigned long address; 363 unsigned long address;
@@ -398,6 +398,24 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
398 } 398 }
399} 399}
400 400
401void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
402 unsigned long end)
403{
404 /*
405 * It is undesirable to test vma->vm_file as it should be non-null
406 * for valid hugetlb area. However, vm_file will be NULL in the error
407 * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
408 * do_mmap_pgoff() nullifies vma->vm_file before calling this function
409 * to clean up. Since no pte has actually been setup, it is safe to
410 * do nothing in this case.
411 */
412 if (vma->vm_file) {
413 spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
414 __unmap_hugepage_range(vma, start, end);
415 spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
416 }
417}
418
401static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, 419static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
402 unsigned long address, pte_t *ptep, pte_t pte) 420 unsigned long address, pte_t *ptep, pte_t pte)
403{ 421{