aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Arcangeli <aarcange@redhat.com>2011-04-27 18:26:45 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-04-28 14:28:20 -0400
commit78f11a255749d09025f54d4e2df4fbcb031530e2 (patch)
tree20f8ffaf8548d963ffb519631f5c7d7372e9ca42
parent6d4831c283530a5f2c6bd8172c13efa236eb149d (diff)
mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups
The huge_memory.c THP page fault was allowed to run if vm_ops was null (which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't setup a special vma->vm_ops and it would fallback to regular anonymous memory) but other THP logics weren't fully activated for vmas with vm_file not NULL (/dev/zero has a not NULL vma->vm_file). So this removes the vm_file checks so that /dev/zero also can safely use THP (the other albeit safer approach to fix this bug would have been to prevent the THP initial page fault to run if vm_file was set). After removing the vm_file checks, this also makes huge_memory.c stricter in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should only be allowed to exist before the first page fault, and in turn when vma->anon_vma is null (so preventing khugepaged registration). So I tend to think the previous comment saying if vm_file was set, VM_PFNMAP might have been set and we could still be registered in khugepaged (despite anon_vma was not NULL to be registered in khugepaged) was too paranoid. The is_linear_pfn_mapping check is also I think superfluous (as described by comment) but under DEBUG_VM it is safe to stay. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682 Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Reported-by: Caspar Zhang <bugs@casparzhang.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Rik van Riel <riel@redhat.com> Cc: <stable@kernel.org> [2.6.38.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/mm.h3
-rw-r--r--mm/huge_memory.c43
3 files changed, 27 insertions, 21 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index df29c8fde36b..8847c8c29791 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
117 unsigned long end, 117 unsigned long end,
118 long adjust_next) 118 long adjust_next)
119{ 119{
120 if (!vma->anon_vma || vma->vm_ops || vma->vm_file) 120 if (!vma->anon_vma || vma->vm_ops)
121 return; 121 return;
122 __vma_adjust_trans_huge(vma, start, end, adjust_next); 122 __vma_adjust_trans_huge(vma, start, end, adjust_next);
123} 123}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 692dbae6ffa7..2348db26bc3d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void *objp);
137#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) 137#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ)
138 138
139/* 139/*
140 * special vmas that are non-mergable, non-mlock()able 140 * Special vmas that are non-mergable, non-mlock()able.
141 * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
141 */ 142 */
142#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) 143#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
143 144
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 470dcda10add..83326ad66d9b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1408,6 +1408,9 @@ out:
1408 return ret; 1408 return ret;
1409} 1409}
1410 1410
1411#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
1412 VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
1413
1411int hugepage_madvise(struct vm_area_struct *vma, 1414int hugepage_madvise(struct vm_area_struct *vma,
1412 unsigned long *vm_flags, int advice) 1415 unsigned long *vm_flags, int advice)
1413{ 1416{
@@ -1416,11 +1419,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
1416 /* 1419 /*
1417 * Be somewhat over-protective like KSM for now! 1420 * Be somewhat over-protective like KSM for now!
1418 */ 1421 */
1419 if (*vm_flags & (VM_HUGEPAGE | 1422 if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
1420 VM_SHARED | VM_MAYSHARE |
1421 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1422 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
1423 VM_MIXEDMAP | VM_SAO))
1424 return -EINVAL; 1423 return -EINVAL;
1425 *vm_flags &= ~VM_NOHUGEPAGE; 1424 *vm_flags &= ~VM_NOHUGEPAGE;
1426 *vm_flags |= VM_HUGEPAGE; 1425 *vm_flags |= VM_HUGEPAGE;
@@ -1436,11 +1435,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
1436 /* 1435 /*
1437 * Be somewhat over-protective like KSM for now! 1436 * Be somewhat over-protective like KSM for now!
1438 */ 1437 */
1439 if (*vm_flags & (VM_NOHUGEPAGE | 1438 if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
1440 VM_SHARED | VM_MAYSHARE |
1441 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1442 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
1443 VM_MIXEDMAP | VM_SAO))
1444 return -EINVAL; 1439 return -EINVAL;
1445 *vm_flags &= ~VM_HUGEPAGE; 1440 *vm_flags &= ~VM_HUGEPAGE;
1446 *vm_flags |= VM_NOHUGEPAGE; 1441 *vm_flags |= VM_NOHUGEPAGE;
@@ -1574,10 +1569,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
1574 * page fault if needed. 1569 * page fault if needed.
1575 */ 1570 */
1576 return 0; 1571 return 0;
1577 if (vma->vm_file || vma->vm_ops) 1572 if (vma->vm_ops)
1578 /* khugepaged not yet working on file or special mappings */ 1573 /* khugepaged not yet working on file or special mappings */
1579 return 0; 1574 return 0;
1580 VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); 1575 /*
1576 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
1577 * true too, verify it here.
1578 */
1579 VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
1581 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; 1580 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
1582 hend = vma->vm_end & HPAGE_PMD_MASK; 1581 hend = vma->vm_end & HPAGE_PMD_MASK;
1583 if (hstart < hend) 1582 if (hstart < hend)
@@ -1828,12 +1827,15 @@ static void collapse_huge_page(struct mm_struct *mm,
1828 (vma->vm_flags & VM_NOHUGEPAGE)) 1827 (vma->vm_flags & VM_NOHUGEPAGE))
1829 goto out; 1828 goto out;
1830 1829
1831 /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ 1830 if (!vma->anon_vma || vma->vm_ops)
1832 if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
1833 goto out; 1831 goto out;
1834 if (is_vma_temporary_stack(vma)) 1832 if (is_vma_temporary_stack(vma))
1835 goto out; 1833 goto out;
1836 VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); 1834 /*
1835 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
1836 * true too, verify it here.
1837 */
1838 VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
1837 1839
1838 pgd = pgd_offset(mm, address); 1840 pgd = pgd_offset(mm, address);
1839 if (!pgd_present(*pgd)) 1841 if (!pgd_present(*pgd))
@@ -2066,13 +2068,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
2066 progress++; 2068 progress++;
2067 continue; 2069 continue;
2068 } 2070 }
2069 /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ 2071 if (!vma->anon_vma || vma->vm_ops)
2070 if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
2071 goto skip; 2072 goto skip;
2072 if (is_vma_temporary_stack(vma)) 2073 if (is_vma_temporary_stack(vma))
2073 goto skip; 2074 goto skip;
2074 2075 /*
2075 VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); 2076 * If is_pfn_mapping() is true is_learn_pfn_mapping()
2077 * must be true too, verify it here.
2078 */
2079 VM_BUG_ON(is_linear_pfn_mapping(vma) ||
2080 vma->vm_flags & VM_NO_THP);
2076 2081
2077 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; 2082 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
2078 hend = vma->vm_end & HPAGE_PMD_MASK; 2083 hend = vma->vm_end & HPAGE_PMD_MASK;