diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2011-04-27 18:26:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-04-28 14:28:20 -0400 |
commit | 78f11a255749d09025f54d4e2df4fbcb031530e2 (patch) | |
tree | 20f8ffaf8548d963ffb519631f5c7d7372e9ca42 | |
parent | 6d4831c283530a5f2c6bd8172c13efa236eb149d (diff) |
mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups
The huge_memory.c THP page fault was allowed to run if vm_ops was NULL
(which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't
set up a special vma->vm_ops and it would fall back to regular anonymous
memory) but other THP logic wasn't fully activated for vmas with a non-NULL
vm_file (/dev/zero has a non-NULL vma->vm_file).
So this removes the vm_file checks so that /dev/zero can also safely use
THP (the other, albeit safer, approach to fix this bug would have been to
prevent the THP initial page fault from running if vm_file was set).
After removing the vm_file checks, this also makes huge_memory.c stricter
in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file
check with an is_pfn_mapping check (but it keeps checking for VM_PFNMAP
under VM_BUG_ON) because for an is_cow_mapping() mapping VM_PFNMAP should
only be allowed to exist before the first page fault, and in turn when
vma->anon_vma is NULL (so preventing khugepaged registration). So I tend
to think the previous comment, which said that if vm_file was set VM_PFNMAP
might have been set and we could still be registered in khugepaged (even
though anon_vma has to be non-NULL to be registered in khugepaged), was
too paranoid.
The is_linear_pfn_mapping check is also, I think, superfluous (as described
by the comment) but under DEBUG_VM it is safe to keep.
Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Caspar Zhang <bugs@casparzhang.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: <stable@kernel.org> [2.6.38.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/huge_mm.h | 2 | ||||
-rw-r--r-- | include/linux/mm.h | 3 | ||||
-rw-r--r-- | mm/huge_memory.c | 43 |
3 files changed, 27 insertions, 21 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index df29c8fde36b..8847c8c29791 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h | |||
@@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, | |||
117 | unsigned long end, | 117 | unsigned long end, |
118 | long adjust_next) | 118 | long adjust_next) |
119 | { | 119 | { |
120 | if (!vma->anon_vma || vma->vm_ops || vma->vm_file) | 120 | if (!vma->anon_vma || vma->vm_ops) |
121 | return; | 121 | return; |
122 | __vma_adjust_trans_huge(vma, start, end, adjust_next); | 122 | __vma_adjust_trans_huge(vma, start, end, adjust_next); |
123 | } | 123 | } |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 692dbae6ffa7..2348db26bc3d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void *objp); | |||
137 | #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) | 137 | #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) |
138 | 138 | ||
139 | /* | 139 | /* |
140 | * special vmas that are non-mergable, non-mlock()able | 140 | * Special vmas that are non-mergable, non-mlock()able. |
141 | * Note: mm/huge_memory.c VM_NO_THP depends on this definition. | ||
141 | */ | 142 | */ |
142 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) | 143 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) |
143 | 144 | ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 470dcda10add..83326ad66d9b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1408,6 +1408,9 @@ out: | |||
1408 | return ret; | 1408 | return ret; |
1409 | } | 1409 | } |
1410 | 1410 | ||
1411 | #define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \ | ||
1412 | VM_HUGETLB|VM_SHARED|VM_MAYSHARE) | ||
1413 | |||
1411 | int hugepage_madvise(struct vm_area_struct *vma, | 1414 | int hugepage_madvise(struct vm_area_struct *vma, |
1412 | unsigned long *vm_flags, int advice) | 1415 | unsigned long *vm_flags, int advice) |
1413 | { | 1416 | { |
@@ -1416,11 +1419,7 @@ int hugepage_madvise(struct vm_area_struct *vma, | |||
1416 | /* | 1419 | /* |
1417 | * Be somewhat over-protective like KSM for now! | 1420 | * Be somewhat over-protective like KSM for now! |
1418 | */ | 1421 | */ |
1419 | if (*vm_flags & (VM_HUGEPAGE | | 1422 | if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) |
1420 | VM_SHARED | VM_MAYSHARE | | ||
1421 | VM_PFNMAP | VM_IO | VM_DONTEXPAND | | ||
1422 | VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | | ||
1423 | VM_MIXEDMAP | VM_SAO)) | ||
1424 | return -EINVAL; | 1423 | return -EINVAL; |
1425 | *vm_flags &= ~VM_NOHUGEPAGE; | 1424 | *vm_flags &= ~VM_NOHUGEPAGE; |
1426 | *vm_flags |= VM_HUGEPAGE; | 1425 | *vm_flags |= VM_HUGEPAGE; |
@@ -1436,11 +1435,7 @@ int hugepage_madvise(struct vm_area_struct *vma, | |||
1436 | /* | 1435 | /* |
1437 | * Be somewhat over-protective like KSM for now! | 1436 | * Be somewhat over-protective like KSM for now! |
1438 | */ | 1437 | */ |
1439 | if (*vm_flags & (VM_NOHUGEPAGE | | 1438 | if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP)) |
1440 | VM_SHARED | VM_MAYSHARE | | ||
1441 | VM_PFNMAP | VM_IO | VM_DONTEXPAND | | ||
1442 | VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | | ||
1443 | VM_MIXEDMAP | VM_SAO)) | ||
1444 | return -EINVAL; | 1439 | return -EINVAL; |
1445 | *vm_flags &= ~VM_HUGEPAGE; | 1440 | *vm_flags &= ~VM_HUGEPAGE; |
1446 | *vm_flags |= VM_NOHUGEPAGE; | 1441 | *vm_flags |= VM_NOHUGEPAGE; |
@@ -1574,10 +1569,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma) | |||
1574 | * page fault if needed. | 1569 | * page fault if needed. |
1575 | */ | 1570 | */ |
1576 | return 0; | 1571 | return 0; |
1577 | if (vma->vm_file || vma->vm_ops) | 1572 | if (vma->vm_ops) |
1578 | /* khugepaged not yet working on file or special mappings */ | 1573 | /* khugepaged not yet working on file or special mappings */ |
1579 | return 0; | 1574 | return 0; |
1580 | VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); | 1575 | /* |
1576 | * If is_pfn_mapping() is true is_linear_pfn_mapping() must be | ||
1577 | * true too, verify it here. | ||
1578 | */ | ||
1579 | VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); | ||
1581 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 1580 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
1582 | hend = vma->vm_end & HPAGE_PMD_MASK; | 1581 | hend = vma->vm_end & HPAGE_PMD_MASK; |
1583 | if (hstart < hend) | 1582 | if (hstart < hend) |
@@ -1828,12 +1827,15 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1828 | (vma->vm_flags & VM_NOHUGEPAGE)) | 1827 | (vma->vm_flags & VM_NOHUGEPAGE)) |
1829 | goto out; | 1828 | goto out; |
1830 | 1829 | ||
1831 | /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ | 1830 | if (!vma->anon_vma || vma->vm_ops) |
1832 | if (!vma->anon_vma || vma->vm_ops || vma->vm_file) | ||
1833 | goto out; | 1831 | goto out; |
1834 | if (is_vma_temporary_stack(vma)) | 1832 | if (is_vma_temporary_stack(vma)) |
1835 | goto out; | 1833 | goto out; |
1836 | VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); | 1834 | /* |
1835 | * If is_pfn_mapping() is true is_linear_pfn_mapping() must be | ||
1836 | * true too, verify it here. | ||
1837 | */ | ||
1838 | VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); | ||
1837 | 1839 | ||
1838 | pgd = pgd_offset(mm, address); | 1840 | pgd = pgd_offset(mm, address); |
1839 | if (!pgd_present(*pgd)) | 1841 | if (!pgd_present(*pgd)) |
@@ -2066,13 +2068,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, | |||
2066 | progress++; | 2068 | progress++; |
2067 | continue; | 2069 | continue; |
2068 | } | 2070 | } |
2069 | /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ | 2071 | if (!vma->anon_vma || vma->vm_ops) |
2070 | if (!vma->anon_vma || vma->vm_ops || vma->vm_file) | ||
2071 | goto skip; | 2072 | goto skip; |
2072 | if (is_vma_temporary_stack(vma)) | 2073 | if (is_vma_temporary_stack(vma)) |
2073 | goto skip; | 2074 | goto skip; |
2074 | 2075 | /* | |
2075 | VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); | 2076 | * If is_pfn_mapping() is true is_linear_pfn_mapping() |
2077 | * must be true too, verify it here. | ||
2078 | */ | ||
2079 | VM_BUG_ON(is_linear_pfn_mapping(vma) || | ||
2080 | vma->vm_flags & VM_NO_THP); | ||
2076 | 2081 | ||
2077 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 2082 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
2078 | hend = vma->vm_end & HPAGE_PMD_MASK; | 2083 | hend = vma->vm_end & HPAGE_PMD_MASK; |