commit 6d50e60cd2edb5a57154db5a6f64eef5aa59b751
tree 025056e23d57062c5e045d54613edc2e2fc4ca86
parent 47f29df7db78ee4fcdb104cf36918d987ddd0278
author    David Rientjes <rientjes@google.com>          2014-10-29 17:50:31 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-10-29 19:33:14 -0400

mm, thp: fix collapsing of hugepages on madvise

If an anonymous mapping is not allowed to fault thp memory and then
madvise(MADV_HUGEPAGE) is used after fault, khugepaged will never
collapse this memory into thp memory.
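
A minimal userspace sketch of the scenario (assuming an unfixed kernel
with khugepaged enabled in "always" or "madvise" mode; the 16MB size
and the pause() are only illustrative):

	#define _GNU_SOURCE
	#include <string.h>
	#include <unistd.h>
	#include <sys/mman.h>

	#define SZ (16UL << 20)	/* spans several PMD-sized ranges */

	int main(void)
	{
		char *p = mmap(NULL, SZ, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED)
			return 1;

		madvise(p, SZ, MADV_NOHUGEPAGE);	/* vma gets VM_NOHUGEPAGE */
		memset(p, 1, SZ);			/* fault in small pages */
		madvise(p, SZ, MADV_HUGEPAGE);		/* should re-enable thp */

		/*
		 * On an unfixed kernel the mm is never registered with
		 * khugepaged at the second madvise(), so this range is
		 * never collapsed into hugepages no matter how long we
		 * wait here.
		 */
		pause();
		return 0;
	}
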
This occurs because the madvise(2) handler for thp, hugepage_madvise(),
clears VM_NOHUGEPAGE on a stack copy of the flags, which is not stored
back into vma->vm_flags until the final action of madvise_behavior().
This causes khugepaged_enter_vma_merge() to be a no-op in
hugepage_madvise() when the vma had previously had VM_NOHUGEPAGE set.
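
For reference, the pre-fix flow, abbreviated and lightly paraphrased
from mm/madvise.c and mm/huge_memory.c (error handling and the
unrelated advice cases are trimmed; this is a sketch of the call
sequence, not the literal source):

	static long madvise_behavior(struct vm_area_struct *vma,
				     struct vm_area_struct **prev,
				     unsigned long start, unsigned long end,
				     int behavior)
	{
		unsigned long new_flags = vma->vm_flags;
		int error = 0;

		switch (behavior) {
		case MADV_HUGEPAGE:
		case MADV_NOHUGEPAGE:
			error = hugepage_madvise(vma, &new_flags, behavior);
			if (error)
				goto out;
			break;
		}
		/* (vma merge/split handling elided) */

		/* vma->vm_flags is only updated here, as the final action: */
		vma->vm_flags = new_flags;
	out:
		return error;
	}

	int hugepage_madvise(struct vm_area_struct *vma,
			     unsigned long *vm_flags, int advice)
	{
		switch (advice) {
		case MADV_HUGEPAGE:
			*vm_flags &= ~VM_NOHUGEPAGE;
			*vm_flags |= VM_HUGEPAGE;
			/*
			 * Pre-fix, the call below received only the vma
			 * and tested vma->vm_flags, where VM_NOHUGEPAGE
			 * was still set, so it bailed out and the mm was
			 * never queued for collapse.
			 */
			if (unlikely(khugepaged_enter_vma_merge(vma)))
				return -ENOMEM;
			break;
		}
		return 0;
	}
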
Fix this by passing the correct vma flags to the khugepaged mm slot
handler. There's no chance khugepaged can run on this vma until after
madvise_behavior() returns, since we hold mm->mmap_sem.

It would be possible to clear VM_NOHUGEPAGE directly from vma->vm_flags
in hugepage_madvise(), but I didn't want to introduce special-case
behavior into madvise_behavior(). I think it's best to just let it
always set vma->vm_flags itself.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Suleiman Souhlal <suleiman@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 include/linux/khugepaged.h | 17 ++++++++++-------
 mm/huge_memory.c           | 11 ++++++-----
 mm/mmap.c                  |  8 ++++----
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 6b394f0b5148..eeb307985715 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -6,7 +6,8 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
-extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
+extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+				      unsigned long vm_flags);
 
 #define khugepaged_enabled() \
 	(transparent_hugepage_flags & \
@@ -35,13 +36,13 @@ static inline void khugepaged_exit(struct mm_struct *mm)
 		__khugepaged_exit(mm);
 }
 
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
-		     (khugepaged_req_madv() &&
-		      vma->vm_flags & VM_HUGEPAGE)) &&
-		    !(vma->vm_flags & VM_NOHUGEPAGE))
+		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
+		    !(vm_flags & VM_NOHUGEPAGE))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;
@@ -54,11 +55,13 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 static inline void khugepaged_exit(struct mm_struct *mm)
 {
 }
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	return 0;
 }
-static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+					     unsigned long vm_flags)
 {
 	return 0;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 780d12c000e9..de984159cf0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -803,7 +803,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_FALLBACK;
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
-	if (unlikely(khugepaged_enter(vma)))
+	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 		return VM_FAULT_OOM;
 	if (!(flags & FAULT_FLAG_WRITE) &&
 			transparent_hugepage_use_zero_page()) {
@@ -1970,7 +1970,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		 * register it here without waiting a page fault that
 		 * may not happen any time soon.
 		 */
-		if (unlikely(khugepaged_enter_vma_merge(vma)))
+		if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
 			return -ENOMEM;
 		break;
 	case MADV_NOHUGEPAGE:
@@ -2071,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm)
 	return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+			       unsigned long vm_flags)
 {
 	unsigned long hstart, hend;
 	if (!vma->anon_vma)
@@ -2083,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
+	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
-		return khugepaged_enter(vma);
+		return khugepaged_enter(vma, vm_flags);
 	return 0;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1080,7 +1080,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 					end, prev->vm_pgoff, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(prev);
+		khugepaged_enter_vma_merge(prev, vm_flags);
 		return prev;
 	}
 
@@ -1099,7 +1099,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 					next->vm_pgoff - pglen, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(area);
+		khugepaged_enter_vma_merge(area, vm_flags);
 		return area;
 	}
 
@@ -2208,7 +2208,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }
@@ -2277,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma,
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }