aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author David Rientjes <rientjes@google.com> 2014-10-29 17:50:31 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-10-29 19:33:14 -0400
commit 6d50e60cd2edb5a57154db5a6f64eef5aa59b751 (patch)
tree 025056e23d57062c5e045d54613edc2e2fc4ca86 /mm
parent 47f29df7db78ee4fcdb104cf36918d987ddd0278 (diff)
mm, thp: fix collapsing of hugepages on madvise
If an anonymous mapping is not allowed to fault thp memory and then madvise(MADV_HUGEPAGE) is used after fault, khugepaged will never collapse this memory into thp memory. This occurs because the madvise(2) handler for thp, hugepage_madvise(), clears VM_NOHUGEPAGE on the stack and it isn't stored in vma->vm_flags until the final action of madvise_behavior(). This causes khugepaged_enter_vma_merge() to be a no-op in hugepage_madvise() when the vma had previously had VM_NOHUGEPAGE set. Fix this by passing the correct vma flags to the khugepaged mm slot handler. There's no chance khugepaged can run on this vma until after madvise_behavior() returns since we hold mm->mmap_sem. It would be possible to clear VM_NOHUGEPAGE directly from vma->vm_flags in hugepage_madvise(), but I didn't want to introduce special case behavior into madvise_behavior(). I think it's best to just let it always set vma->vm_flags itself. Signed-off-by: David Rientjes <rientjes@google.com> Reported-by: Suleiman Souhlal <suleiman@google.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/huge_memory.c 11
-rw-r--r-- mm/mmap.c 8
2 files changed, 10 insertions, 9 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 780d12c000e9..de984159cf0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -803,7 +803,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
803 return VM_FAULT_FALLBACK; 803 return VM_FAULT_FALLBACK;
804 if (unlikely(anon_vma_prepare(vma))) 804 if (unlikely(anon_vma_prepare(vma)))
805 return VM_FAULT_OOM; 805 return VM_FAULT_OOM;
806 if (unlikely(khugepaged_enter(vma))) 806 if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
807 return VM_FAULT_OOM; 807 return VM_FAULT_OOM;
808 if (!(flags & FAULT_FLAG_WRITE) && 808 if (!(flags & FAULT_FLAG_WRITE) &&
809 transparent_hugepage_use_zero_page()) { 809 transparent_hugepage_use_zero_page()) {
@@ -1970,7 +1970,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
1970 * register it here without waiting a page fault that 1970 * register it here without waiting a page fault that
1971 * may not happen any time soon. 1971 * may not happen any time soon.
1972 */ 1972 */
1973 if (unlikely(khugepaged_enter_vma_merge(vma))) 1973 if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
1974 return -ENOMEM; 1974 return -ENOMEM;
1975 break; 1975 break;
1976 case MADV_NOHUGEPAGE: 1976 case MADV_NOHUGEPAGE:
@@ -2071,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm)
2071 return 0; 2071 return 0;
2072} 2072}
2073 2073
2074int khugepaged_enter_vma_merge(struct vm_area_struct *vma) 2074int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
2075 unsigned long vm_flags)
2075{ 2076{
2076 unsigned long hstart, hend; 2077 unsigned long hstart, hend;
2077 if (!vma->anon_vma) 2078 if (!vma->anon_vma)
@@ -2083,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
2083 if (vma->vm_ops) 2084 if (vma->vm_ops)
2084 /* khugepaged not yet working on file or special mappings */ 2085 /* khugepaged not yet working on file or special mappings */
2085 return 0; 2086 return 0;
2086 VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); 2087 VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
2087 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; 2088 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
2088 hend = vma->vm_end & HPAGE_PMD_MASK; 2089 hend = vma->vm_end & HPAGE_PMD_MASK;
2089 if (hstart < hend) 2090 if (hstart < hend)
2090 return khugepaged_enter(vma); 2091 return khugepaged_enter(vma, vm_flags);
2091 return 0; 2092 return 0;
2092} 2093}
2093 2094
diff --git a/mm/mmap.c b/mm/mmap.c
index 7f855206e7fb..87e82b38453c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1080,7 +1080,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
1080 end, prev->vm_pgoff, NULL); 1080 end, prev->vm_pgoff, NULL);
1081 if (err) 1081 if (err)
1082 return NULL; 1082 return NULL;
1083 khugepaged_enter_vma_merge(prev); 1083 khugepaged_enter_vma_merge(prev, vm_flags);
1084 return prev; 1084 return prev;
1085 } 1085 }
1086 1086
@@ -1099,7 +1099,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
1099 next->vm_pgoff - pglen, NULL); 1099 next->vm_pgoff - pglen, NULL);
1100 if (err) 1100 if (err)
1101 return NULL; 1101 return NULL;
1102 khugepaged_enter_vma_merge(area); 1102 khugepaged_enter_vma_merge(area, vm_flags);
1103 return area; 1103 return area;
1104 } 1104 }
1105 1105
@@ -2208,7 +2208,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2208 } 2208 }
2209 } 2209 }
2210 vma_unlock_anon_vma(vma); 2210 vma_unlock_anon_vma(vma);
2211 khugepaged_enter_vma_merge(vma); 2211 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2212 validate_mm(vma->vm_mm); 2212 validate_mm(vma->vm_mm);
2213 return error; 2213 return error;
2214} 2214}
@@ -2277,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma,
2277 } 2277 }
2278 } 2278 }
2279 vma_unlock_anon_vma(vma); 2279 vma_unlock_anon_vma(vma);
2280 khugepaged_enter_vma_merge(vma); 2280 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2281 validate_mm(vma->vm_mm); 2281 validate_mm(vma->vm_mm);
2282 return error; 2282 return error;
2283} 2283}