author		David Rientjes <rientjes@google.com>	2017-02-24 17:58:47 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-24 20:46:55 -0500
commit		def5efe0376501ef7bd6b53ed061512c142e59aa (patch)
tree		46c957c37989924f2a970f09e15fc90d757999bf /mm
parent		712c604dcdf8186295e2af694adf52c6842ad100 (diff)
mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
If madvise(2) advice will result in the underlying vma being split and the
number of areas mapped by the process will exceed /proc/sys/vm/max_map_count
as a result, return ENOMEM instead of EAGAIN.

EAGAIN is returned by madvise(2) when a kernel resource, such as slab, is
temporarily unavailable.  It indicates that userspace should retry the advice
in the near future.  This is important for advice such as MADV_DONTNEED,
which is often used by malloc implementations to free memory back to the
system: when madvise(2) fails with EAGAIN only because a slab allocation
(for a vma, anon_vma, or mempolicy) could not be satisfied, we really do
want userspace to retry so the memory is eventually freed back.

Encountering /proc/sys/vm/max_map_count is not a temporary failure, however,
so return ENOMEM to indicate this is a more serious issue.  A followup patch
to the man page will specify this behavior.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1701241431120.42507@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
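For context, a minimal userspace sketch (not part of this patch; the helper
name and retry count are illustrative) of how a caller such as a malloc
implementation might distinguish the two error codes after this change:
EAGAIN is a transient kernel allocation failure worth retrying, while ENOMEM
now means the split would push the process past vm.max_map_count, so
retrying cannot help.

#include <errno.h>
#include <stddef.h>
#include <sys/mman.h>

/* Illustrative helper: release a range back to the kernel via MADV_DONTNEED. */
static int release_range(void *addr, size_t len)
{
	int retries = 3;

	while (madvise(addr, len, MADV_DONTNEED) != 0) {
		/* EAGAIN: a vma/anon_vma/mempolicy slab allocation failed; retry. */
		if (errno == EAGAIN && retries-- > 0)
			continue;
		/* ENOMEM: splitting would exceed max_map_count; treat as a hard error. */
		return -1;
	}
	return 0;
}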
Diffstat (limited to 'mm')
-rw-r--r--	mm/madvise.c	51
-rw-r--r--	mm/mmap.c	8
2 files changed, 46 insertions, 13 deletions
diff --git a/mm/madvise.c b/mm/madvise.c
index 0012071a6e50..11fc65f81ecd 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -92,14 +92,28 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	case MADV_MERGEABLE:
 	case MADV_UNMERGEABLE:
 		error = ksm_madvise(vma, start, end, behavior, &new_flags);
-		if (error)
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
 			goto out;
+		}
 		break;
 	case MADV_HUGEPAGE:
 	case MADV_NOHUGEPAGE:
 		error = hugepage_madvise(vma, &new_flags, behavior);
-		if (error)
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
 			goto out;
+		}
 		break;
 	}
 
@@ -120,15 +134,37 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	*prev = vma;
 
 	if (start != vma->vm_start) {
-		error = split_vma(mm, vma, start, 1);
-		if (error)
+		if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+			error = -ENOMEM;
 			goto out;
+		}
+		error = __split_vma(mm, vma, start, 1);
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
+			goto out;
+		}
 	}
 
 	if (end != vma->vm_end) {
-		error = split_vma(mm, vma, end, 0);
-		if (error)
+		if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+			error = -ENOMEM;
 			goto out;
+		}
+		error = __split_vma(mm, vma, end, 0);
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
+			goto out;
+		}
 	}
 
 success:
@@ -136,10 +172,7 @@ success:
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
 	vma->vm_flags = new_flags;
-
 out:
-	if (error == -ENOMEM)
-		error = -EAGAIN;
 	return error;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 1cec28d20583..499b988b1639 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2499,11 +2499,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
- * munmap path where it doesn't make sense to fail.
+ * __split_vma() bypasses sysctl_max_map_count checking. We use this where it
+ * has already been checked or doesn't make sense to fail.
  */
-static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long addr, int new_below)
 {
 	struct vm_area_struct *new;
 	int err;