diff options
author | David Rientjes <rientjes@google.com> | 2017-02-24 17:58:47 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-24 20:46:55 -0500 |
commit | def5efe0376501ef7bd6b53ed061512c142e59aa (patch) | |
tree | 46c957c37989924f2a970f09e15fc90d757999bf /mm | |
parent | 712c604dcdf8186295e2af694adf52c6842ad100 (diff) |
mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
If madvise(2) advice will result in the underlying vma being split and
the number of areas mapped by the process will exceed
/proc/sys/vm/max_map_count as a result, return ENOMEM instead of EAGAIN.
EAGAIN is returned by madvise(2) when a kernel resource, such as slab,
is temporarily unavailable. It indicates that userspace should retry
the advice in the near future. This is important for advice such as
MADV_DONTNEED which is often used by malloc implementations to free
memory back to the system: we really do want to free memory back when
madvise(2) returns EAGAIN because slab allocations (for vmas, anon_vmas,
or mempolicies) cannot be allocated.
Hitting the /proc/sys/vm/max_map_count limit is not a temporary failure,
however, so return ENOMEM to indicate this is a more serious issue. A
followup patch to the man page will specify this behavior.
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1701241431120.42507@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/madvise.c | 51 | ||||
-rw-r--r-- | mm/mmap.c | 8 |
2 files changed, 46 insertions, 13 deletions
diff --git a/mm/madvise.c b/mm/madvise.c index 0012071a6e50..11fc65f81ecd 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -92,14 +92,28 @@ static long madvise_behavior(struct vm_area_struct *vma, | |||
92 | case MADV_MERGEABLE: | 92 | case MADV_MERGEABLE: |
93 | case MADV_UNMERGEABLE: | 93 | case MADV_UNMERGEABLE: |
94 | error = ksm_madvise(vma, start, end, behavior, &new_flags); | 94 | error = ksm_madvise(vma, start, end, behavior, &new_flags); |
95 | if (error) | 95 | if (error) { |
96 | /* | ||
97 | * madvise() returns EAGAIN if kernel resources, such as | ||
98 | * slab, are temporarily unavailable. | ||
99 | */ | ||
100 | if (error == -ENOMEM) | ||
101 | error = -EAGAIN; | ||
96 | goto out; | 102 | goto out; |
103 | } | ||
97 | break; | 104 | break; |
98 | case MADV_HUGEPAGE: | 105 | case MADV_HUGEPAGE: |
99 | case MADV_NOHUGEPAGE: | 106 | case MADV_NOHUGEPAGE: |
100 | error = hugepage_madvise(vma, &new_flags, behavior); | 107 | error = hugepage_madvise(vma, &new_flags, behavior); |
101 | if (error) | 108 | if (error) { |
109 | /* | ||
110 | * madvise() returns EAGAIN if kernel resources, such as | ||
111 | * slab, are temporarily unavailable. | ||
112 | */ | ||
113 | if (error == -ENOMEM) | ||
114 | error = -EAGAIN; | ||
102 | goto out; | 115 | goto out; |
116 | } | ||
103 | break; | 117 | break; |
104 | } | 118 | } |
105 | 119 | ||
@@ -120,15 +134,37 @@ static long madvise_behavior(struct vm_area_struct *vma, | |||
120 | *prev = vma; | 134 | *prev = vma; |
121 | 135 | ||
122 | if (start != vma->vm_start) { | 136 | if (start != vma->vm_start) { |
123 | error = split_vma(mm, vma, start, 1); | 137 | if (unlikely(mm->map_count >= sysctl_max_map_count)) { |
124 | if (error) | 138 | error = -ENOMEM; |
125 | goto out; | 139 | goto out; |
140 | } | ||
141 | error = __split_vma(mm, vma, start, 1); | ||
142 | if (error) { | ||
143 | /* | ||
144 | * madvise() returns EAGAIN if kernel resources, such as | ||
145 | * slab, are temporarily unavailable. | ||
146 | */ | ||
147 | if (error == -ENOMEM) | ||
148 | error = -EAGAIN; | ||
149 | goto out; | ||
150 | } | ||
126 | } | 151 | } |
127 | 152 | ||
128 | if (end != vma->vm_end) { | 153 | if (end != vma->vm_end) { |
129 | error = split_vma(mm, vma, end, 0); | 154 | if (unlikely(mm->map_count >= sysctl_max_map_count)) { |
130 | if (error) | 155 | error = -ENOMEM; |
156 | goto out; | ||
157 | } | ||
158 | error = __split_vma(mm, vma, end, 0); | ||
159 | if (error) { | ||
160 | /* | ||
161 | * madvise() returns EAGAIN if kernel resources, such as | ||
162 | * slab, are temporarily unavailable. | ||
163 | */ | ||
164 | if (error == -ENOMEM) | ||
165 | error = -EAGAIN; | ||
131 | goto out; | 166 | goto out; |
167 | } | ||
132 | } | 168 | } |
133 | 169 | ||
134 | success: | 170 | success: |
@@ -136,10 +172,7 @@ success: | |||
136 | * vm_flags is protected by the mmap_sem held in write mode. | 172 | * vm_flags is protected by the mmap_sem held in write mode. |
137 | */ | 173 | */ |
138 | vma->vm_flags = new_flags; | 174 | vma->vm_flags = new_flags; |
139 | |||
140 | out: | 175 | out: |
141 | if (error == -ENOMEM) | ||
142 | error = -EAGAIN; | ||
143 | return error; | 176 | return error; |
144 | } | 177 | } |
145 | 178 | ||
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -2499,11 +2499,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2499 | } | 2499 | } |
2500 | 2500 | ||
2501 | /* | 2501 | /* |
2502 | * __split_vma() bypasses sysctl_max_map_count checking. We use this on the | 2502 | * __split_vma() bypasses sysctl_max_map_count checking. We use this where it |
2503 | * munmap path where it doesn't make sense to fail. | 2503 | * has already been checked or doesn't make sense to fail. |
2504 | */ | 2504 | */ |
2505 | static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, | 2505 | int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, |
2506 | unsigned long addr, int new_below) | 2506 | unsigned long addr, int new_below) |
2507 | { | 2507 | { |
2508 | struct vm_area_struct *new; | 2508 | struct vm_area_struct *new; |
2509 | int err; | 2509 | int err; |