author		Hugh Dickins <hugh.dickins@tiscali.co.uk>	2009-09-21 20:03:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-22 10:17:40 -0400
commit		408e82b78bcc9f1b47c76e833c3df97f675947de (patch)
tree		393fb1c659f3fe23928c722d3cffcac3429c1aee /mm
parent		5d3bc2709114b416cab588c577e02c2470e40a6c (diff)
mm: munlock use follow_page
Hiroaki Wakabayashi points out that when mlock() has been interrupted by SIGKILL, the subsequent munlock() takes unnecessarily long because its use of __get_user_pages() insists on faulting in all the pages which mlock() never reached.

It's worse than slowness if mlock() is terminated by Out Of Memory kill: the munlock_vma_pages_all() in exit_mmap() insists on faulting in all the pages which mlock() could not find memory for; so innocent bystanders are killed too, and perhaps the system hangs.

__get_user_pages() does a lot that's silly for munlock(): so remove the munlock option from __mlock_vma_pages_range(), and use a simple loop of follow_page()s in munlock_vma_pages_range() instead; ignoring absent pages, and not marking present pages as accessed or dirty.

(Change munlock() to only go so far as mlock() reached? That does not work out, given the convention that mlock() claims complete success even when it has to give up early - in part so that an underlying file can be extended later, and those pages locked which earlier would give SIGBUS.)

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: <stable@kernel.org>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Hiroaki Wakabayashi <primulaelatior@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
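In outline, the replacement munlock path is just a follow_page() walk over the range. A minimal sketch of that loop follows; it mirrors the munlock_vma_pages_range() hunk in the diff below, the function name munlock_walk_sketch is illustrative only, and the helpers follow_page(), FOLL_GET, munlock_vma_page() and cond_resched() are the 2.6.31-era kernel interfaces the patch itself relies on:

/*
 * Illustrative only: walk the range page by page, skip pages that are
 * not present (follow_page() returns NULL), and clear the mlock state
 * of those that are -- nothing is ever faulted in.
 */
static void munlock_walk_sketch(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		/* FOLL_GET takes a reference on any page found */
		struct page *page = follow_page(vma, addr, FOLL_GET);

		if (page) {
			lock_page(page);
			if (page->mapping)	/* not truncated meanwhile */
				munlock_vma_page(page);
			unlock_page(page);
			put_page(page);
		}
		cond_resched();		/* keep long ranges preemptible */
	}
}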
Diffstat (limited to 'mm')
-rw-r--r--	mm/mlock.c	99
1 file changed, 40 insertions, 59 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index 45eb650b9654..e13918d4fc4f 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -139,49 +139,36 @@ static void munlock_vma_page(struct page *page)
 }
 
 /**
- * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma.
+ * __mlock_vma_pages_range() - mlock a range of pages in the vma.
  * @vma:   target vma
  * @start: start address
  * @end:   end address
- * @mlock: 0 indicate munlock, otherwise mlock.
  *
- * If @mlock == 0, unlock an mlocked range;
- * else mlock the range of pages.  This takes care of making the pages present ,
- * too.
+ * This takes care of making the pages present too.
  *
  * return 0 on success, negative error code on error.
  *
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
 static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-				    unsigned long start, unsigned long end,
-				    int mlock)
+				    unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
 	struct page *pages[16]; /* 16 gives a reasonable batch */
 	int nr_pages = (end - start) / PAGE_SIZE;
 	int ret = 0;
-	int gup_flags = 0;
+	int gup_flags;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(end & ~PAGE_MASK);
 	VM_BUG_ON(start < vma->vm_start);
 	VM_BUG_ON(end > vma->vm_end);
-	VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
-		  (atomic_read(&mm->mm_users) != 0));
-
-	/*
-	 * mlock:    don't page populate if vma has PROT_NONE permission.
-	 * munlock:  always do munlock although the vma has PROT_NONE
-	 *           permission, or SIGKILL is pending.
-	 */
-	if (!mlock)
-		gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
-			     GUP_FLAGS_IGNORE_SIGKILL;
+	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
+	gup_flags = 0;
 	if (vma->vm_flags & VM_WRITE)
-		gup_flags |= GUP_FLAGS_WRITE;
+		gup_flags = GUP_FLAGS_WRITE;
 
 	while (nr_pages > 0) {
 		int i;
@@ -201,19 +188,10 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		 * This can happen for, e.g., VM_NONLINEAR regions before
 		 * a page has been allocated and mapped at a given offset,
 		 * or for addresses that map beyond end of a file.
-		 * We'll mlock the the pages if/when they get faulted in.
+		 * We'll mlock the pages if/when they get faulted in.
 		 */
 		if (ret < 0)
 			break;
-		if (ret == 0) {
-			/*
-			 * We know the vma is there, so the only time
-			 * we cannot get a single page should be an
-			 * error (ret < 0) case.
-			 */
-			WARN_ON(1);
-			break;
-		}
 
 		lru_add_drain();	/* push cached pages to LRU */
 
@@ -224,28 +202,22 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 			/*
 			 * Because we lock page here and migration is blocked
 			 * by the elevated reference, we need only check for
-			 * page truncation (file-cache only).
+			 * file-cache page truncation.  This page->mapping
+			 * check also neatly skips over the ZERO_PAGE(),
+			 * though if that's common we'd prefer not to lock it.
 			 */
-			if (page->mapping) {
-				if (mlock)
-					mlock_vma_page(page);
-				else
-					munlock_vma_page(page);
-			}
+			if (page->mapping)
+				mlock_vma_page(page);
 			unlock_page(page);
 			put_page(page);	/* ref from get_user_pages() */
-
-			/*
-			 * here we assume that get_user_pages() has given us
-			 * a list of virtually contiguous pages.
-			 */
-			addr += PAGE_SIZE;	/* for next get_user_pages() */
-			nr_pages--;
 		}
+
+		addr += ret * PAGE_SIZE;
+		nr_pages -= ret;
 		ret = 0;
 	}
 
-	return ret;	/* count entire vma as locked_vm */
+	return ret;	/* 0 or negative error code */
 }
 
 /*
@@ -289,7 +261,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
 
-		__mlock_vma_pages_range(vma, start, end, 1);
+		__mlock_vma_pages_range(vma, start, end);
 
 		/* Hide errors from mmap() and other callers */
 		return 0;
@@ -310,7 +282,6 @@ no_mlock:
 	return nr_pages;		/* error or pages NOT mlocked */
 }
 
-
 /*
  * munlock_vma_pages_range() - munlock all pages in the vma range.'
  * @vma - vma containing range to be munlock()ed.
@@ -330,10 +301,24 @@ no_mlock:
 * free them.  This will result in freeing mlocked pages.
 */
 void munlock_vma_pages_range(struct vm_area_struct *vma,
 			     unsigned long start, unsigned long end)
 {
+	unsigned long addr;
+
+	lru_add_drain();
 	vma->vm_flags &= ~VM_LOCKED;
-	__mlock_vma_pages_range(vma, start, end, 0);
+
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		struct page *page = follow_page(vma, addr, FOLL_GET);
+		if (page) {
+			lock_page(page);
+			if (page->mapping)
+				munlock_vma_page(page);
+			unlock_page(page);
+			put_page(page);
+		}
+		cond_resched();
+	}
 }
 
 /*
@@ -400,18 +385,14 @@ success:
 	 * It's okay if try_to_unmap_one unmaps a page just after we
 	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
 	 */
-	vma->vm_flags = newflags;
 
 	if (lock) {
-		ret = __mlock_vma_pages_range(vma, start, end, 1);
-
-		if (ret > 0) {
-			mm->locked_vm -= ret;
-			ret = 0;
-		} else
-			ret = __mlock_posix_error_return(ret);	/* translate if needed */
+		vma->vm_flags = newflags;
+		ret = __mlock_vma_pages_range(vma, start, end);
+		if (ret < 0)
+			ret = __mlock_posix_error_return(ret);
 	} else {
-		__mlock_vma_pages_range(vma, start, end, 0);
+		munlock_vma_pages_range(vma, start, end);
 	}
 
 out: