Diffstat (limited to 'mm/mlock.c')
 mm/mlock.c | 163
 1 file changed, 83 insertions(+), 80 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index b70919ce4f72..13e81ee8be9d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -155,13 +155,12 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
 static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-                unsigned long start, unsigned long end)
+                unsigned long start, unsigned long end,
+                int *nonblocking)
 {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long addr = start;
-        struct page *pages[16];        /* 16 gives a reasonable batch */
         int nr_pages = (end - start) / PAGE_SIZE;
-        int ret = 0;
         int gup_flags;
 
         VM_BUG_ON(start & ~PAGE_MASK);
@@ -170,73 +169,26 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
         VM_BUG_ON(end > vma->vm_end);
         VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-        gup_flags = FOLL_TOUCH | FOLL_GET;
-        if (vma->vm_flags & VM_WRITE)
+        gup_flags = FOLL_TOUCH;
+        /*
+         * We want to touch writable mappings with a write fault in order
+         * to break COW, except for shared mappings because these don't COW
+         * and we would not want to dirty them for nothing.
+         */
+        if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
                 gup_flags |= FOLL_WRITE;
 
+        if (vma->vm_flags & VM_LOCKED)
+                gup_flags |= FOLL_MLOCK;
+
         /* We don't try to access the guard page of a stack vma */
         if (stack_guard_page(vma, start)) {
                 addr += PAGE_SIZE;
                 nr_pages--;
         }
 
-        while (nr_pages > 0) {
-                int i;
-
-                cond_resched();
-
-                /*
-                 * get_user_pages makes pages present if we are
-                 * setting mlock. and this extra reference count will
-                 * disable migration of this page. However, page may
-                 * still be truncated out from under us.
-                 */
-                ret = __get_user_pages(current, mm, addr,
-                                min_t(int, nr_pages, ARRAY_SIZE(pages)),
-                                gup_flags, pages, NULL);
-                /*
-                 * This can happen for, e.g., VM_NONLINEAR regions before
-                 * a page has been allocated and mapped at a given offset,
-                 * or for addresses that map beyond end of a file.
-                 * We'll mlock the pages if/when they get faulted in.
-                 */
-                if (ret < 0)
-                        break;
-
-                lru_add_drain();        /* push cached pages to LRU */
-
-                for (i = 0; i < ret; i++) {
-                        struct page *page = pages[i];
-
-                        if (page->mapping) {
-                                /*
-                                 * That preliminary check is mainly to avoid
-                                 * the pointless overhead of lock_page on the
-                                 * ZERO_PAGE: which might bounce very badly if
-                                 * there is contention. However, we're still
-                                 * dirtying its cacheline with get/put_page:
-                                 * we'll add another __get_user_pages flag to
-                                 * avoid it if that case turns out to matter.
-                                 */
-                                lock_page(page);
-                                /*
-                                 * Because we lock page here and migration is
-                                 * blocked by the elevated reference, we need
-                                 * only check for file-cache page truncation.
-                                 */
-                                if (page->mapping)
-                                        mlock_vma_page(page);
-                                unlock_page(page);
-                        }
-                        put_page(page);        /* ref from get_user_pages() */
-                }
-
-                addr += ret * PAGE_SIZE;
-                nr_pages -= ret;
-                ret = 0;
-        }
-
-        return ret;        /* 0 or negative error code */
+        return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+                                NULL, NULL, nonblocking);
 }
 
 /*
@@ -280,7 +232,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
                         is_vm_hugetlb_page(vma) ||
                         vma == get_gate_vma(current))) {
 
-                __mlock_vma_pages_range(vma, start, end);
+                __mlock_vma_pages_range(vma, start, end, NULL);
 
                 /* Hide errors from mmap() and other callers */
                 return 0;
@@ -372,18 +324,10 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
         int ret = 0;
         int lock = newflags & VM_LOCKED;
 
-        if (newflags == vma->vm_flags ||
-                        (vma->vm_flags & (VM_IO | VM_PFNMAP)))
+        if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
+            is_vm_hugetlb_page(vma) || vma == get_gate_vma(current))
                 goto out;        /* don't set VM_LOCKED, don't count */
 
-        if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
-                        is_vm_hugetlb_page(vma) ||
-                        vma == get_gate_vma(current)) {
-                if (lock)
-                        make_pages_present(start, end);
-                goto out;        /* don't set VM_LOCKED, don't count */
-        }
-
         pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
         *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
                           vma->vm_file, pgoff, vma_policy(vma));
@@ -419,14 +363,10 @@ success:
          * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
          */
 
-        if (lock) {
+        if (lock)
                 vma->vm_flags = newflags;
-                ret = __mlock_vma_pages_range(vma, start, end);
-                if (ret < 0)
-                        ret = __mlock_posix_error_return(ret);
-        } else {
+        else
                 munlock_vma_pages_range(vma, start, end);
-        }
 
 out:
         *prev = vma;
@@ -439,7 +379,8 @@ static int do_mlock(unsigned long start, size_t len, int on)
         struct vm_area_struct * vma, * prev;
         int error;
 
-        len = PAGE_ALIGN(len);
+        VM_BUG_ON(start & ~PAGE_MASK);
+        VM_BUG_ON(len != PAGE_ALIGN(len));
         end = start + len;
         if (end < start)
                 return -EINVAL;
@@ -482,6 +423,62 @@ static int do_mlock(unsigned long start, size_t len, int on)
         return error;
 }
 
+static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+{
+        struct mm_struct *mm = current->mm;
+        unsigned long end, nstart, nend;
+        struct vm_area_struct *vma = NULL;
+        int locked = 0;
+        int ret = 0;
+
+        VM_BUG_ON(start & ~PAGE_MASK);
+        VM_BUG_ON(len != PAGE_ALIGN(len));
+        end = start + len;
+
+        for (nstart = start; nstart < end; nstart = nend) {
+                /*
+                 * We want to fault in pages for [nstart; end) address range.
+                 * Find first corresponding VMA.
+                 */
+                if (!locked) {
+                        locked = 1;
+                        down_read(&mm->mmap_sem);
+                        vma = find_vma(mm, nstart);
+                } else if (nstart >= vma->vm_end)
+                        vma = vma->vm_next;
+                if (!vma || vma->vm_start >= end)
+                        break;
+                /*
+                 * Set [nstart; nend) to intersection of desired address
+                 * range with the first VMA. Also, skip undesirable VMA types.
+                 */
+                nend = min(end, vma->vm_end);
+                if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+                        continue;
+                if (nstart < vma->vm_start)
+                        nstart = vma->vm_start;
+                /*
+                 * Now fault in a range of pages. __mlock_vma_pages_range()
+                 * double checks the vma flags, so that it won't mlock pages
+                 * if the vma was already munlocked.
+                 */
+                ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
+                if (ret < 0) {
+                        if (ignore_errors) {
+                                ret = 0;
+                                continue;        /* continue at next VMA */
+                        }
+                        ret = __mlock_posix_error_return(ret);
+                        break;
+                }
+                nend = nstart + ret * PAGE_SIZE;
+                ret = 0;
+        }
+        if (locked)
+                up_read(&mm->mmap_sem);
+        return ret;        /* 0 or negative error code */
+}
+
 SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 {
         unsigned long locked;
@@ -507,6 +504,8 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
                 error = do_mlock(start, len, 1);
         up_write(&current->mm->mmap_sem);
+        if (!error)
+                error = do_mlock_pages(start, len, 0);
         return error;
 }
 
@@ -571,6 +570,10 @@ SYSCALL_DEFINE1(mlockall, int, flags)
             capable(CAP_IPC_LOCK))
                 ret = do_mlockall(flags);
         up_write(&current->mm->mmap_sem);
+        if (!ret && (flags & MCL_CURRENT)) {
+                /* Ignore errors */
+                do_mlock_pages(0, TASK_SIZE, 1);
+        }
 out:
         return ret;
 }
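
The patch reworks how the kernel faults in the locked range (do_mlock() now only sets VM_LOCKED under the mmap_sem write lock, and the new do_mlock_pages() faults pages in afterwards under the read lock), but the userspace-visible mlock(2)/mlockall(2) interface is unchanged. As a minimal, illustrative sketch of the path exercised by this code, using only standard POSIX calls and not part of the patch itself:

/*
 * Illustrative userspace usage of the mlock()/munlock() path modified
 * above; error handling kept minimal on purpose.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        long page = sysconf(_SC_PAGESIZE);
        size_t len = 16 * (size_t)page;

        /* mlock() operates on whole pages, so use page-aligned memory */
        void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED) {
                perror("mmap");
                return EXIT_FAILURE;
        }

        /* Pin the range; the kernel faults its pages in before returning */
        if (mlock(buf, len) != 0) {
                perror("mlock");   /* e.g. ENOMEM if RLIMIT_MEMLOCK is exceeded */
                return EXIT_FAILURE;
        }

        memset(buf, 0, len);       /* touch the locked, resident pages */

        if (munlock(buf, len) != 0)
                perror("munlock");
        munmap(buf, len);
        return EXIT_SUCCESS;
}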