Diffstat (limited to 'mm/mlock.c')
-rw-r--r--  mm/mlock.c  188
1 file changed, 91 insertions, 97 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index b70919ce4f72..048260c4e02e 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,13 +135,6 @@ void munlock_vma_page(struct page *page)
         }
 }
 
-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
-{
-        return (vma->vm_flags & VM_GROWSDOWN) &&
-                (vma->vm_start == addr) &&
-                !vma_stack_continue(vma->vm_prev, addr);
-}
-
 /**
  * __mlock_vma_pages_range() - mlock a range of pages in the vma.
  * @vma:   target vma
@@ -155,13 +148,12 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
 static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-                                unsigned long start, unsigned long end)
+                                    unsigned long start, unsigned long end,
+                                    int *nonblocking)
 {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long addr = start;
-        struct page *pages[16];         /* 16 gives a reasonable batch */
         int nr_pages = (end - start) / PAGE_SIZE;
-        int ret = 0;
         int gup_flags;
 
         VM_BUG_ON(start & ~PAGE_MASK);
@@ -170,73 +162,24 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
         VM_BUG_ON(end > vma->vm_end);
         VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-        gup_flags = FOLL_TOUCH | FOLL_GET;
-        if (vma->vm_flags & VM_WRITE)
+        gup_flags = FOLL_TOUCH | FOLL_MLOCK;
+        /*
+         * We want to touch writable mappings with a write fault in order
+         * to break COW, except for shared mappings because these don't COW
+         * and we would not want to dirty them for nothing.
+         */
+        if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
                 gup_flags |= FOLL_WRITE;
 
-        /* We don't try to access the guard page of a stack vma */
-        if (stack_guard_page(vma, start)) {
-                addr += PAGE_SIZE;
-                nr_pages--;
-        }
-
-        while (nr_pages > 0) {
-                int i;
-
-                cond_resched();
-
-                /*
-                 * get_user_pages makes pages present if we are
-                 * setting mlock. and this extra reference count will
-                 * disable migration of this page. However, page may
-                 * still be truncated out from under us.
-                 */
-                ret = __get_user_pages(current, mm, addr,
-                                min_t(int, nr_pages, ARRAY_SIZE(pages)),
-                                gup_flags, pages, NULL);
-                /*
-                 * This can happen for, e.g., VM_NONLINEAR regions before
-                 * a page has been allocated and mapped at a given offset,
-                 * or for addresses that map beyond end of a file.
-                 * We'll mlock the pages if/when they get faulted in.
-                 */
-                if (ret < 0)
-                        break;
-
-                lru_add_drain();        /* push cached pages to LRU */
-
-                for (i = 0; i < ret; i++) {
-                        struct page *page = pages[i];
-
-                        if (page->mapping) {
-                                /*
-                                 * That preliminary check is mainly to avoid
-                                 * the pointless overhead of lock_page on the
-                                 * ZERO_PAGE: which might bounce very badly if
-                                 * there is contention. However, we're still
-                                 * dirtying its cacheline with get/put_page:
-                                 * we'll add another __get_user_pages flag to
-                                 * avoid it if that case turns out to matter.
-                                 */
-                                lock_page(page);
-                                /*
-                                 * Because we lock page here and migration is
-                                 * blocked by the elevated reference, we need
-                                 * only check for file-cache page truncation.
-                                 */
-                                if (page->mapping)
-                                        mlock_vma_page(page);
-                                unlock_page(page);
-                        }
-                        put_page(page); /* ref from get_user_pages() */
-                }
-
-                addr += ret * PAGE_SIZE;
-                nr_pages -= ret;
-                ret = 0;
-        }
+        /*
+         * We want mlock to succeed for regions that have any permissions
+         * other than PROT_NONE.
+         */
+        if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
+                gup_flags |= FOLL_FORCE;
 
-        return ret;     /* 0 or negative error code */
+        return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+                                NULL, NULL, nonblocking);
 }
 
 /*
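The rewritten __mlock_vma_pages_range() no longer walks the range in 16-page batches itself: it only computes a suitable set of gup_flags and lets a single __get_user_pages() call fault in the whole range, with FOLL_MLOCK asking get_user_pages to mlock the pages it touches, which is what made the old per-page lock_page()/mlock_vma_page() loop removable. As a quick illustration of the flag selection above, here is a standalone sketch; the helper name is made up for illustration and is not part of the patch:

static unsigned int mlock_gup_flags(unsigned long vm_flags)
{
        unsigned int gup_flags = FOLL_TOUCH | FOLL_MLOCK;

        /* Write-fault only private writable mappings, so COW is broken
         * up front without dirtying shared file pages for nothing. */
        if ((vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
                gup_flags |= FOLL_WRITE;

        /* Any permission other than PROT_NONE is enough for mlock. */
        if (vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
                gup_flags |= FOLL_FORCE;

        return gup_flags;
}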
@@ -278,9 +221,9 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 
         if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
                         is_vm_hugetlb_page(vma) ||
-                        vma == get_gate_vma(current))) {
+                        vma == get_gate_vma(current->mm))) {
 
-                __mlock_vma_pages_range(vma, start, end);
+                __mlock_vma_pages_range(vma, start, end, NULL);
 
                 /* Hide errors from mmap() and other callers */
                 return 0;
@@ -364,26 +307,18 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
  * For vmas that pass the filters, merge/split as appropriate.
  */
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
-        unsigned long start, unsigned long end, unsigned int newflags)
+        unsigned long start, unsigned long end, vm_flags_t newflags)
 {
         struct mm_struct *mm = vma->vm_mm;
         pgoff_t pgoff;
         int nr_pages;
         int ret = 0;
-        int lock = newflags & VM_LOCKED;
+        int lock = !!(newflags & VM_LOCKED);
 
-        if (newflags == vma->vm_flags ||
-                        (vma->vm_flags & (VM_IO | VM_PFNMAP)))
+        if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
+            is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
                 goto out;       /* don't set VM_LOCKED, don't count */
 
-        if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
-                        is_vm_hugetlb_page(vma) ||
-                        vma == get_gate_vma(current)) {
-                if (lock)
-                        make_pages_present(start, end);
-                goto out;       /* don't set VM_LOCKED, don't count */
-        }
-
         pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
         *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
                           vma->vm_file, pgoff, vma_policy(vma));
@@ -419,14 +354,10 @@ success:
          * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
          */
 
-        if (lock) {
+        if (lock)
                 vma->vm_flags = newflags;
-                ret = __mlock_vma_pages_range(vma, start, end);
-                if (ret < 0)
-                        ret = __mlock_posix_error_return(ret);
-        } else {
+        else
                 munlock_vma_pages_range(vma, start, end);
-        }
 
 out:
         *prev = vma;
@@ -439,7 +370,8 @@ static int do_mlock(unsigned long start, size_t len, int on)
         struct vm_area_struct * vma, * prev;
         int error;
 
-        len = PAGE_ALIGN(len);
+        VM_BUG_ON(start & ~PAGE_MASK);
+        VM_BUG_ON(len != PAGE_ALIGN(len));
         end = start + len;
         if (end < start)
                 return -EINVAL;
@@ -453,7 +385,7 @@ static int do_mlock(unsigned long start, size_t len, int on)
                 prev = vma;
 
         for (nstart = start ; ; ) {
-                unsigned int newflags;
+                vm_flags_t newflags;
 
                 /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
 
@@ -482,6 +414,62 @@ static int do_mlock(unsigned long start, size_t len, int on)
         return error;
 }
 
+static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+{
+        struct mm_struct *mm = current->mm;
+        unsigned long end, nstart, nend;
+        struct vm_area_struct *vma = NULL;
+        int locked = 0;
+        int ret = 0;
+
+        VM_BUG_ON(start & ~PAGE_MASK);
+        VM_BUG_ON(len != PAGE_ALIGN(len));
+        end = start + len;
+
+        for (nstart = start; nstart < end; nstart = nend) {
+                /*
+                 * We want to fault in pages for [nstart; end) address range.
+                 * Find first corresponding VMA.
+                 */
+                if (!locked) {
+                        locked = 1;
+                        down_read(&mm->mmap_sem);
+                        vma = find_vma(mm, nstart);
+                } else if (nstart >= vma->vm_end)
+                        vma = vma->vm_next;
+                if (!vma || vma->vm_start >= end)
+                        break;
+                /*
+                 * Set [nstart; nend) to intersection of desired address
+                 * range with the first VMA. Also, skip undesirable VMA types.
+                 */
+                nend = min(end, vma->vm_end);
+                if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+                        continue;
+                if (nstart < vma->vm_start)
+                        nstart = vma->vm_start;
+                /*
+                 * Now fault in a range of pages. __mlock_vma_pages_range()
+                 * double checks the vma flags, so that it won't mlock pages
+                 * if the vma was already munlocked.
+                 */
+                ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
+                if (ret < 0) {
+                        if (ignore_errors) {
+                                ret = 0;
+                                continue;       /* continue at next VMA */
+                        }
+                        ret = __mlock_posix_error_return(ret);
+                        break;
+                }
+                nend = nstart + ret * PAGE_SIZE;
+                ret = 0;
+        }
+        if (locked)
+                up_read(&mm->mmap_sem);
+        return ret;     /* 0 or negative error code */
+}
+
 SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 {
         unsigned long locked;
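The new do_mlock_pages() is what lets mlock() stop holding mmap_sem across every page fault: it passes &locked as the nonblocking argument, and __get_user_pages() (reached via __mlock_vma_pages_range()) drops mmap_sem and clears *nonblocking whenever a fault has to block, so the next loop iteration re-takes the semaphore and re-finds the vma. A minimal caller-side sketch of that protocol, assuming the __get_user_pages() prototype used by this series; the helper name is hypothetical:

static long fault_in_range(struct mm_struct *mm, unsigned long start,
                           unsigned long end, unsigned int gup_flags)
{
        int locked = 1;
        long ret;

        down_read(&mm->mmap_sem);
        ret = __get_user_pages(current, mm, start,
                               (end - start) >> PAGE_SHIFT, gup_flags,
                               NULL, NULL, &locked);
        /*
         * If gup had to wait for I/O it already released mmap_sem and
         * cleared "locked", so only drop the semaphore if we still own it.
         */
        if (locked)
                up_read(&mm->mmap_sem);
        return ret;
}

do_mlock_pages() applies the same rule across many vmas, which is why it re-checks "locked" at the top of every iteration instead of assuming the semaphore is still held.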
@@ -507,6 +495,8 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
                 error = do_mlock(start, len, 1);
         up_write(&current->mm->mmap_sem);
+        if (!error)
+                error = do_mlock_pages(start, len, 0);
         return error;
 }
 
@@ -534,7 +524,7 @@ static int do_mlockall(int flags)
                 goto out;
 
         for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
-                unsigned int newflags;
+                vm_flags_t newflags;
 
                 newflags = vma->vm_flags | VM_LOCKED;
                 if (!(flags & MCL_CURRENT))
@@ -571,6 +561,10 @@ SYSCALL_DEFINE1(mlockall, int, flags)
             capable(CAP_IPC_LOCK))
                 ret = do_mlockall(flags);
         up_write(&current->mm->mmap_sem);
+        if (!ret && (flags & MCL_CURRENT)) {
+                /* Ignore errors */
+                do_mlock_pages(0, TASK_SIZE, 1);
+        }
 out:
         return ret;
 }
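At the syscall level the split is now: do_mlock()/do_mlockall() only update vm_flags with mmap_sem held for write, and the actual page faulting happens afterwards in do_mlock_pages() with mmap_sem held for read (and dropped across blocking faults); mlockall(MCL_CURRENT) simply runs it over the whole address space and ignores errors. Nothing changes for user space; a minimal usage example of the two syscalls touched here, for illustration only:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 16 * 4096;
        char *buf = malloc(len);

        if (buf == NULL)
                return 1;
        memset(buf, 0, len);

        /* Pages are faulted in and locked by the kernel paths above. */
        if (mlock(buf, len) != 0)
                perror("mlock");

        /* Lock (and fault in) everything currently mapped. */
        if (mlockall(MCL_CURRENT) != 0)
                perror("mlockall");

        munlockall();
        free(buf);
        return 0;
}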