Diffstat (limited to 'mm/mlock.c')
-rw-r--r--  mm/mlock.c | 163
1 file changed, 83 insertions(+), 80 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index b70919ce4f72..13e81ee8be9d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -155,13 +155,12 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
 static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-				unsigned long start, unsigned long end)
+				unsigned long start, unsigned long end,
+				int *nonblocking)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
-	struct page *pages[16];		/* 16 gives a reasonable batch */
 	int nr_pages = (end - start) / PAGE_SIZE;
-	int ret = 0;
 	int gup_flags;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
@@ -170,73 +169,26 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	VM_BUG_ON(end > vma->vm_end);
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-	gup_flags = FOLL_TOUCH | FOLL_GET;
-	if (vma->vm_flags & VM_WRITE)
+	gup_flags = FOLL_TOUCH;
+	/*
+	 * We want to touch writable mappings with a write fault in order
+	 * to break COW, except for shared mappings because these don't COW
+	 * and we would not want to dirty them for nothing.
+	 */
+	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
 		gup_flags |= FOLL_WRITE;
 
+	if (vma->vm_flags & VM_LOCKED)
+		gup_flags |= FOLL_MLOCK;
+
 	/* We don't try to access the guard page of a stack vma */
 	if (stack_guard_page(vma, start)) {
 		addr += PAGE_SIZE;
 		nr_pages--;
 	}
 
-	while (nr_pages > 0) {
-		int i;
-
-		cond_resched();
-
-		/*
-		 * get_user_pages makes pages present if we are
-		 * setting mlock. and this extra reference count will
-		 * disable migration of this page. However, page may
-		 * still be truncated out from under us.
-		 */
-		ret = __get_user_pages(current, mm, addr,
-				min_t(int, nr_pages, ARRAY_SIZE(pages)),
-				gup_flags, pages, NULL);
-		/*
-		 * This can happen for, e.g., VM_NONLINEAR regions before
-		 * a page has been allocated and mapped at a given offset,
-		 * or for addresses that map beyond end of a file.
-		 * We'll mlock the pages if/when they get faulted in.
-		 */
-		if (ret < 0)
-			break;
-
-		lru_add_drain();	/* push cached pages to LRU */
-
-		for (i = 0; i < ret; i++) {
-			struct page *page = pages[i];
-
-			if (page->mapping) {
-				/*
-				 * That preliminary check is mainly to avoid
-				 * the pointless overhead of lock_page on the
-				 * ZERO_PAGE: which might bounce very badly if
-				 * there is contention. However, we're still
-				 * dirtying its cacheline with get/put_page:
-				 * we'll add another __get_user_pages flag to
-				 * avoid it if that case turns out to matter.
-				 */
-				lock_page(page);
-				/*
-				 * Because we lock page here and migration is
-				 * blocked by the elevated reference, we need
-				 * only check for file-cache page truncation.
-				 */
-				if (page->mapping)
-					mlock_vma_page(page);
-				unlock_page(page);
-			}
-			put_page(page);	/* ref from get_user_pages() */
-		}
-
-		addr += ret * PAGE_SIZE;
-		nr_pages -= ret;
-		ret = 0;
-	}
-
-	return ret;	/* 0 or negative error code */
+	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+				NULL, NULL, nonblocking);
 }
 
 /*
@@ -280,7 +232,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
 
-		__mlock_vma_pages_range(vma, start, end);
+		__mlock_vma_pages_range(vma, start, end, NULL);
 
 		/* Hide errors from mmap() and other callers */
 		return 0;
@@ -372,18 +324,10 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	int ret = 0;
 	int lock = newflags & VM_LOCKED;
 
-	if (newflags == vma->vm_flags ||
-			(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
+	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current))
 		goto out;	/* don't set VM_LOCKED, don't count */
 
-	if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
-			is_vm_hugetlb_page(vma) ||
-			vma == get_gate_vma(current)) {
-		if (lock)
-			make_pages_present(start, end);
-		goto out;	/* don't set VM_LOCKED, don't count */
-	}
-
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma));
@@ -419,14 +363,10 @@ success:
 	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
 	 */
 
-	if (lock) {
+	if (lock)
 		vma->vm_flags = newflags;
-		ret = __mlock_vma_pages_range(vma, start, end);
-		if (ret < 0)
-			ret = __mlock_posix_error_return(ret);
-	} else {
+	else
 		munlock_vma_pages_range(vma, start, end);
-	}
 
 out:
 	*prev = vma;
@@ -439,7 +379,8 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	struct vm_area_struct * vma, * prev;
 	int error;
 
-	len = PAGE_ALIGN(len);
+	VM_BUG_ON(start & ~PAGE_MASK);
+	VM_BUG_ON(len != PAGE_ALIGN(len));
 	end = start + len;
 	if (end < start)
 		return -EINVAL;
@@ -482,6 +423,62 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	return error;
 }
 
+static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long end, nstart, nend;
+	struct vm_area_struct *vma = NULL;
+	int locked = 0;
+	int ret = 0;
+
+	VM_BUG_ON(start & ~PAGE_MASK);
+	VM_BUG_ON(len != PAGE_ALIGN(len));
+	end = start + len;
+
+	for (nstart = start; nstart < end; nstart = nend) {
+		/*
+		 * We want to fault in pages for [nstart; end) address range.
+		 * Find first corresponding VMA.
+		 */
+		if (!locked) {
+			locked = 1;
+			down_read(&mm->mmap_sem);
+			vma = find_vma(mm, nstart);
+		} else if (nstart >= vma->vm_end)
+			vma = vma->vm_next;
+		if (!vma || vma->vm_start >= end)
+			break;
+		/*
+		 * Set [nstart; nend) to intersection of desired address
+		 * range with the first VMA. Also, skip undesirable VMA types.
+		 */
+		nend = min(end, vma->vm_end);
+		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+			continue;
+		if (nstart < vma->vm_start)
+			nstart = vma->vm_start;
+		/*
+		 * Now fault in a range of pages. __mlock_vma_pages_range()
+		 * double checks the vma flags, so that it won't mlock pages
+		 * if the vma was already munlocked.
+		 */
+		ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
+		if (ret < 0) {
+			if (ignore_errors) {
+				ret = 0;
+				continue;	/* continue at next VMA */
+			}
+			ret = __mlock_posix_error_return(ret);
+			break;
+		}
+		nend = nstart + ret * PAGE_SIZE;
+		ret = 0;
+	}
+	if (locked)
+		up_read(&mm->mmap_sem);
+	return ret;	/* 0 or negative error code */
+}
+
 SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 {
 	unsigned long locked;
@@ -507,6 +504,8 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
 		error = do_mlock(start, len, 1);
 	up_write(&current->mm->mmap_sem);
+	if (!error)
+		error = do_mlock_pages(start, len, 0);
 	return error;
 }
 
@@ -571,6 +570,10 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	    capable(CAP_IPC_LOCK))
 		ret = do_mlockall(flags);
 	up_write(&current->mm->mmap_sem);
+	if (!ret && (flags & MCL_CURRENT)) {
+		/* Ignore errors */
+		do_mlock_pages(0, TASK_SIZE, 1);
+	}
 out:
 	return ret;
 }
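
The functions changed above back the mlock(2) and mlockall(2) system calls: the VM_LOCKED flag updates still happen under mmap_sem held for write, but the actual fault-in of pages moves into do_mlock_pages(), which runs with mmap_sem held only for read and passes &locked as the new nonblocking argument so the loop can notice when __get_user_pages() dropped mmap_sem and retake it on the next iteration. Below is a minimal user-space sketch (not part of this commit; buffer size and error handling are purely illustrative) of the two call paths exercised by the patch:

/* Illustrative user-space program, not from this commit: exercises the
 * mlock() and mlockall(MCL_CURRENT) paths that now fault pages in via
 * do_mlock_pages() with mmap_sem held only for read. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 4 * page;		/* arbitrary page-aligned length */
	void *buf;

	/* mmap() returns page-aligned memory, as mlock() expects. */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Lock just this range: mlock -> do_mlock() sets VM_LOCKED under
	 * the write lock, then do_mlock_pages() faults the pages in. */
	if (mlock(buf, len) != 0)
		perror("mlock");	/* e.g. RLIMIT_MEMLOCK exceeded */

	memset(buf, 0, len);		/* pages should already be resident */

	/* Lock everything currently mapped; MCL_CURRENT makes the kernel
	 * fault in existing mappings, ignoring per-VMA errors as in the
	 * mlockall hunk above. */
	if (mlockall(MCL_CURRENT) != 0)
		perror("mlockall");

	munlockall();
	munmap(buf, len);
	return 0;
}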