Diffstat (limited to 'mm/mlock.c')
 mm/mlock.c | 128 ++++++++++++++++++++----------------------------------------
 1 file changed, 49 insertions(+), 79 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index c9bd528b01d2..79b7cf7d1bca 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -102,13 +102,16 @@ void mlock_vma_page(struct page *page)
  * can't isolate the page, we leave it for putback_lru_page() and vmscan
  * [page_referenced()/try_to_unmap()] to deal with.
  */
-void munlock_vma_page(struct page *page)
+unsigned int munlock_vma_page(struct page *page)
 {
+	unsigned int page_mask = 0;
+
 	BUG_ON(!PageLocked(page));
 
 	if (TestClearPageMlocked(page)) {
-		mod_zone_page_state(page_zone(page), NR_MLOCK,
-				    -hpage_nr_pages(page));
+		unsigned int nr_pages = hpage_nr_pages(page);
+		mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+		page_mask = nr_pages - 1;
 		if (!isolate_lru_page(page)) {
 			int ret = SWAP_AGAIN;
 
@@ -141,6 +144,8 @@ void munlock_vma_page(struct page *page)
 				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
 		}
 	}
+
+	return page_mask;
 }
 
 /**
@@ -155,13 +160,11 @@ void munlock_vma_page(struct page *page)
  *
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
-static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-				    unsigned long start, unsigned long end,
-				    int *nonblocking)
+long __mlock_vma_pages_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, int *nonblocking)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	unsigned long addr = start;
-	int nr_pages = (end - start) / PAGE_SIZE;
+	unsigned long nr_pages = (end - start) / PAGE_SIZE;
 	int gup_flags;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
@@ -186,7 +189,11 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
 		gup_flags |= FOLL_FORCE;
 
-	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+	/*
+	 * We made sure addr is within a VMA, so the following will
+	 * not result in a stack expansion that recurses back here.
+	 */
+	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
 				NULL, NULL, nonblocking);
 }
 
@@ -202,56 +209,6 @@ static int __mlock_posix_error_return(long retval)
 	return retval;
 }
 
-/**
- * mlock_vma_pages_range() - mlock pages in specified vma range.
- * @vma - the vma containing the specfied address range
- * @start - starting address in @vma to mlock
- * @end - end address [+1] in @vma to mlock
- *
- * For mmap()/mremap()/expansion of mlocked vma.
- *
- * return 0 on success for "normal" vmas.
- *
- * return number of pages [> 0] to be removed from locked_vm on success
- * of "special" vmas.
- */
-long mlock_vma_pages_range(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end)
-{
-	int nr_pages = (end - start) / PAGE_SIZE;
-	BUG_ON(!(vma->vm_flags & VM_LOCKED));
-
-	/*
-	 * filter unlockable vmas
-	 */
-	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
-		goto no_mlock;
-
-	if (!((vma->vm_flags & VM_DONTEXPAND) ||
-			is_vm_hugetlb_page(vma) ||
-			vma == get_gate_vma(current->mm))) {
-
-		__mlock_vma_pages_range(vma, start, end, NULL);
-
-		/* Hide errors from mmap() and other callers */
-		return 0;
-	}
-
-	/*
-	 * User mapped kernel pages or huge pages:
-	 * make these pages present to populate the ptes, but
-	 * fall thru' to reset VM_LOCKED--no need to unlock, and
-	 * return nr_pages so these don't get counted against task's
-	 * locked limit. huge pages are already counted against
-	 * locked vm limit.
-	 */
-	make_pages_present(start, end);
-
-no_mlock:
-	vma->vm_flags &= ~VM_LOCKED;	/* and don't come back! */
-	return nr_pages;		/* error or pages NOT mlocked */
-}
-
 /*
  * munlock_vma_pages_range() - munlock all pages in the vma range.'
  * @vma - vma containing range to be munlock()ed.
@@ -273,13 +230,12 @@ no_mlock:
 void munlock_vma_pages_range(struct vm_area_struct *vma,
 			     unsigned long start, unsigned long end)
 {
-	unsigned long addr;
-
-	lru_add_drain();
 	vma->vm_flags &= ~VM_LOCKED;
 
-	for (addr = start; addr < end; addr += PAGE_SIZE) {
+	while (start < end) {
 		struct page *page;
+		unsigned int page_mask, page_increm;
+
 		/*
 		 * Although FOLL_DUMP is intended for get_dump_page(),
 		 * it just so happens that its special treatment of the
@@ -287,13 +243,22 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		 * suits munlock very well (and if somehow an abnormal page
 		 * has sneaked into the range, we won't oops here: great).
 		 */
-		page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
+		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
+					&page_mask);
 		if (page && !IS_ERR(page)) {
 			lock_page(page);
-			munlock_vma_page(page);
+			lru_add_drain();
+			/*
+			 * Any THP page found by follow_page_mask() may have
+			 * gotten split before reaching munlock_vma_page(),
+			 * so we need to recompute the page_mask here.
+			 */
+			page_mask = munlock_vma_page(page);
 			unlock_page(page);
 			put_page(page);
 		}
+		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+		start += page_increm * PAGE_SIZE;
 		cond_resched();
 	}
 }
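The page_increm arithmetic in that hunk is plain mask math: munlock_vma_page() returns nr_pages - 1 for the page it just handled, so after a 512-page THP the loop jumps straight to the next huge-page boundary instead of probing 511 tail pages one by one, while an ordinary page (mask 0) still advances a single page. A minimal userspace sketch of the same expression, using illustrative constants rather than anything taken from the kernel headers:

	#include <stdio.h>

	/* Illustrative values: 4 KiB base pages and a 2 MiB THP (512 base pages). */
	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define HPAGE_NR	512UL

	int main(void)
	{
		/* Pretend the scan is three base pages into a huge page at 2 MiB. */
		unsigned long start = 0x200000UL + 3 * PAGE_SIZE;
		/* What a munlock_vma_page()-style helper hands back for a THP:
		 * the compound page size in base pages, minus one. */
		unsigned long page_mask = HPAGE_NR - 1;

		/* Same expression as in the loop above: skip every base page
		 * that belongs to the compound page just munlocked. */
		unsigned long page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);

		printf("skip %lu pages, next start = %#lx\n",
		       page_increm, start + page_increm * PAGE_SIZE);
		return 0;
	}

Run as written, this prints a skip of 509 pages and a next start of 0x400000, i.e. the first byte past the 2 MiB huge page.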
@@ -303,7 +268,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
  *
  * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
  * munlock is a no-op.  However, for some special vmas, we go ahead and
- * populate the ptes via make_pages_present().
+ * populate the ptes.
  *
  * For vmas that pass the filters, merge/split as appropriate.
  */
@@ -391,9 +356,9 @@ static int do_mlock(unsigned long start, size_t len, int on)
 
 		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
 
-		newflags = vma->vm_flags | VM_LOCKED;
-		if (!on)
-			newflags &= ~VM_LOCKED;
+		newflags = vma->vm_flags & ~VM_LOCKED;
+		if (on)
+			newflags |= VM_LOCKED;
 
 		tmp = vma->vm_end;
 		if (tmp > end)
@@ -416,13 +381,20 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	return error;
 }
 
-static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long end, nstart, nend;
 	struct vm_area_struct *vma = NULL;
 	int locked = 0;
-	int ret = 0;
+	long ret = 0;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(len != PAGE_ALIGN(len));
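The two VM_BUG_ON()s at the end of this hunk state __mm_populate()'s contract: callers pass a page-aligned start and a page-aligned length. The syscall entry points get there with the usual rounding idiom; a small userspace sketch of that idiom, with illustrative macro definitions rather than the kernel's own:

	#include <stdio.h>

	/* Illustrative stand-ins for the kernel's alignment helpers. */
	#define PAGE_SIZE	4096UL
	#define PAGE_MASK	(~(PAGE_SIZE - 1))
	#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)

	int main(void)
	{
		unsigned long start = 0x12345UL;	/* arbitrary unaligned address */
		unsigned long len = 1000UL;		/* arbitrary unaligned length */

		/* Round the length up to cover every touched page, then round
		 * the start down, so the range passed on satisfies both
		 * VM_BUG_ON() checks above. */
		len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
		start &= PAGE_MASK;

		printf("start=%#lx len=%#lx\n", start, len);	/* 0x12000, 0x1000 */
		return 0;
	}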
@@ -498,7 +470,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 	error = do_mlock(start, len, 1);
 	up_write(&current->mm->mmap_sem);
 	if (!error)
-		error = do_mlock_pages(start, len, 0);
+		error = __mm_populate(start, len, 0);
 	return error;
 }
 
@@ -528,9 +500,9 @@ static int do_mlockall(int flags)
 	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
 		vm_flags_t newflags;
 
-		newflags = vma->vm_flags | VM_LOCKED;
-		if (!(flags & MCL_CURRENT))
-			newflags &= ~VM_LOCKED;
+		newflags = vma->vm_flags & ~VM_LOCKED;
+		if (flags & MCL_CURRENT)
+			newflags |= VM_LOCKED;
 
 		/* Ignore errors */
 		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
@@ -564,10 +536,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	    capable(CAP_IPC_LOCK))
 		ret = do_mlockall(flags);
 	up_write(&current->mm->mmap_sem);
-	if (!ret && (flags & MCL_CURRENT)) {
-		/* Ignore errors */
-		do_mlock_pages(0, TASK_SIZE, 1);
-	}
+	if (!ret && (flags & MCL_CURRENT))
+		mm_populate(0, TASK_SIZE);
 out:
 	return ret;
 }
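For completeness, the MCL_CURRENT path rewritten in the last hunk is the one userspace reaches through mlockall(): after do_mlockall() marks every VMA VM_LOCKED, mm_populate(0, TASK_SIZE) faults the pages in. A tiny userspace program that exercises that path (error handling kept deliberately minimal):

	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		/* MCL_CURRENT asks the kernel to lock all existing mappings; on
		 * the kernel side that is do_mlockall() followed by the
		 * mm_populate(0, TASK_SIZE) call shown above. */
		if (mlockall(MCL_CURRENT) != 0) {
			perror("mlockall");
			return 1;
		}
		puts("current mappings locked and populated");

		/* Undo it so the process does not keep pinning memory. */
		munlockall();
		return 0;
	}

Note that mlockall() can fail with EPERM or ENOMEM for unprivileged processes with low RLIMIT_MEMLOCK, which the perror() call reports.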