Diffstat (limited to 'mm/mlock.c')
 -rw-r--r--  mm/mlock.c | 128
 1 file changed, 49 insertions, 79 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index c9bd528b01d2..79b7cf7d1bca 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -102,13 +102,16 @@ void mlock_vma_page(struct page *page)
  * can't isolate the page, we leave it for putback_lru_page() and vmscan
  * [page_referenced()/try_to_unmap()] to deal with.
  */
-void munlock_vma_page(struct page *page)
+unsigned int munlock_vma_page(struct page *page)
 {
+        unsigned int page_mask = 0;
+
         BUG_ON(!PageLocked(page));
 
         if (TestClearPageMlocked(page)) {
-                mod_zone_page_state(page_zone(page), NR_MLOCK,
-                                    -hpage_nr_pages(page));
+                unsigned int nr_pages = hpage_nr_pages(page);
+                mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+                page_mask = nr_pages - 1;
                 if (!isolate_lru_page(page)) {
                         int ret = SWAP_AGAIN;
 
@@ -141,6 +144,8 @@ void munlock_vma_page(struct page *page)
                                 count_vm_event(UNEVICTABLE_PGMUNLOCKED);
                 }
         }
+
+        return page_mask;
 }
 
 /**
@@ -155,13 +160,11 @@ void munlock_vma_page(struct page *page)
  *
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
-static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-                unsigned long start, unsigned long end,
-                int *nonblocking)
+long __mlock_vma_pages_range(struct vm_area_struct *vma,
+                unsigned long start, unsigned long end, int *nonblocking)
 {
         struct mm_struct *mm = vma->vm_mm;
-        unsigned long addr = start;
-        int nr_pages = (end - start) / PAGE_SIZE;
+        unsigned long nr_pages = (end - start) / PAGE_SIZE;
         int gup_flags;
 
         VM_BUG_ON(start & ~PAGE_MASK);
@@ -186,7 +189,11 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
         if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
                 gup_flags |= FOLL_FORCE;
 
-        return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+        /*
+         * We made sure addr is within a VMA, so the following will
+         * not result in a stack expansion that recurses back here.
+         */
+        return __get_user_pages(current, mm, start, nr_pages, gup_flags,
                                 NULL, NULL, nonblocking);
 }
 
@@ -202,56 +209,6 @@ static int __mlock_posix_error_return(long retval)
         return retval;
 }
 
-/**
- * mlock_vma_pages_range() - mlock pages in specified vma range.
- * @vma - the vma containing the specfied address range
- * @start - starting address in @vma to mlock
- * @end - end address [+1] in @vma to mlock
- *
- * For mmap()/mremap()/expansion of mlocked vma.
- *
- * return 0 on success for "normal" vmas.
- *
- * return number of pages [> 0] to be removed from locked_vm on success
- * of "special" vmas.
- */
-long mlock_vma_pages_range(struct vm_area_struct *vma,
-                        unsigned long start, unsigned long end)
-{
-        int nr_pages = (end - start) / PAGE_SIZE;
-        BUG_ON(!(vma->vm_flags & VM_LOCKED));
-
-        /*
-         * filter unlockable vmas
-         */
-        if (vma->vm_flags & (VM_IO | VM_PFNMAP))
-                goto no_mlock;
-
-        if (!((vma->vm_flags & VM_DONTEXPAND) ||
-                        is_vm_hugetlb_page(vma) ||
-                        vma == get_gate_vma(current->mm))) {
-
-                __mlock_vma_pages_range(vma, start, end, NULL);
-
-                /* Hide errors from mmap() and other callers */
-                return 0;
-        }
-
-        /*
-         * User mapped kernel pages or huge pages:
-         * make these pages present to populate the ptes, but
-         * fall thru' to reset VM_LOCKED--no need to unlock, and
-         * return nr_pages so these don't get counted against task's
-         * locked limit. huge pages are already counted against
-         * locked vm limit.
-         */
-        make_pages_present(start, end);
-
-no_mlock:
-        vma->vm_flags &= ~VM_LOCKED;    /* and don't come back! */
-        return nr_pages;                /* error or pages NOT mlocked */
-}
-
 /*
  * munlock_vma_pages_range() - munlock all pages in the vma range.'
  * @vma - vma containing range to be munlock()ed.
@@ -273,13 +230,12 @@ no_mlock:
 void munlock_vma_pages_range(struct vm_area_struct *vma,
                              unsigned long start, unsigned long end)
 {
-        unsigned long addr;
-
-        lru_add_drain();
         vma->vm_flags &= ~VM_LOCKED;
 
-        for (addr = start; addr < end; addr += PAGE_SIZE) {
+        while (start < end) {
                 struct page *page;
+                unsigned int page_mask, page_increm;
+
                 /*
                  * Although FOLL_DUMP is intended for get_dump_page(),
                  * it just so happens that its special treatment of the
@@ -287,13 +243,22 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
                  * suits munlock very well (and if somehow an abnormal page
                  * has sneaked into the range, we won't oops here: great).
                  */
-                page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
+                page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
+                                        &page_mask);
                 if (page && !IS_ERR(page)) {
                         lock_page(page);
-                        munlock_vma_page(page);
+                        lru_add_drain();
+                        /*
+                         * Any THP page found by follow_page_mask() may have
+                         * gotten split before reaching munlock_vma_page(),
+                         * so we need to recompute the page_mask here.
+                         */
+                        page_mask = munlock_vma_page(page);
                         unlock_page(page);
                         put_page(page);
                 }
+                page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+                start += page_increm * PAGE_SIZE;
                 cond_resched();
         }
 }
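
The rewritten loop above advances by whole compound pages instead of one
PAGE_SIZE per iteration: munlock_vma_page() now returns page_mask = nr_pages - 1
(0 for a normal page, 511 for a 2 MB THP built from 4 KB base pages), and
page_increm is the number of base pages left up to the end of the current
compound page. A minimal userspace sketch of that stride arithmetic, with
PAGE_SHIFT and the THP geometry assumed here purely for illustration:

    #include <stdio.h>

    #define PAGE_SHIFT 12                  /* assumed: 4 KB base pages */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /* mimics the stride computation at the end of munlock_vma_pages_range() */
    static unsigned long next_start(unsigned long start, unsigned int page_mask)
    {
            unsigned int page_increm;

            /* base pages remaining up to the end of the compound page */
            page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
            return start + page_increm * PAGE_SIZE;
    }

    int main(void)
    {
            /* normal page: page_mask = 0, advance exactly one page */
            printf("%#lx\n", next_start(0x700000403000UL, 0));

            /*
             * 2 MB THP: nr_pages = 512, page_mask = 511.  Starting three base
             * pages into the huge page, page_increm = 509, so start jumps
             * straight to the next 2 MB boundary.
             */
            printf("%#lx\n", next_start(0x700000403000UL, 511));
            return 0;
    }

This prints 0x700000404000 and then 0x700000600000: one base page forward in
the first case, the remainder of the 2 MB page in the second.
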
@@ -303,7 +268,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
  *
  * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
  * munlock is a no-op. However, for some special vmas, we go ahead and
- * populate the ptes via make_pages_present().
+ * populate the ptes.
  *
  * For vmas that pass the filters, merge/split as appropriate.
  */
@@ -391,9 +356,9 @@ static int do_mlock(unsigned long start, size_t len, int on)
 
         /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
 
-        newflags = vma->vm_flags | VM_LOCKED;
-        if (!on)
-                newflags &= ~VM_LOCKED;
+        newflags = vma->vm_flags & ~VM_LOCKED;
+        if (on)
+                newflags |= VM_LOCKED;
 
         tmp = vma->vm_end;
         if (tmp > end)
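
The newflags change here (there is a matching one in do_mlockall() further
down) only reorders the bit manipulation: start from vm_flags with VM_LOCKED
cleared and set the bit when locking, instead of setting it first and clearing
it again on the !on path. A small userspace check of the equivalence, using
stand-in flag values rather than the real vm_flags definitions:

    #include <assert.h>
    #include <stdio.h>

    #define VM_READ   0x1UL
    #define VM_WRITE  0x2UL
    #define VM_LOCKED 0x2000UL             /* stand-in values for illustration */

    int main(void)
    {
            unsigned long vm_flags = VM_READ | VM_WRITE | VM_LOCKED;

            for (int on = 0; on <= 1; on++) {
                    /* old form: set VM_LOCKED, clear it again if !on */
                    unsigned long old_way = vm_flags | VM_LOCKED;
                    if (!on)
                            old_way &= ~VM_LOCKED;

                    /* new form: clear VM_LOCKED, set it if on */
                    unsigned long new_way = vm_flags & ~VM_LOCKED;
                    if (on)
                            new_way |= VM_LOCKED;

                    assert(old_way == new_way);
                    printf("on=%d -> newflags=%#lx\n", on, new_way);
            }
            return 0;
    }
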
@@ -416,13 +381,20 @@ static int do_mlock(unsigned long start, size_t len, int on)
         return error;
 }
 
-static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 {
         struct mm_struct *mm = current->mm;
         unsigned long end, nstart, nend;
         struct vm_area_struct *vma = NULL;
         int locked = 0;
-        int ret = 0;
+        long ret = 0;
 
         VM_BUG_ON(start & ~PAGE_MASK);
         VM_BUG_ON(len != PAGE_ALIGN(len));
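
As the new comment notes, __mm_populate() now sits behind both mlock() and the
MAP_POPULATE / MAP_LOCKED mmap flags: the VMAs get the desired vm_flags first
(via do_mlock() or mmap()), and the pages are faulted in afterwards without
mmap_sem held. A minimal userspace sketch of the two entry points that reach
this path (assumes RLIMIT_MEMLOCK is large enough; error handling kept short):

    #define _DEFAULT_SOURCE                /* for MAP_ANONYMOUS et al. on glibc */
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
            size_t len = 4 * 1024 * 1024;

            /* mmap() with MAP_LOCKED | MAP_POPULATE: the VMA flags are set at
             * mmap time, then the whole range is populated */
            char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED | MAP_POPULATE,
                           -1, 0);
            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }

            /* mlock() on an existing mapping: do_mlock() marks the VMAs
             * VM_LOCKED, then the pages are faulted in */
            char *q = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (q == MAP_FAILED || mlock(q, len) != 0) {
                    perror("mmap/mlock");
                    return 1;
            }

            memset(q, 0, len);
            munlock(q, len);
            munmap(p, len);
            munmap(q, len);
            return 0;
    }
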
@@ -498,7 +470,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
         error = do_mlock(start, len, 1);
         up_write(&current->mm->mmap_sem);
         if (!error)
-                error = do_mlock_pages(start, len, 0);
+                error = __mm_populate(start, len, 0);
         return error;
 }
 
@@ -528,9 +500,9 @@ static int do_mlockall(int flags)
         for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
                 vm_flags_t newflags;
 
-                newflags = vma->vm_flags | VM_LOCKED;
-                if (!(flags & MCL_CURRENT))
-                        newflags &= ~VM_LOCKED;
+                newflags = vma->vm_flags & ~VM_LOCKED;
+                if (flags & MCL_CURRENT)
+                        newflags |= VM_LOCKED;
 
                 /* Ignore errors */
                 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
@@ -564,10 +536,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
             capable(CAP_IPC_LOCK))
                 ret = do_mlockall(flags);
         up_write(&current->mm->mmap_sem);
-        if (!ret && (flags & MCL_CURRENT)) {
-                /* Ignore errors */
-                do_mlock_pages(0, TASK_SIZE, 1);
-        }
+        if (!ret && (flags & MCL_CURRENT))
+                mm_populate(0, TASK_SIZE);
 out:
         return ret;
 }