Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- | mm/mlock.c | 221 |
1 files changed, 90 insertions, 131 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index c83896a72504..8b478350a2a1 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -112,26 +112,49 @@ static void munlock_vma_page(struct page *page)
112 | } | 112 | } |
113 | } | 113 | } |
114 | 114 | ||
115 | /* | 115 | /** |
116 | * mlock a range of pages in the vma. | 116 | * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma. |
117 | * @vma: target vma | ||
118 | * @start: start address | ||
119 | * @end: end address | ||
120 | * @mlock: 0 indicates munlock, otherwise mlock. | ||
121 | * | ||
122 | * If @mlock == 0, unlock an mlocked range; | ||
123 | * else mlock the range of pages. This takes care of making the pages present, | ||
124 | * too. | ||
117 | * | 125 | * |
118 | * This takes care of making the pages present too. | 126 | * return 0 on success, negative error code on error. |
119 | * | 127 | * |
120 | * vma->vm_mm->mmap_sem must be held for write. | 128 | * vma->vm_mm->mmap_sem must be held for at least read. |
121 | */ | 129 | */ |
122 | static int __mlock_vma_pages_range(struct vm_area_struct *vma, | 130 | static long __mlock_vma_pages_range(struct vm_area_struct *vma, |
123 | unsigned long start, unsigned long end) | 131 | unsigned long start, unsigned long end, |
132 | int mlock) | ||
124 | { | 133 | { |
125 | struct mm_struct *mm = vma->vm_mm; | 134 | struct mm_struct *mm = vma->vm_mm; |
126 | unsigned long addr = start; | 135 | unsigned long addr = start; |
127 | struct page *pages[16]; /* 16 gives a reasonable batch */ | 136 | struct page *pages[16]; /* 16 gives a reasonable batch */ |
128 | int write = !!(vma->vm_flags & VM_WRITE); | ||
129 | int nr_pages = (end - start) / PAGE_SIZE; | 137 | int nr_pages = (end - start) / PAGE_SIZE; |
130 | int ret; | 138 | int ret; |
139 | int gup_flags = 0; | ||
131 | 140 | ||
132 | VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK); | 141 | VM_BUG_ON(start & ~PAGE_MASK); |
133 | VM_BUG_ON(start < vma->vm_start || end > vma->vm_end); | 142 | VM_BUG_ON(end & ~PAGE_MASK); |
134 | VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); | 143 | VM_BUG_ON(start < vma->vm_start); |
144 | VM_BUG_ON(end > vma->vm_end); | ||
145 | VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) && | ||
146 | (atomic_read(&mm->mm_users) != 0)); | ||
147 | |||
148 | /* | ||
149 | * mlock: don't populate pages if the vma has PROT_NONE permission. | ||
150 | * munlock: always munlock the pages, even though the vma | ||
151 | * has PROT_NONE permission. | ||
152 | */ | ||
153 | if (!mlock) | ||
154 | gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS; | ||
155 | |||
156 | if (vma->vm_flags & VM_WRITE) | ||
157 | gup_flags |= GUP_FLAGS_WRITE; | ||
135 | 158 | ||
136 | lru_add_drain_all(); /* push cached pages to LRU */ | 159 | lru_add_drain_all(); /* push cached pages to LRU */ |
137 | 160 | ||
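
Illustrative aside (not from the patch itself): the new gup_flags selection above encodes the rule in the comment — munlock must still reach pages mapped in PROT_NONE vmas, while mlock respects vma permissions and faults writable vmas in for write. Below is a standalone, hedged sketch of just that decision; the GUP_FLAGS_* and VM_WRITE values are placeholders standing in for the kernel's internal definitions.

    #include <stdio.h>

    /* Placeholder values; the real flags live in the kernel's mm headers. */
    #define GUP_FLAGS_WRITE                   0x1
    #define GUP_FLAGS_IGNORE_VMA_PERMISSIONS  0x2
    #define VM_WRITE                          0x2   /* stand-in for the vm_flags bit */

    /* Model of the gup_flags selection in __mlock_vma_pages_range():
     * munlock ignores vma permissions so it can find PROT_NONE pages;
     * writable vmas are faulted in for write.
     */
    static int pick_gup_flags(unsigned long vm_flags, int mlock)
    {
        int gup_flags = 0;

        if (!mlock)
            gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
        if (vm_flags & VM_WRITE)
            gup_flags |= GUP_FLAGS_WRITE;
        return gup_flags;
    }

    int main(void)
    {
        printf("mlock, writable vma:    %#x\n", pick_gup_flags(VM_WRITE, 1));
        printf("munlock, read-only vma: %#x\n", pick_gup_flags(0, 0));
        return 0;
    }

In the kernel itself these bits are consumed by __get_user_pages(); the sketch only models which bits get set.
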
@@ -146,9 +169,9 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma,
146 | * disable migration of this page. However, page may | 169 | * disable migration of this page. However, page may |
147 | * still be truncated out from under us. | 170 | * still be truncated out from under us. |
148 | */ | 171 | */ |
149 | ret = get_user_pages(current, mm, addr, | 172 | ret = __get_user_pages(current, mm, addr, |
150 | min_t(int, nr_pages, ARRAY_SIZE(pages)), | 173 | min_t(int, nr_pages, ARRAY_SIZE(pages)), |
151 | write, 0, pages, NULL); | 174 | gup_flags, pages, NULL); |
152 | /* | 175 | /* |
153 | * This can happen for, e.g., VM_NONLINEAR regions before | 176 | * This can happen for, e.g., VM_NONLINEAR regions before |
154 | * a page has been allocated and mapped at a given offset, | 177 | * a page has been allocated and mapped at a given offset, |
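
Illustrative aside (not from the patch itself): the call above pins at most ARRAY_SIZE(pages) == 16 pages per iteration, and the enclosing loop advances addr and decrements nr_pages by whatever __get_user_pages() returned. A minimal user-space model of that batching arithmetic, assuming a 4 KiB page size:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL   /* assumption for the sketch */
    #define BATCH     16       /* mirrors pages[16] above */

    int main(void)
    {
        unsigned long start = 0x100000UL;
        unsigned long end   = start + 100 * PAGE_SIZE;
        unsigned long addr  = start;
        long nr_pages = (end - start) / PAGE_SIZE;

        while (nr_pages > 0) {
            /* stand-in for __get_user_pages(): pretend every
             * requested page in this batch was pinned
             */
            long ret = nr_pages < BATCH ? nr_pages : BATCH;

            printf("batch at %#lx: %ld pages\n", addr, ret);
            addr += ret * PAGE_SIZE;
            nr_pages -= ret;
        }
        return 0;
    }
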
@@ -178,8 +201,12 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma,
178 | * by the elevated reference, we need only check for | 201 | * by the elevated reference, we need only check for |
179 | * page truncation (file-cache only). | 202 | * page truncation (file-cache only). |
180 | */ | 203 | */ |
181 | if (page->mapping) | 204 | if (page->mapping) { |
182 | mlock_vma_page(page); | 205 | if (mlock) |
206 | mlock_vma_page(page); | ||
207 | else | ||
208 | munlock_vma_page(page); | ||
209 | } | ||
183 | unlock_page(page); | 210 | unlock_page(page); |
184 | put_page(page); /* ref from get_user_pages() */ | 211 | put_page(page); /* ref from get_user_pages() */ |
185 | 212 | ||
@@ -197,125 +224,38 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma,
197 | return 0; /* count entire vma as locked_vm */ | 224 | return 0; /* count entire vma as locked_vm */ |
198 | } | 225 | } |
199 | 226 | ||
200 | /* | ||
201 | * private structure for munlock page table walk | ||
202 | */ | ||
203 | struct munlock_page_walk { | ||
204 | struct vm_area_struct *vma; | ||
205 | pmd_t *pmd; /* for migration_entry_wait() */ | ||
206 | }; | ||
207 | |||
208 | /* | ||
209 | * munlock normal pages for present ptes | ||
210 | */ | ||
211 | static int __munlock_pte_handler(pte_t *ptep, unsigned long addr, | ||
212 | unsigned long end, struct mm_walk *walk) | ||
213 | { | ||
214 | struct munlock_page_walk *mpw = walk->private; | ||
215 | swp_entry_t entry; | ||
216 | struct page *page; | ||
217 | pte_t pte; | ||
218 | |||
219 | retry: | ||
220 | pte = *ptep; | ||
221 | /* | ||
222 | * If it's a swap pte, we might be racing with page migration. | ||
223 | */ | ||
224 | if (unlikely(!pte_present(pte))) { | ||
225 | if (!is_swap_pte(pte)) | ||
226 | goto out; | ||
227 | entry = pte_to_swp_entry(pte); | ||
228 | if (is_migration_entry(entry)) { | ||
229 | migration_entry_wait(mpw->vma->vm_mm, mpw->pmd, addr); | ||
230 | goto retry; | ||
231 | } | ||
232 | goto out; | ||
233 | } | ||
234 | |||
235 | page = vm_normal_page(mpw->vma, addr, pte); | ||
236 | if (!page) | ||
237 | goto out; | ||
238 | |||
239 | lock_page(page); | ||
240 | if (!page->mapping) { | ||
241 | unlock_page(page); | ||
242 | goto retry; | ||
243 | } | ||
244 | munlock_vma_page(page); | ||
245 | unlock_page(page); | ||
246 | |||
247 | out: | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Save pmd for pte handler for waiting on migration entries | ||
253 | */ | ||
254 | static int __munlock_pmd_handler(pmd_t *pmd, unsigned long addr, | ||
255 | unsigned long end, struct mm_walk *walk) | ||
256 | { | ||
257 | struct munlock_page_walk *mpw = walk->private; | ||
258 | |||
259 | mpw->pmd = pmd; | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | |||
264 | /* | ||
265 | * munlock a range of pages in the vma using standard page table walk. | ||
266 | * | ||
267 | * vma->vm_mm->mmap_sem must be held for write. | ||
268 | */ | ||
269 | static void __munlock_vma_pages_range(struct vm_area_struct *vma, | ||
270 | unsigned long start, unsigned long end) | ||
271 | { | ||
272 | struct mm_struct *mm = vma->vm_mm; | ||
273 | struct munlock_page_walk mpw = { | ||
274 | .vma = vma, | ||
275 | }; | ||
276 | struct mm_walk munlock_page_walk = { | ||
277 | .pmd_entry = __munlock_pmd_handler, | ||
278 | .pte_entry = __munlock_pte_handler, | ||
279 | .private = &mpw, | ||
280 | .mm = mm, | ||
281 | }; | ||
282 | |||
283 | VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK); | ||
284 | VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); | ||
285 | VM_BUG_ON(start < vma->vm_start); | ||
286 | VM_BUG_ON(end > vma->vm_end); | ||
287 | |||
288 | lru_add_drain_all(); /* push cached pages to LRU */ | ||
289 | walk_page_range(start, end, &munlock_page_walk); | ||
290 | lru_add_drain_all(); /* to update stats */ | ||
291 | } | ||
292 | |||
293 | #else /* CONFIG_UNEVICTABLE_LRU */ | 227 | #else /* CONFIG_UNEVICTABLE_LRU */ |
294 | 228 | ||
295 | /* | 229 | /* |
296 | * Just make pages present if VM_LOCKED. No-op if unlocking. | 230 | * Just make pages present if VM_LOCKED. No-op if unlocking. |
297 | */ | 231 | */ |
298 | static int __mlock_vma_pages_range(struct vm_area_struct *vma, | 232 | static long __mlock_vma_pages_range(struct vm_area_struct *vma, |
299 | unsigned long start, unsigned long end) | 233 | unsigned long start, unsigned long end, |
234 | int mlock) | ||
300 | { | 235 | { |
301 | if (vma->vm_flags & VM_LOCKED) | 236 | if (mlock && (vma->vm_flags & VM_LOCKED)) |
302 | make_pages_present(start, end); | 237 | make_pages_present(start, end); |
303 | return 0; | 238 | return 0; |
304 | } | 239 | } |
305 | |||
306 | /* | ||
307 | * munlock a range of pages in the vma -- no-op. | ||
308 | */ | ||
309 | static void __munlock_vma_pages_range(struct vm_area_struct *vma, | ||
310 | unsigned long start, unsigned long end) | ||
311 | { | ||
312 | } | ||
313 | #endif /* CONFIG_UNEVICTABLE_LRU */ | 240 | #endif /* CONFIG_UNEVICTABLE_LRU */ |
314 | 241 | ||
315 | /* | 242 | /** |
316 | * mlock all pages in this vma range. For mmap()/mremap()/... | 243 | * mlock_vma_pages_range() - mlock pages in specified vma range. |
244 | * @vma - the vma containing the specified address range | ||
245 | * @start - starting address in @vma to mlock | ||
246 | * @end - end address [+1] in @vma to mlock | ||
247 | * | ||
248 | * For mmap()/mremap()/expansion of mlocked vma. | ||
249 | * | ||
250 | * return 0 on success for "normal" vmas. | ||
251 | * | ||
252 | * return number of pages [> 0] to be removed from locked_vm on success | ||
253 | * of "special" vmas. | ||
254 | * | ||
255 | * return negative error if vma spanning @start-@end disappears while | ||
256 | * mmap semaphore is dropped. Unlikely? | ||
317 | */ | 257 | */ |
318 | int mlock_vma_pages_range(struct vm_area_struct *vma, | 258 | long mlock_vma_pages_range(struct vm_area_struct *vma, |
319 | unsigned long start, unsigned long end) | 259 | unsigned long start, unsigned long end) |
320 | { | 260 | { |
321 | struct mm_struct *mm = vma->vm_mm; | 261 | struct mm_struct *mm = vma->vm_mm; |
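
Illustrative aside (not from the patch itself): the kernel-doc above defines a three-way contract for mlock_vma_pages_range() — 0 when the whole range may be counted as locked, a positive page count to be removed from mm->locked_vm for "special" vmas that cannot be mlocked, and a negative errno if the vma vanished while mmap_sem was dropped. A toy sketch of how a caller could fold that into its accounting (the stub and names are illustrative, not the kernel's):

    #include <stdio.h>

    /* Toy stand-in for the contract in the kernel-doc above:
     *   0  -> the entire range counts as locked
     *  >0  -> pages to subtract from locked_vm ("special" vma, not mlockable)
     *  <0  -> the vma changed or disappeared while mmap_sem was dropped
     */
    static long mlock_range_stub(int special_vma, long nr_pages)
    {
        return special_vma ? nr_pages : 0;
    }

    int main(void)
    {
        long locked_vm = 0;
        long nr_pages = 256;   /* pages in the requested range */
        long ret = mlock_range_stub(1, nr_pages);

        if (ret < 0)
            fprintf(stderr, "vma went away\n");
        else
            locked_vm += nr_pages - ret;   /* special vma: nothing added */

        printf("locked_vm = %ld pages\n", locked_vm);
        return 0;
    }
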
@@ -331,8 +271,10 @@ int mlock_vma_pages_range(struct vm_area_struct *vma,
331 | if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || | 271 | if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || |
332 | is_vm_hugetlb_page(vma) || | 272 | is_vm_hugetlb_page(vma) || |
333 | vma == get_gate_vma(current))) { | 273 | vma == get_gate_vma(current))) { |
274 | long error; | ||
334 | downgrade_write(&mm->mmap_sem); | 275 | downgrade_write(&mm->mmap_sem); |
335 | nr_pages = __mlock_vma_pages_range(vma, start, end); | 276 | |
277 | error = __mlock_vma_pages_range(vma, start, end, 1); | ||
336 | 278 | ||
337 | up_read(&mm->mmap_sem); | 279 | up_read(&mm->mmap_sem); |
338 | /* vma can change or disappear */ | 280 | /* vma can change or disappear */ |
@@ -340,8 +282,9 @@ int mlock_vma_pages_range(struct vm_area_struct *vma,
340 | vma = find_vma(mm, start); | 282 | vma = find_vma(mm, start); |
341 | /* non-NULL vma must contain @start, but need to check @end */ | 283 | /* non-NULL vma must contain @start, but need to check @end */ |
342 | if (!vma || end > vma->vm_end) | 284 | if (!vma || end > vma->vm_end) |
343 | return -EAGAIN; | 285 | return -ENOMEM; |
344 | return nr_pages; | 286 | |
287 | return 0; /* hide other errors from mmap(), et al */ | ||
345 | } | 288 | } |
346 | 289 | ||
347 | /* | 290 | /* |
@@ -356,17 +299,33 @@ int mlock_vma_pages_range(struct vm_area_struct *vma,
356 | 299 | ||
357 | no_mlock: | 300 | no_mlock: |
358 | vma->vm_flags &= ~VM_LOCKED; /* and don't come back! */ | 301 | vma->vm_flags &= ~VM_LOCKED; /* and don't come back! */ |
359 | return nr_pages; /* pages NOT mlocked */ | 302 | return nr_pages; /* error or pages NOT mlocked */ |
360 | } | 303 | } |
361 | 304 | ||
362 | 305 | ||
363 | /* | 306 | /* |
364 | * munlock all pages in vma. For munmap() and exit(). | 307 | * munlock_vma_pages_range() - munlock all pages in the vma range.' |
308 | * @vma - vma containing range to be munlock()ed. | ||
309 | * @start - start address in @vma of the range | ||
310 | * @end - end of range in @vma. | ||
311 | * | ||
312 | * For mremap(), munmap() and exit(). | ||
313 | * | ||
314 | * Called with @vma VM_LOCKED. | ||
315 | * | ||
316 | * Returns with VM_LOCKED cleared. Callers must be prepared to | ||
317 | * deal with this. | ||
318 | * | ||
319 | * We don't save and restore VM_LOCKED here because pages are | ||
320 | * still on lru. In unmap path, pages might be scanned by reclaim | ||
321 | * and re-mlocked by try_to_{munlock|unmap} before we unmap and | ||
322 | * free them. This will result in freeing mlocked pages. | ||
365 | */ | 323 | */ |
366 | void munlock_vma_pages_all(struct vm_area_struct *vma) | 324 | void munlock_vma_pages_range(struct vm_area_struct *vma, |
325 | unsigned long start, unsigned long end) | ||
367 | { | 326 | { |
368 | vma->vm_flags &= ~VM_LOCKED; | 327 | vma->vm_flags &= ~VM_LOCKED; |
369 | __munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end); | 328 | __mlock_vma_pages_range(vma, start, end, 0); |
370 | } | 329 | } |
371 | 330 | ||
372 | /* | 331 | /* |
@@ -443,7 +402,7 @@ success:
443 | */ | 402 | */ |
444 | downgrade_write(&mm->mmap_sem); | 403 | downgrade_write(&mm->mmap_sem); |
445 | 404 | ||
446 | ret = __mlock_vma_pages_range(vma, start, end); | 405 | ret = __mlock_vma_pages_range(vma, start, end, 1); |
447 | if (ret > 0) { | 406 | if (ret > 0) { |
448 | mm->locked_vm -= ret; | 407 | mm->locked_vm -= ret; |
449 | ret = 0; | 408 | ret = 0; |
@@ -460,7 +419,7 @@ success:
460 | *prev = find_vma(mm, start); | 419 | *prev = find_vma(mm, start); |
461 | /* non-NULL *prev must contain @start, but need to check @end */ | 420 | /* non-NULL *prev must contain @start, but need to check @end */ |
462 | if (!(*prev) || end > (*prev)->vm_end) | 421 | if (!(*prev) || end > (*prev)->vm_end) |
463 | ret = -EAGAIN; | 422 | ret = -ENOMEM; |
464 | } else { | 423 | } else { |
465 | /* | 424 | /* |
466 | * TODO: for unlocking, pages will already be resident, so | 425 | * TODO: for unlocking, pages will already be resident, so |
@@ -469,7 +428,7 @@ success:
469 | * while. Should we downgrade the semaphore for both lock | 428 | * while. Should we downgrade the semaphore for both lock |
470 | * AND unlock ? | 429 | * AND unlock ? |
471 | */ | 430 | */ |
472 | __munlock_vma_pages_range(vma, start, end); | 431 | __mlock_vma_pages_range(vma, start, end, 0); |
473 | } | 432 | } |
474 | 433 | ||
475 | out: | 434 | out: |
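
Illustrative aside (not from the patch itself): the switch from -EAGAIN to -ENOMEM in the fixup path above matches what mlock(2) documents for ranges that are not fully mapped. A small user-space check, assuming Linux and a single-threaded process so the punched hole stays unmapped when mlock() runs:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long page = sysconf(_SC_PAGESIZE);
        /* Map two pages, punch out the second, then try to mlock both:
         * the hole should make mlock() fail with ENOMEM, the errno the
         * patch settles on for not-fully-mapped ranges.
         */
        char *p = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        munmap(p + page, page);

        if (mlock(p, 2 * page) != 0)
            printf("mlock: %s (expected ENOMEM)\n", strerror(errno));
        else
            printf("mlock unexpectedly succeeded\n");

        munlock(p, page);
        munmap(p, page);
        return 0;
    }
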