diff options
Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- | mm/mlock.c | 128 |
1 files changed, 66 insertions, 62 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index 45eb650b9654..bd6f0e466f6c 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -139,49 +139,36 @@ static void munlock_vma_page(struct page *page) | |||
139 | } | 139 | } |
140 | 140 | ||
141 | /** | 141 | /** |
142 | * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma. | 142 | * __mlock_vma_pages_range() - mlock a range of pages in the vma. |
143 | * @vma: target vma | 143 | * @vma: target vma |
144 | * @start: start address | 144 | * @start: start address |
145 | * @end: end address | 145 | * @end: end address |
146 | * @mlock: 0 indicate munlock, otherwise mlock. | ||
147 | * | 146 | * |
148 | * If @mlock == 0, unlock an mlocked range; | 147 | * This takes care of making the pages present too. |
149 | * else mlock the range of pages. This takes care of making the pages present , | ||
150 | * too. | ||
151 | * | 148 | * |
152 | * return 0 on success, negative error code on error. | 149 | * return 0 on success, negative error code on error. |
153 | * | 150 | * |
154 | * vma->vm_mm->mmap_sem must be held for at least read. | 151 | * vma->vm_mm->mmap_sem must be held for at least read. |
155 | */ | 152 | */ |
156 | static long __mlock_vma_pages_range(struct vm_area_struct *vma, | 153 | static long __mlock_vma_pages_range(struct vm_area_struct *vma, |
157 | unsigned long start, unsigned long end, | 154 | unsigned long start, unsigned long end) |
158 | int mlock) | ||
159 | { | 155 | { |
160 | struct mm_struct *mm = vma->vm_mm; | 156 | struct mm_struct *mm = vma->vm_mm; |
161 | unsigned long addr = start; | 157 | unsigned long addr = start; |
162 | struct page *pages[16]; /* 16 gives a reasonable batch */ | 158 | struct page *pages[16]; /* 16 gives a reasonable batch */ |
163 | int nr_pages = (end - start) / PAGE_SIZE; | 159 | int nr_pages = (end - start) / PAGE_SIZE; |
164 | int ret = 0; | 160 | int ret = 0; |
165 | int gup_flags = 0; | 161 | int gup_flags; |
166 | 162 | ||
167 | VM_BUG_ON(start & ~PAGE_MASK); | 163 | VM_BUG_ON(start & ~PAGE_MASK); |
168 | VM_BUG_ON(end & ~PAGE_MASK); | 164 | VM_BUG_ON(end & ~PAGE_MASK); |
169 | VM_BUG_ON(start < vma->vm_start); | 165 | VM_BUG_ON(start < vma->vm_start); |
170 | VM_BUG_ON(end > vma->vm_end); | 166 | VM_BUG_ON(end > vma->vm_end); |
171 | VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) && | 167 | VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); |
172 | (atomic_read(&mm->mm_users) != 0)); | ||
173 | |||
174 | /* | ||
175 | * mlock: don't page populate if vma has PROT_NONE permission. | ||
176 | * munlock: always do munlock although the vma has PROT_NONE | ||
177 | * permission, or SIGKILL is pending. | ||
178 | */ | ||
179 | if (!mlock) | ||
180 | gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS | | ||
181 | GUP_FLAGS_IGNORE_SIGKILL; | ||
182 | 168 | ||
169 | gup_flags = FOLL_TOUCH | FOLL_GET; | ||
183 | if (vma->vm_flags & VM_WRITE) | 170 | if (vma->vm_flags & VM_WRITE) |
184 | gup_flags |= GUP_FLAGS_WRITE; | 171 | gup_flags |= FOLL_WRITE; |
185 | 172 | ||
186 | while (nr_pages > 0) { | 173 | while (nr_pages > 0) { |
187 | int i; | 174 | int i; |
@@ -201,51 +188,45 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
201 | * This can happen for, e.g., VM_NONLINEAR regions before | 188 | * This can happen for, e.g., VM_NONLINEAR regions before |
202 | * a page has been allocated and mapped at a given offset, | 189 | * a page has been allocated and mapped at a given offset, |
203 | * or for addresses that map beyond end of a file. | 190 | * or for addresses that map beyond end of a file. |
204 | * We'll mlock the the pages if/when they get faulted in. | 191 | * We'll mlock the pages if/when they get faulted in. |
205 | */ | 192 | */ |
206 | if (ret < 0) | 193 | if (ret < 0) |
207 | break; | 194 | break; |
208 | if (ret == 0) { | ||
209 | /* | ||
210 | * We know the vma is there, so the only time | ||
211 | * we cannot get a single page should be an | ||
212 | * error (ret < 0) case. | ||
213 | */ | ||
214 | WARN_ON(1); | ||
215 | break; | ||
216 | } | ||
217 | 195 | ||
218 | lru_add_drain(); /* push cached pages to LRU */ | 196 | lru_add_drain(); /* push cached pages to LRU */ |
219 | 197 | ||
220 | for (i = 0; i < ret; i++) { | 198 | for (i = 0; i < ret; i++) { |
221 | struct page *page = pages[i]; | 199 | struct page *page = pages[i]; |
222 | 200 | ||
223 | lock_page(page); | ||
224 | /* | ||
225 | * Because we lock page here and migration is blocked | ||
226 | * by the elevated reference, we need only check for | ||
227 | * page truncation (file-cache only). | ||
228 | */ | ||
229 | if (page->mapping) { | 201 | if (page->mapping) { |
230 | if (mlock) | 202 | /* |
203 | * That preliminary check is mainly to avoid | ||
204 | * the pointless overhead of lock_page on the | ||
205 | * ZERO_PAGE: which might bounce very badly if | ||
206 | * there is contention. However, we're still | ||
207 | * dirtying its cacheline with get/put_page: | ||
208 | * we'll add another __get_user_pages flag to | ||
209 | * avoid it if that case turns out to matter. | ||
210 | */ | ||
211 | lock_page(page); | ||
212 | /* | ||
213 | * Because we lock page here and migration is | ||
214 | * blocked by the elevated reference, we need | ||
215 | * only check for file-cache page truncation. | ||
216 | */ | ||
217 | if (page->mapping) | ||
231 | mlock_vma_page(page); | 218 | mlock_vma_page(page); |
232 | else | 219 | unlock_page(page); |
233 | munlock_vma_page(page); | ||
234 | } | 220 | } |
235 | unlock_page(page); | 221 | put_page(page); /* ref from get_user_pages() */ |
236 | put_page(page); /* ref from get_user_pages() */ | ||
237 | |||
238 | /* | ||
239 | * here we assume that get_user_pages() has given us | ||
240 | * a list of virtually contiguous pages. | ||
241 | */ | ||
242 | addr += PAGE_SIZE; /* for next get_user_pages() */ | ||
243 | nr_pages--; | ||
244 | } | 222 | } |
223 | |||
224 | addr += ret * PAGE_SIZE; | ||
225 | nr_pages -= ret; | ||
245 | ret = 0; | 226 | ret = 0; |
246 | } | 227 | } |
247 | 228 | ||
248 | return ret; /* count entire vma as locked_vm */ | 229 | return ret; /* 0 or negative error code */ |
249 | } | 230 | } |
250 | 231 | ||
251 | /* | 232 | /* |
@@ -289,7 +270,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, | |||
289 | is_vm_hugetlb_page(vma) || | 270 | is_vm_hugetlb_page(vma) || |
290 | vma == get_gate_vma(current))) { | 271 | vma == get_gate_vma(current))) { |
291 | 272 | ||
292 | __mlock_vma_pages_range(vma, start, end, 1); | 273 | __mlock_vma_pages_range(vma, start, end); |
293 | 274 | ||
294 | /* Hide errors from mmap() and other callers */ | 275 | /* Hide errors from mmap() and other callers */ |
295 | return 0; | 276 | return 0; |
@@ -310,7 +291,6 @@ no_mlock: | |||
310 | return nr_pages; /* error or pages NOT mlocked */ | 291 | return nr_pages; /* error or pages NOT mlocked */ |
311 | } | 292 | } |
312 | 293 | ||
313 | |||
314 | /* | 294 | /* |
315 | * munlock_vma_pages_range() - munlock all pages in the vma range. | 295 | * munlock_vma_pages_range() - munlock all pages in the vma range. |
316 | * @vma - vma containing range to be munlock()ed. | 296 | * @vma - vma containing range to be munlock()ed. |
@@ -330,10 +310,38 @@ no_mlock: | |||
330 | * free them. This will result in freeing mlocked pages. | 310 | * free them. This will result in freeing mlocked pages. |
331 | */ | 311 | */ |
332 | void munlock_vma_pages_range(struct vm_area_struct *vma, | 312 | void munlock_vma_pages_range(struct vm_area_struct *vma, |
333 | unsigned long start, unsigned long end) | 313 | unsigned long start, unsigned long end) |
334 | { | 314 | { |
315 | unsigned long addr; | ||
316 | |||
317 | lru_add_drain(); | ||
335 | vma->vm_flags &= ~VM_LOCKED; | 318 | vma->vm_flags &= ~VM_LOCKED; |
336 | __mlock_vma_pages_range(vma, start, end, 0); | 319 | |
320 | for (addr = start; addr < end; addr += PAGE_SIZE) { | ||
321 | struct page *page; | ||
322 | /* | ||
323 | * Although FOLL_DUMP is intended for get_dump_page(), | ||
324 | * it just so happens that its special treatment of the | ||
325 | * ZERO_PAGE (returning an error instead of doing get_page) | ||
326 | * suits munlock very well (and if somehow an abnormal page | ||
327 | * has sneaked into the range, we won't oops here: great). | ||
328 | */ | ||
329 | page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); | ||
330 | if (page && !IS_ERR(page)) { | ||
331 | lock_page(page); | ||
332 | /* | ||
333 | * Like in __mlock_vma_pages_range(), | ||
334 | * because we lock page here and migration is | ||
335 | * blocked by the elevated reference, we need | ||
336 | * only check for file-cache page truncation. | ||
337 | */ | ||
338 | if (page->mapping) | ||
339 | munlock_vma_page(page); | ||
340 | unlock_page(page); | ||
341 | put_page(page); | ||
342 | } | ||
343 | cond_resched(); | ||
344 | } | ||
337 | } | 345 | } |
338 | 346 | ||
339 | /* | 347 | /* |
@@ -400,18 +408,14 @@ success: | |||
400 | * It's okay if try_to_unmap_one unmaps a page just after we | 408 | * It's okay if try_to_unmap_one unmaps a page just after we |
401 | * set VM_LOCKED, __mlock_vma_pages_range will bring it back. | 409 | * set VM_LOCKED, __mlock_vma_pages_range will bring it back. |
402 | */ | 410 | */ |
403 | vma->vm_flags = newflags; | ||
404 | 411 | ||
405 | if (lock) { | 412 | if (lock) { |
406 | ret = __mlock_vma_pages_range(vma, start, end, 1); | 413 | vma->vm_flags = newflags; |
407 | 414 | ret = __mlock_vma_pages_range(vma, start, end); | |
408 | if (ret > 0) { | 415 | if (ret < 0) |
409 | mm->locked_vm -= ret; | 416 | ret = __mlock_posix_error_return(ret); |
410 | ret = 0; | ||
411 | } else | ||
412 | ret = __mlock_posix_error_return(ret); /* translate if needed */ | ||
413 | } else { | 417 | } else { |
414 | __mlock_vma_pages_range(vma, start, end, 0); | 418 | munlock_vma_pages_range(vma, start, end); |
415 | } | 419 | } |
416 | 420 | ||
417 | out: | 421 | out: |