author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500
---|---|---
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e | (patch)
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /mm/mlock.c |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 | (diff)
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 | (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- | mm/mlock.c | 188 |
1 file changed, 91 insertions(+), 97 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index b70919ce4f72..048260c4e02e 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,13 +135,6 @@ void munlock_vma_page(struct page *page)
 	}
 }
 
-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_stack_continue(vma->vm_prev, addr);
-}
-
 /**
  * __mlock_vma_pages_range() - mlock a range of pages in the vma.
  * @vma:   target vma
@@ -155,13 +148,12 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
 static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-				    unsigned long start, unsigned long end)
+				    unsigned long start, unsigned long end,
+				    int *nonblocking)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
-	struct page *pages[16];		/* 16 gives a reasonable batch */
 	int nr_pages = (end - start) / PAGE_SIZE;
-	int ret = 0;
 	int gup_flags;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
@@ -170,73 +162,24 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	VM_BUG_ON(end > vma->vm_end);
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-	gup_flags = FOLL_TOUCH | FOLL_GET;
-	if (vma->vm_flags & VM_WRITE)
+	gup_flags = FOLL_TOUCH | FOLL_MLOCK;
+	/*
+	 * We want to touch writable mappings with a write fault in order
+	 * to break COW, except for shared mappings because these don't COW
+	 * and we would not want to dirty them for nothing.
+	 */
+	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
 		gup_flags |= FOLL_WRITE;
 
-	/* We don't try to access the guard page of a stack vma */
-	if (stack_guard_page(vma, start)) {
-		addr += PAGE_SIZE;
-		nr_pages--;
-	}
-
-	while (nr_pages > 0) {
-		int i;
-
-		cond_resched();
-
-		/*
-		 * get_user_pages makes pages present if we are
-		 * setting mlock. and this extra reference count will
-		 * disable migration of this page. However, page may
-		 * still be truncated out from under us.
-		 */
-		ret = __get_user_pages(current, mm, addr,
-				min_t(int, nr_pages, ARRAY_SIZE(pages)),
-				gup_flags, pages, NULL);
-		/*
-		 * This can happen for, e.g., VM_NONLINEAR regions before
-		 * a page has been allocated and mapped at a given offset,
-		 * or for addresses that map beyond end of a file.
-		 * We'll mlock the pages if/when they get faulted in.
-		 */
-		if (ret < 0)
-			break;
-
-		lru_add_drain();	/* push cached pages to LRU */
-
-		for (i = 0; i < ret; i++) {
-			struct page *page = pages[i];
-
-			if (page->mapping) {
-				/*
-				 * That preliminary check is mainly to avoid
-				 * the pointless overhead of lock_page on the
-				 * ZERO_PAGE: which might bounce very badly if
-				 * there is contention. However, we're still
-				 * dirtying its cacheline with get/put_page:
-				 * we'll add another __get_user_pages flag to
-				 * avoid it if that case turns out to matter.
-				 */
-				lock_page(page);
-				/*
-				 * Because we lock page here and migration is
-				 * blocked by the elevated reference, we need
-				 * only check for file-cache page truncation.
-				 */
-				if (page->mapping)
-					mlock_vma_page(page);
-				unlock_page(page);
-			}
-			put_page(page);	/* ref from get_user_pages() */
-		}
-
-		addr += ret * PAGE_SIZE;
-		nr_pages -= ret;
-		ret = 0;
-	}
+	/*
+	 * We want mlock to succeed for regions that have any permissions
+	 * other than PROT_NONE.
+	 */
+	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
+		gup_flags |= FOLL_FORCE;
 
-	return ret;	/* 0 or negative error code */
+	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+				NULL, NULL, nonblocking);
 }
 
 /*
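As an aside (not part of the patch): the rewritten __mlock_vma_pages_range() now encodes its whole policy in two flag tests, which is easy to misread in diff form. The sketch below mirrors just that selection logic in a standalone program; the pick_gup_flags() helper and the numeric flag values are illustrative stand-ins, not the kernel's definitions.

    #include <stdio.h>

    /* Illustrative stand-ins; NOT the kernel's real VM_* / FOLL_* values. */
    #define VM_READ    0x1u
    #define VM_WRITE   0x2u
    #define VM_EXEC    0x4u
    #define VM_SHARED  0x8u

    #define FOLL_TOUCH 0x01u
    #define FOLL_MLOCK 0x02u
    #define FOLL_WRITE 0x04u
    #define FOLL_FORCE 0x08u

    /* Mirrors the gup_flags selection in the rewritten __mlock_vma_pages_range(). */
    static unsigned int pick_gup_flags(unsigned int vm_flags)
    {
    	unsigned int gup_flags = FOLL_TOUCH | FOLL_MLOCK;

    	/* Write-fault only private writable mappings, so COW is broken
    	   without dirtying shared mappings for nothing. */
    	if ((vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
    		gup_flags |= FOLL_WRITE;

    	/* Any permission other than PROT_NONE lets mlock force the fault. */
    	if (vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
    		gup_flags |= FOLL_FORCE;

    	return gup_flags;
    }

    int main(void)
    {
    	printf("private RW: %#x\n", pick_gup_flags(VM_READ | VM_WRITE));
    	printf("shared  RW: %#x\n", pick_gup_flags(VM_READ | VM_WRITE | VM_SHARED));
    	printf("PROT_NONE : %#x\n", pick_gup_flags(0));
    	return 0;
    }

Running it shows that only private writable mappings request a write fault, and that a PROT_NONE region gets neither FOLL_WRITE nor FOLL_FORCE, so mlock will not fault it in.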
@@ -278,9 +221,9 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 
 	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
 			is_vm_hugetlb_page(vma) ||
-			vma == get_gate_vma(current))) {
+			vma == get_gate_vma(current->mm))) {
 
-		__mlock_vma_pages_range(vma, start, end);
+		__mlock_vma_pages_range(vma, start, end, NULL);
 
 		/* Hide errors from mmap() and other callers */
 		return 0;
@@ -364,26 +307,18 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
  * For vmas that pass the filters, merge/split as appropriate.
  */
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
-	unsigned long start, unsigned long end, unsigned int newflags)
+	unsigned long start, unsigned long end, vm_flags_t newflags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgoff_t pgoff;
 	int nr_pages;
 	int ret = 0;
-	int lock = newflags & VM_LOCKED;
+	int lock = !!(newflags & VM_LOCKED);
 
-	if (newflags == vma->vm_flags ||
-			(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
+	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
 		goto out;	/* don't set VM_LOCKED, don't count */
 
-	if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
-			is_vm_hugetlb_page(vma) ||
-			vma == get_gate_vma(current)) {
-		if (lock)
-			make_pages_present(start, end);
-		goto out;	/* don't set VM_LOCKED, don't count */
-	}
-
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma));
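One small detail worth calling out: `lock` is now computed as `!!(newflags & VM_LOCKED)` while `newflags` becomes a `vm_flags_t`. In C, assigning a wide bitmask directly to an `int` can silently drop high bits, whereas double negation collapses any non-zero mask to exactly 1 before the assignment, keeping the boolean intent explicit regardless of where the flag bit sits. A minimal, self-contained illustration (the typedef and the high flag bit are hypothetical, chosen only to make the truncation visible):

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t vm_flags_t;		/* hypothetical stand-in for the kernel typedef */
    #define SOME_HIGH_FLAG (1ULL << 40)	/* hypothetical flag above bit 31 */

    int main(void)
    {
    	vm_flags_t flags = SOME_HIGH_FLAG;

    	int truncated  = (int)(flags & SOME_HIGH_FLAG);	/* high bit lost: typically 0 */
    	int normalized = !!(flags & SOME_HIGH_FLAG);	/* any non-zero -> 1 */

    	printf("truncated=%d normalized=%d\n", truncated, normalized);
    	return 0;
    }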
@@ -419,14 +354,10 @@ success:
 	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
 	 */
 
-	if (lock) {
+	if (lock)
 		vma->vm_flags = newflags;
-		ret = __mlock_vma_pages_range(vma, start, end);
-		if (ret < 0)
-			ret = __mlock_posix_error_return(ret);
-	} else {
+	else
 		munlock_vma_pages_range(vma, start, end);
-	}
 
 out:
 	*prev = vma;
@@ -439,7 +370,8 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	struct vm_area_struct * vma, * prev;
 	int error;
 
-	len = PAGE_ALIGN(len);
+	VM_BUG_ON(start & ~PAGE_MASK);
+	VM_BUG_ON(len != PAGE_ALIGN(len));
 	end = start + len;
 	if (end < start)
 		return -EINVAL;
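The two new VM_BUG_ON()s state the contract that do_mlock() is now given a page-aligned start and a length that is already a multiple of the page size; the alignment work moves to the callers. For reference, a tiny userspace check of what those two expressions test, with a hypothetical 4 KiB page and simplified local macro definitions:

    #include <stdio.h>

    /* Simplified local definitions; a hypothetical 4 KiB page is assumed. */
    #define PAGE_SIZE	4096UL
    #define PAGE_MASK	(~(PAGE_SIZE - 1))
    #define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)

    int main(void)
    {
    	unsigned long start = 0x20001000UL;	/* page aligned */
    	unsigned long len   = 5000UL;		/* not a multiple of PAGE_SIZE */

    	/* start & ~PAGE_MASK is the offset inside the page; 0 means aligned. */
    	printf("start offset in page: %lu\n", start & ~PAGE_MASK);

    	/* PAGE_ALIGN() rounds the length up to the next page boundary. */
    	printf("PAGE_ALIGN(%lu) = %lu\n", len, PAGE_ALIGN(len));
    	return 0;
    }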
@@ -453,7 +385,7 @@ static int do_mlock(unsigned long start, size_t len, int on)
 		prev = vma;
 
 	for (nstart = start ; ; ) {
-		unsigned int newflags;
+		vm_flags_t newflags;
 
 		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
 
@@ -482,6 +414,62 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	return error;
 }
 
+static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long end, nstart, nend;
+	struct vm_area_struct *vma = NULL;
+	int locked = 0;
+	int ret = 0;
+
+	VM_BUG_ON(start & ~PAGE_MASK);
+	VM_BUG_ON(len != PAGE_ALIGN(len));
+	end = start + len;
+
+	for (nstart = start; nstart < end; nstart = nend) {
+		/*
+		 * We want to fault in pages for [nstart; end) address range.
+		 * Find first corresponding VMA.
+		 */
+		if (!locked) {
+			locked = 1;
+			down_read(&mm->mmap_sem);
+			vma = find_vma(mm, nstart);
+		} else if (nstart >= vma->vm_end)
+			vma = vma->vm_next;
+		if (!vma || vma->vm_start >= end)
+			break;
+		/*
+		 * Set [nstart; nend) to intersection of desired address
+		 * range with the first VMA. Also, skip undesirable VMA types.
+		 */
+		nend = min(end, vma->vm_end);
+		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+			continue;
+		if (nstart < vma->vm_start)
+			nstart = vma->vm_start;
+		/*
+		 * Now fault in a range of pages. __mlock_vma_pages_range()
+		 * double checks the vma flags, so that it won't mlock pages
+		 * if the vma was already munlocked.
+		 */
+		ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
+		if (ret < 0) {
+			if (ignore_errors) {
+				ret = 0;
+				continue;	/* continue at next VMA */
+			}
+			ret = __mlock_posix_error_return(ret);
+			break;
+		}
+		nend = nstart + ret * PAGE_SIZE;
+		ret = 0;
+	}
+	if (locked)
+		up_read(&mm->mmap_sem);
+	return ret;	/* 0 or negative error code */
+}
+
 SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 {
 	unsigned long locked;
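The heart of the new do_mlock_pages() is an interval walk: clip [nstart, end) against each VMA in turn, skip VM_IO/VM_PFNMAP mappings and unmapped holes, and fault in whatever remains. The toy program below reproduces only that clipping pattern in userspace; the range struct, the skip field and the addresses are made up for illustration.

    #include <stdio.h>

    /* Toy stand-in for a VMA list: sorted, non-overlapping [start, end) ranges. */
    struct range {
    	unsigned long start, end;
    	int skip;			/* models VM_IO / VM_PFNMAP */
    };

    int main(void)
    {
    	struct range vmas[] = {
    		{ 0x1000, 0x4000, 0 },
    		{ 0x4000, 0x6000, 1 },	/* "VM_IO"-like: skipped */
    		{ 0x8000, 0xa000, 0 },	/* note the unmapped hole before it */
    	};
    	unsigned int nvmas = sizeof(vmas) / sizeof(vmas[0]);
    	unsigned long start = 0x2000, end = 0x9000;
    	unsigned long nstart, nend;
    	unsigned int i = 0;

    	/* Same clipping pattern as do_mlock_pages(): intersect [nstart, end)
    	   with each range in turn, skipping undesirable ranges and holes. */
    	for (nstart = start; nstart < end; nstart = nend) {
    		while (i < nvmas && vmas[i].end <= nstart)
    			i++;
    		if (i == nvmas || vmas[i].start >= end)
    			break;
    		nend = end < vmas[i].end ? end : vmas[i].end;
    		if (nstart < vmas[i].start)
    			nstart = vmas[i].start;
    		if (vmas[i].skip)
    			continue;
    		printf("fault in [%#lx, %#lx)\n", nstart, nend);
    	}
    	return 0;
    }

With the sample ranges it "faults in" [0x2000, 0x4000) and [0x8000, 0x9000), skipping the VM_IO-like region and the hole between 0x6000 and 0x8000.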
@@ -507,6 +495,8 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
 		error = do_mlock(start, len, 1);
 	up_write(&current->mm->mmap_sem);
+	if (!error)
+		error = do_mlock_pages(start, len, 0);
 	return error;
 }
 
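From userspace nothing changes in the mlock() API; the difference is that the pages are now faulted in by do_mlock_pages() after the VM_LOCKED flags have been set, with mmap_sem held only for read during the fault-in. A minimal caller, for reference (error handling kept deliberately simple):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
    	size_t len = 1 << 20;			/* 1 MiB */
    	void *buf = malloc(len);

    	if (!buf)
    		return 1;
    	/* After this change the syscall sets VM_LOCKED under mmap_sem held
    	   for write, then faults the pages in via do_mlock_pages() with
    	   mmap_sem held only for read. */
    	if (mlock(buf, len) != 0) {
    		perror("mlock");		/* e.g. RLIMIT_MEMLOCK too low */
    		free(buf);
    		return 1;
    	}
    	memset(buf, 0, len);			/* pages are already resident */
    	munlock(buf, len);
    	free(buf);
    	return 0;
    }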
@@ -534,7 +524,7 @@ static int do_mlockall(int flags)
 		goto out;
 
 	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
-		unsigned int newflags;
+		vm_flags_t newflags;
 
 		newflags = vma->vm_flags | VM_LOCKED;
 		if (!(flags & MCL_CURRENT))
@@ -571,6 +561,10 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	    capable(CAP_IPC_LOCK))
 		ret = do_mlockall(flags);
 	up_write(&current->mm->mmap_sem);
+	if (!ret && (flags & MCL_CURRENT)) {
+		/* Ignore errors */
+		do_mlock_pages(0, TASK_SIZE, 1);
+	}
 out:
 	return ret;
 }
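Likewise for mlockall(): per the hunk above, the MCL_CURRENT fault-in pass runs do_mlock_pages(0, TASK_SIZE, 1) and deliberately ignores errors, so the call only fails if setting the flags themselves fails. A small, self-contained example:

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
    	/* Lock everything currently mapped and everything mapped later.
    	   Per the hunk above, the MCL_CURRENT fault-in pass ignores errors,
    	   so mlockall() only fails on the flag-setting step itself. */
    	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
    		perror("mlockall");		/* typically ENOMEM or EPERM */
    		return 1;
    	}
    	puts("address space locked");
    	munlockall();
    	return 0;
    }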