author     Paul Cassella <cassella@cray.com>               2014-08-06 19:07:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-08-06 21:01:20 -0400
commit     9a95f3cf7b33d66fa64727cff8cd2f2a9d09f335 (patch)
tree       14d7c178144b6de0b352b8e8d4b04ffdbae9a71c /mm
parent     4ffeaf3560a52b4a69cc7909873d08c0ef5909d4 (diff)
mm: describe mmap_sem rules for __lock_page_or_retry() and callers
Add a comment describing the circumstances in which __lock_page_or_retry()
will or will not release the mmap_sem when returning 0.

Add comments to lock_page_or_retry()'s callers (filemap_fault(),
do_swap_page()) noting the impact on VM_FAULT_RETRY returns.

Add comments on up the call tree, particularly replacing the false "We
return with mmap_sem still held" comments.

Signed-off-by: Paul Cassella <cassella@cray.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
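To make the contract concrete, here is a minimal sketch of the caller pattern it governs: an arch-style fault handler retrying after VM_FAULT_RETRY. The sketch is not part of this patch; the function name is hypothetical, access checks are elided, and the helpers are the 3.16-era ones.

/*
 * Illustrative sketch, not part of this patch: an arch-style fault
 * handler consuming the VM_FAULT_RETRY/mmap_sem contract described
 * in this commit.  Function name is hypothetical.
 */
static void sketch_fault_handler(struct mm_struct *mm, unsigned long address)
{
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
        struct vm_area_struct *vma;
        int fault;

retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (!vma || vma->vm_start > address) {
                up_read(&mm->mmap_sem);
                return;                 /* bad address; real code signals */
        }
        fault = handle_mm_fault(mm, vma, address, flags);
        if (fault & VM_FAULT_RETRY) {
                /*
                 * handle_mm_fault() already dropped mmap_sem for us:
                 * no FAULT_FLAG_RETRY_NOWAIT here, so the exception in
                 * __lock_page_or_retry() does not apply.  Retry once,
                 * this time waiting for the page lock.
                 */
                flags &= ~FAULT_FLAG_ALLOW_RETRY;
                flags |= FAULT_FLAG_TRIED;
                goto retry;
        }
        /* No VM_FAULT_RETRY: mmap_sem is still held. */
        up_read(&mm->mmap_sem);
}

The point the patch makes explicit is the asymmetry: on the retry path the up_read() has already happened inside the mm code, so the caller must not drop the lock again.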
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c | 23
-rw-r--r--  mm/gup.c     | 18
-rw-r--r--  mm/memory.c  | 34
-rw-r--r--  mm/mlock.c   |  9
4 files changed, 77 insertions(+), 7 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 7e85c8147e1b..af19a6b079f5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+/*
+ * Return values:
+ * 1 - page is locked; mmap_sem is still held.
+ * 0 - page is not locked.
+ *     mmap_sem has been released (up_read()), unless flags had both
+ *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
+ *     which case mmap_sem is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
+ * with the page locked and the mmap_sem unperturbed.
+ */
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 			 unsigned int flags)
 {
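For orientation, the logic behind this new comment can be reduced to the following shape. This is a reconstruction from the rules stated in the comment, not code from the diff; it assumes the 3.16-era lock and wait helpers.

/* Reconstruction from the comment's rules; not code from this diff. */
int sketch_lock_page_or_retry(struct page *page, struct mm_struct *mm,
                              unsigned int flags)
{
        if (flags & FAULT_FLAG_ALLOW_RETRY) {
                if (flags & FAULT_FLAG_RETRY_NOWAIT)
                        return 0;       /* the exception: sem still held */
                up_read(&mm->mmap_sem); /* the usual 0 return */
                if (flags & FAULT_FLAG_KILLABLE)
                        wait_on_page_locked_killable(page);
                else
                        wait_on_page_locked(page);
                return 0;
        }
        if (flags & FAULT_FLAG_KILLABLE) {
                if (__lock_page_killable(page)) {
                        up_read(&mm->mmap_sem); /* fatal signal */
                        return 0;
                }
        } else {
                __lock_page(page);
        }
        return 1;                       /* locked, sem unperturbed */
}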
@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
+ *
+ * vma->vm_mm->mmap_sem must be held on entry.
+ *
+ * If our return value has VM_FAULT_RETRY set, it's because
+ * lock_page_or_retry() returned 0.
+ * The mmap_sem has usually been released in this case.
+ * See __lock_page_or_retry() for the exception.
+ *
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * has not been released.
+ *
+ * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
  */
 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
diff --git a/mm/gup.c b/mm/gup.c
index cc5a9e7adea7..91d044b1600d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
 	return ret;
 }
 
+/*
+ * mmap_sem must be held on entry.  If @nonblocking != NULL and
+ * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
+ * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
+ */
 static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 		unsigned long address, unsigned int *flags, int *nonblocking)
 {
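The -EBUSY conversion that comment describes looks roughly like this in practice. A condensed sketch, not part of the diff: the function name is hypothetical and the fault-flag selection is reduced to the two flags that matter here.

/*
 * Sketch of the -EBUSY conversion described above; hypothetical
 * function name, flag handling simplified.
 */
static int sketch_faultin(struct vm_area_struct *vma, unsigned long address,
                          unsigned int *flags, int *nonblocking)
{
        unsigned int fault_flags = 0;
        int ret;

        if (nonblocking)
                fault_flags |= FAULT_FLAG_ALLOW_RETRY;
        if (*flags & FOLL_NOWAIT)
                fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;

        ret = handle_mm_fault(vma->vm_mm, vma, address, fault_flags);
        if (ret & VM_FAULT_RETRY) {
                if (nonblocking)
                        *nonblocking = 0;
                /*
                 * Without FOLL_NOWAIT, __lock_page_or_retry() has
                 * already done up_read(&mm->mmap_sem); with it, the
                 * sem is still held, as the comment above says.
                 */
                return -EBUSY;
        }
        return 0;
}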
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  * with a put_page() call when it is finished with. vmas will only
  * remain valid while mmap_sem is held.
  *
- * Must be called with mmap_sem held for read or write.
+ * Must be called with mmap_sem held.  It may be released.  See below.
  *
  * __get_user_pages walks a process's page tables and takes a reference to
  * each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  *
  * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
  * or mmap_sem contention, and if waiting is needed to pin all pages,
- * *@nonblocking will be set to 0.
+ * *@nonblocking will be set to 0.  Further, if @gup_flags does not
+ * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
+ * this case.
+ *
+ * A caller using such a combination of @nonblocking and @gup_flags
+ * must therefore hold the mmap_sem for reading only, and recognize
+ * when it's been released.  Otherwise, it must be held for either
+ * reading or writing and will not be released.
  *
  * In most cases, get_user_pages or get_user_pages_fast should be used
  * instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
  * such architectures, gup() will not be enough to make a subsequent access
  * succeed.
  *
- * This should be called with the mm_sem held for read.
+ * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
  */
 int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long address, unsigned int fault_flags)
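Applied to __get_user_pages(), the rules above suggest a caller shaped like this. A minimal sketch under the comment's assumptions: the helper name is invented, and FOLL_TOUCH stands in for whatever gup_flags a real caller would pass.

/*
 * Sketch of a caller using @nonblocking; not part of this diff.
 */
static long sketch_pin_pages(struct task_struct *tsk, struct mm_struct *mm,
                             unsigned long start, unsigned long nr_pages,
                             struct page **pages)
{
        int locked = 1;
        long ret;

        down_read(&mm->mmap_sem);       /* read only, per the comment */
        ret = __get_user_pages(tsk, mm, start, nr_pages, FOLL_TOUCH,
                               pages, NULL, &locked);
        if (locked)
                up_read(&mm->mmap_sem);
        /* else __get_user_pages() already did the up_read() for us */
        return ret;
}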
diff --git a/mm/memory.c b/mm/memory.c
index 7e131325bdf8..4d0a543f3bb3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
 /*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
  */
 static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2688,6 +2691,11 @@ oom:
 	return VM_FAULT_OOM;
 }
 
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_or_retry().
+ */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 		pgoff_t pgoff, unsigned int flags, struct page **page)
 {
@@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return ret;
 }
 
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3188,10 @@ out:
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int handle_pte_fault(struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long address,
@@ -3232,6 +3251,9 @@ unlock:
 
 /*
  * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
@@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index b1eb53634005..ce84cb0b83ef 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
  * @vma:   target vma
  * @start: start address
  * @end:   end address
+ * @nonblocking:
  *
  * This takes care of making the pages present too.
  *
  * return 0 on success, negative error code on error.
  *
- * vma->vm_mm->mmap_sem must be held for at least read.
+ * vma->vm_mm->mmap_sem must be held.
+ *
+ * If @nonblocking is NULL, it may be held for read or write and will
+ * be unperturbed.
+ *
+ * If @nonblocking is non-NULL, it must be held for read only and may
+ * be released.  If it's released, *@nonblocking will be set to 0.
  */
 long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, int *nonblocking)
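The caller side of this contract, sketched in the spirit of this era's mm_populate() path. Not part of the diff; the vma walk, retry loop, and error handling are elided.

/*
 * Sketch of a __mlock_vma_pages_range() caller; simplified.
 */
static long sketch_populate(struct mm_struct *mm, unsigned long start,
                            unsigned long end)
{
        struct vm_area_struct *vma;
        int locked = 1;
        long ret = 0;

        down_read(&mm->mmap_sem);       /* read only: it may be dropped */
        vma = find_vma(mm, start);
        if (vma)
                ret = __mlock_vma_pages_range(vma, start, end, &locked);
        if (locked)
                up_read(&mm->mmap_sem);
        /* if locked became 0, the mmap_sem is already gone */
        return ret;
}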