author     Paul Cassella <cassella@cray.com>               2014-08-06 19:07:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-08-06 21:01:20 -0400
commit     9a95f3cf7b33d66fa64727cff8cd2f2a9d09f335
tree       14d7c178144b6de0b352b8e8d4b04ffdbae9a71c /mm
parent     4ffeaf3560a52b4a69cc7909873d08c0ef5909d4
mm: describe mmap_sem rules for __lock_page_or_retry() and callers
Add a comment describing the circumstances in which
__lock_page_or_retry() will or will not release the mmap_sem when
returning 0.
Add comments to lock_page_or_retry()'s callers (filemap_fault(),
do_swap_page()) noting the impact on VM_FAULT_RETRY returns.
Add comments further up the call tree, particularly replacing the false "We
return with mmap_sem still held" comments.
Signed-off-by: Paul Cassella <cassella@cray.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
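For context, this is the contract that arch page-fault handlers already rely on. A condensed, illustrative sketch of the usual retry loop (modeled on the x86 handler of this era; the vma lookup and error paths are elided):

    down_read(&mm->mmap_sem);
    retry:
        /* ... find and validate vma ... */
        fault = handle_mm_fault(mm, vma, address, flags);

        if (fault & VM_FAULT_RETRY) {
            /*
             * handle_mm_fault() has already dropped mmap_sem via
             * __lock_page_or_retry(), so don't up_read() here; just
             * retake the lock and retry once without ALLOW_RETRY.
             */
            flags &= ~FAULT_FLAG_ALLOW_RETRY;
            flags |= FAULT_FLAG_TRIED;
            down_read(&mm->mmap_sem);
            goto retry;
        }
        up_read(&mm->mmap_sem);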
Diffstat (limited to 'mm')
 mm/filemap.c | 23
 mm/gup.c     | 18
 mm/memory.c  | 34
 mm/mlock.c   |  9
 4 files changed, 77 insertions(+), 7 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 7e85c8147e1b..af19a6b079f5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+/*
+ * Return values:
+ * 1 - page is locked; mmap_sem is still held.
+ * 0 - page is not locked.
+ *     mmap_sem has been released (up_read()), unless flags had both
+ *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
+ *     which case mmap_sem is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
+ * with the page locked and the mmap_sem unperturbed.
+ */
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 			 unsigned int flags)
 {
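The function body (condensed here from mm/filemap.c as of this commit; comments added for illustration) makes the documented cases explicit:

    int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                             unsigned int flags)
    {
        if (flags & FAULT_FLAG_ALLOW_RETRY) {
            /* Return 0 with mmap_sem still held: the one exception. */
            if (flags & FAULT_FLAG_RETRY_NOWAIT)
                return 0;
            /* Return 0 after releasing mmap_sem: the usual case. */
            up_read(&mm->mmap_sem);
            if (flags & FAULT_FLAG_KILLABLE)
                wait_on_page_locked_killable(page);
            else
                wait_on_page_locked(page);
            return 0;
        }
        if (flags & FAULT_FLAG_KILLABLE) {
            /* Killed while waiting for the lock: release mmap_sem. */
            if (__lock_page_killable(page)) {
                up_read(&mm->mmap_sem);
                return 0;
            }
        } else
            __lock_page(page);
        /* Return 1: page locked, mmap_sem unperturbed. */
        return 1;
    }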
@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
+ *
+ * vma->vm_mm->mmap_sem must be held on entry.
+ *
+ * If our return value has VM_FAULT_RETRY set, it's because
+ * lock_page_or_retry() returned 0.
+ * The mmap_sem has usually been released in this case.
+ * See __lock_page_or_retry() for the exception.
+ *
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * has not been released.
+ *
+ * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
  */
 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
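For reference, the call site inside filemap_fault() that produces these VM_FAULT_RETRY returns (condensed; `ret` carries the readahead-derived flag bits):

    if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
        page_cache_release(page);
        return ret | VM_FAULT_RETRY;    /* mmap_sem usually released */
    }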
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
 	return ret;
 }
 
+/*
+ * mmap_sem must be held on entry.  If @nonblocking != NULL and
+ * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
+ * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
+ */
 static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 		unsigned long address, unsigned int *flags, int *nonblocking)
 {
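The -EBUSY/*@nonblocking behavior comes straight from how faultin_page() translates a VM_FAULT_RETRY return (condensed from the same file; by this point the mmap_sem has already been released unless FOLL_NOWAIT was in effect):

    ret = handle_mm_fault(mm, vma, address, fault_flags);
    /* ... VM_FAULT_ERROR handling elided ... */
    if (ret & VM_FAULT_RETRY) {
        if (nonblocking)
            *nonblocking = 0;
        return -EBUSY;
    }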
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  * with a put_page() call when it is finished with. vmas will only
  * remain valid while mmap_sem is held.
  *
- * Must be called with mmap_sem held for read or write.
+ * Must be called with mmap_sem held.  It may be released.  See below.
  *
  * __get_user_pages walks a process's page tables and takes a reference to
  * each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  *
  * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
  * or mmap_sem contention, and if waiting is needed to pin all pages,
- * *@nonblocking will be set to 0.
+ * *@nonblocking will be set to 0.  Further, if @gup_flags does not
+ * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
+ * this case.
+ *
+ * A caller using such a combination of @nonblocking and @gup_flags
+ * must therefore hold the mmap_sem for reading only, and recognize
+ * when it's been released.  Otherwise, it must be held for either
+ * reading or writing and will not be released.
  *
  * In most cases, get_user_pages or get_user_pages_fast should be used
  * instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
  * such architectures, gup() will not be enough to make a subsequent access
  * succeed.
  *
- * This should be called with the mm_sem held for read.
+ * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
  */
 int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long address, unsigned int fault_flags)
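Put together, a caller opting into the @nonblocking protocol looks roughly like the following hypothetical sketch (the `locked` variable name is an illustrative convention, not part of this commit):

    int locked = 1;

    down_read(&mm->mmap_sem);
    ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags,
                           pages, NULL, &locked);
    if (locked)
        up_read(&mm->mmap_sem);
    /* else: __get_user_pages() already did the up_read() for us */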
diff --git a/mm/memory.c b/mm/memory.c
index 7e131325bdf8..4d0a543f3bb3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
 /*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
  */
 static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2688,6 +2691,11 @@ oom:
 	return VM_FAULT_OOM;
 }
 
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_or_retry().
+ */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 		pgoff_t pgoff, unsigned int flags, struct page **page)
 {
@@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return ret;
 }
 
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3188,10 @@ out:
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int handle_pte_fault(struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long address,
@@ -3232,6 +3251,9 @@ unlock:
 
 /*
  * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			     unsigned long address, unsigned int flags)
@@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		    unsigned long address, unsigned int flags)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index b1eb53634005..ce84cb0b83ef 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
  * @vma:   target vma
  * @start: start address
  * @end:   end address
+ * @nonblocking:
  *
  * This takes care of making the pages present too.
  *
  * return 0 on success, negative error code on error.
  *
- * vma->vm_mm->mmap_sem must be held for at least read.
+ * vma->vm_mm->mmap_sem must be held.
+ *
+ * If @nonblocking is NULL, it may be held for read or write and will
+ * be unperturbed.
+ *
+ * If @nonblocking is non-NULL, it must be held for read only and may be
+ * released.  If it's released, *@nonblocking will be set to 0.
  */
 long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, int *nonblocking)
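The canonical user of this contract is __mm_populate() in the same file; condensed, its lock/retake loop looks like this (vma revalidation and error handling elided):

    int locked = 0;

    for (nstart = start; nstart < end; nstart = nend) {
        if (!locked) {
            locked = 1;
            down_read(&mm->mmap_sem);
            /* ... re-find the vma under the retaken lock ... */
        }
        /* may drop mmap_sem and clear 'locked' via faultin_page() */
        ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
        /* ... advance nend, handle errors ... */
    }
    if (locked)
        up_read(&mm->mmap_sem);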