diff options
| -rw-r--r-- | arch/x86/mm/fault.c | 3 | ||||
| -rw-r--r-- | include/linux/pagemap.h | 3 | ||||
| -rw-r--r-- | mm/filemap.c | 23 | ||||
| -rw-r--r-- | mm/gup.c | 18 | ||||
| -rw-r--r-- | mm/memory.c | 34 | ||||
| -rw-r--r-- | mm/mlock.c | 9 |
6 files changed, 82 insertions, 8 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1dbade870f90..a24194681513 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
| @@ -1218,7 +1218,8 @@ good_area: | |||
| 1218 | /* | 1218 | /* |
| 1219 | * If for any reason at all we couldn't handle the fault, | 1219 | * If for any reason at all we couldn't handle the fault, |
| 1220 | * make sure we exit gracefully rather than endlessly redo | 1220 | * make sure we exit gracefully rather than endlessly redo |
| 1221 | * the fault: | 1221 | * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if |
| 1222 | * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. | ||
| 1222 | */ | 1223 | */ |
| 1223 | fault = handle_mm_fault(mm, vma, address, flags); | 1224 | fault = handle_mm_fault(mm, vma, address, flags); |
| 1224 | 1225 | ||
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1474ae18c88..3df8c7db7a4e 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h | |||
| @@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page) | |||
| 484 | /* | 484 | /* |
| 485 | * lock_page_or_retry - Lock the page, unless this would block and the | 485 | * lock_page_or_retry - Lock the page, unless this would block and the |
| 486 | * caller indicated that it can handle a retry. | 486 | * caller indicated that it can handle a retry. |
| 487 | * | ||
| 488 | * Return value and mmap_sem implications depend on flags; see | ||
| 489 | * __lock_page_or_retry(). | ||
| 487 | */ | 490 | */ |
| 488 | static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, | 491 | static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, |
| 489 | unsigned int flags) | 492 | unsigned int flags) |
diff --git a/mm/filemap.c b/mm/filemap.c index 7e85c8147e1b..af19a6b079f5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page) | |||
| 808 | } | 808 | } |
| 809 | EXPORT_SYMBOL_GPL(__lock_page_killable); | 809 | EXPORT_SYMBOL_GPL(__lock_page_killable); |
| 810 | 810 | ||
| 811 | /* | ||
| 812 | * Return values: | ||
| 813 | * 1 - page is locked; mmap_sem is still held. | ||
| 814 | * 0 - page is not locked. | ||
| 815 | * mmap_sem has been released (up_read()), unless flags had both | ||
| 816 | * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in | ||
| 817 | * which case mmap_sem is still held. | ||
| 818 | * | ||
| 819 | * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 | ||
| 820 | * with the page locked and the mmap_sem unperturbed. | ||
| 821 | */ | ||
| 811 | int __lock_page_or_retry(struct page *page, struct mm_struct *mm, | 822 | int __lock_page_or_retry(struct page *page, struct mm_struct *mm, |
| 812 | unsigned int flags) | 823 | unsigned int flags) |
| 813 | { | 824 | { |
| @@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, | |||
| 1827 | * The goto's are kind of ugly, but this streamlines the normal case of having | 1838 | * The goto's are kind of ugly, but this streamlines the normal case of having |
| 1828 | * it in the page cache, and handles the special cases reasonably without | 1839 | * it in the page cache, and handles the special cases reasonably without |
| 1829 | * having a lot of duplicated code. | 1840 | * having a lot of duplicated code. |
| 1841 | * | ||
| 1842 | * vma->vm_mm->mmap_sem must be held on entry. | ||
| 1843 | * | ||
| 1844 | * If our return value has VM_FAULT_RETRY set, it's because | ||
| 1845 | * lock_page_or_retry() returned 0. | ||
| 1846 | * The mmap_sem has usually been released in this case. | ||
| 1847 | * See __lock_page_or_retry() for the exception. | ||
| 1848 | * | ||
| 1849 | * If our return value does not have VM_FAULT_RETRY set, the mmap_sem | ||
| 1850 | * has not been released. | ||
| 1851 | * | ||
| 1852 | * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. | ||
| 1830 | */ | 1853 | */ |
| 1831 | int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1854 | int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
| 1832 | { | 1855 | { |
| @@ -258,6 +258,11 @@ unmap: | |||
| 258 | return ret; | 258 | return ret; |
| 259 | } | 259 | } |
| 260 | 260 | ||
| 261 | /* | ||
| 262 | * mmap_sem must be held on entry. If @nonblocking != NULL and | ||
| 263 | * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released. | ||
| 264 | * If it is, *@nonblocking will be set to 0 and -EBUSY returned. | ||
| 265 | */ | ||
| 261 | static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, | 266 | static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, |
| 262 | unsigned long address, unsigned int *flags, int *nonblocking) | 267 | unsigned long address, unsigned int *flags, int *nonblocking) |
| 263 | { | 268 | { |
| @@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) | |||
| 373 | * with a put_page() call when it is finished with. vmas will only | 378 | * with a put_page() call when it is finished with. vmas will only |
| 374 | * remain valid while mmap_sem is held. | 379 | * remain valid while mmap_sem is held. |
| 375 | * | 380 | * |
| 376 | * Must be called with mmap_sem held for read or write. | 381 | * Must be called with mmap_sem held. It may be released. See below. |
| 377 | * | 382 | * |
| 378 | * __get_user_pages walks a process's page tables and takes a reference to | 383 | * __get_user_pages walks a process's page tables and takes a reference to |
| 379 | * each struct page that each user address corresponds to at a given | 384 | * each struct page that each user address corresponds to at a given |
| @@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) | |||
| 396 | * | 401 | * |
| 397 | * If @nonblocking != NULL, __get_user_pages will not wait for disk IO | 402 | * If @nonblocking != NULL, __get_user_pages will not wait for disk IO |
| 398 | * or mmap_sem contention, and if waiting is needed to pin all pages, | 403 | * or mmap_sem contention, and if waiting is needed to pin all pages, |
| 399 | * *@nonblocking will be set to 0. | 404 | * *@nonblocking will be set to 0. Further, if @gup_flags does not |
| 405 | * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in | ||
| 406 | * this case. | ||
| 407 | * | ||
| 408 | * A caller using such a combination of @nonblocking and @gup_flags | ||
| 409 | * must therefore hold the mmap_sem for reading only, and recognize | ||
| 410 | * when it's been released. Otherwise, it must be held for either | ||
| 411 | * reading or writing and will not be released. | ||
| 400 | * | 412 | * |
| 401 | * In most cases, get_user_pages or get_user_pages_fast should be used | 413 | * In most cases, get_user_pages or get_user_pages_fast should be used |
| 402 | * instead of __get_user_pages. __get_user_pages should be used only if | 414 | * instead of __get_user_pages. __get_user_pages should be used only if |
| @@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages); | |||
| 528 | * such architectures, gup() will not be enough to make a subsequent access | 540 | * such architectures, gup() will not be enough to make a subsequent access |
| 529 | * succeed. | 541 | * succeed. |
| 530 | * | 542 | * |
| 531 | * This should be called with the mm_sem held for read. | 543 | * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault(). |
| 532 | */ | 544 | */ |
| 533 | int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, | 545 | int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, |
| 534 | unsigned long address, unsigned int fault_flags) | 546 | unsigned long address, unsigned int fault_flags) |
diff --git a/mm/memory.c b/mm/memory.c index 7e131325bdf8..4d0a543f3bb3 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range); | |||
| 2399 | /* | 2399 | /* |
| 2400 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | 2400 | * We enter with non-exclusive mmap_sem (to exclude vma changes, |
| 2401 | * but allow concurrent faults), and pte mapped but not yet locked. | 2401 | * but allow concurrent faults), and pte mapped but not yet locked. |
| 2402 | * We return with mmap_sem still held, but pte unmapped and unlocked. | 2402 | * We return with pte unmapped and unlocked. |
| 2403 | * | ||
| 2404 | * We return with the mmap_sem locked or unlocked in the same cases | ||
| 2405 | * as does filemap_fault(). | ||
| 2403 | */ | 2406 | */ |
| 2404 | static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | 2407 | static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, |
| 2405 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 2408 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
| @@ -2688,6 +2691,11 @@ oom: | |||
| 2688 | return VM_FAULT_OOM; | 2691 | return VM_FAULT_OOM; |
| 2689 | } | 2692 | } |
| 2690 | 2693 | ||
| 2694 | /* | ||
| 2695 | * The mmap_sem must have been held on entry, and may have been | ||
| 2696 | * released depending on flags and vma->vm_ops->fault() return value. | ||
| 2697 | * See filemap_fault() and __lock_page_or_retry(). | ||
| 2698 | */ | ||
| 2691 | static int __do_fault(struct vm_area_struct *vma, unsigned long address, | 2699 | static int __do_fault(struct vm_area_struct *vma, unsigned long address, |
| 2692 | pgoff_t pgoff, unsigned int flags, struct page **page) | 2700 | pgoff_t pgoff, unsigned int flags, struct page **page) |
| 2693 | { | 2701 | { |
| @@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3016 | return ret; | 3024 | return ret; |
| 3017 | } | 3025 | } |
| 3018 | 3026 | ||
| 3027 | /* | ||
| 3028 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | ||
| 3029 | * but allow concurrent faults). | ||
| 3030 | * The mmap_sem may have been released depending on flags and our | ||
| 3031 | * return value. See filemap_fault() and __lock_page_or_retry(). | ||
| 3032 | */ | ||
| 3019 | static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 3033 | static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
| 3020 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 3034 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
| 3021 | unsigned int flags, pte_t orig_pte) | 3035 | unsigned int flags, pte_t orig_pte) |
| @@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3040 | * | 3054 | * |
| 3041 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | 3055 | * We enter with non-exclusive mmap_sem (to exclude vma changes, |
| 3042 | * but allow concurrent faults), and pte mapped but not yet locked. | 3056 | * but allow concurrent faults), and pte mapped but not yet locked. |
| 3043 | * We return with mmap_sem still held, but pte unmapped and unlocked. | 3057 | * We return with pte unmapped and unlocked. |
| 3058 | * The mmap_sem may have been released depending on flags and our | ||
| 3059 | * return value. See filemap_fault() and __lock_page_or_retry(). | ||
| 3044 | */ | 3060 | */ |
| 3045 | static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 3061 | static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
| 3046 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 3062 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
| @@ -3172,7 +3188,10 @@ out: | |||
| 3172 | * | 3188 | * |
| 3173 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | 3189 | * We enter with non-exclusive mmap_sem (to exclude vma changes, |
| 3174 | * but allow concurrent faults), and pte mapped but not yet locked. | 3190 | * but allow concurrent faults), and pte mapped but not yet locked. |
| 3175 | * We return with mmap_sem still held, but pte unmapped and unlocked. | 3191 | * We return with pte unmapped and unlocked. |
| 3192 | * | ||
| 3193 | * The mmap_sem may have been released depending on flags and our | ||
| 3194 | * return value. See filemap_fault() and __lock_page_or_retry(). | ||
| 3176 | */ | 3195 | */ |
| 3177 | static int handle_pte_fault(struct mm_struct *mm, | 3196 | static int handle_pte_fault(struct mm_struct *mm, |
| 3178 | struct vm_area_struct *vma, unsigned long address, | 3197 | struct vm_area_struct *vma, unsigned long address, |
| @@ -3232,6 +3251,9 @@ unlock: | |||
| 3232 | 3251 | ||
| 3233 | /* | 3252 | /* |
| 3234 | * By the time we get here, we already hold the mm semaphore | 3253 | * By the time we get here, we already hold the mm semaphore |
| 3254 | * | ||
| 3255 | * The mmap_sem may have been released depending on flags and our | ||
| 3256 | * return value. See filemap_fault() and __lock_page_or_retry(). | ||
| 3235 | */ | 3257 | */ |
| 3236 | static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 3258 | static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
| 3237 | unsigned long address, unsigned int flags) | 3259 | unsigned long address, unsigned int flags) |
| @@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3313 | return handle_pte_fault(mm, vma, address, pte, pmd, flags); | 3335 | return handle_pte_fault(mm, vma, address, pte, pmd, flags); |
| 3314 | } | 3336 | } |
| 3315 | 3337 | ||
| 3338 | /* | ||
| 3339 | * By the time we get here, we already hold the mm semaphore | ||
| 3340 | * | ||
| 3341 | * The mmap_sem may have been released depending on flags and our | ||
| 3342 | * return value. See filemap_fault() and __lock_page_or_retry(). | ||
| 3343 | */ | ||
| 3316 | int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 3344 | int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
| 3317 | unsigned long address, unsigned int flags) | 3345 | unsigned long address, unsigned int flags) |
| 3318 | { | 3346 | { |
diff --git a/mm/mlock.c b/mm/mlock.c index b1eb53634005..ce84cb0b83ef 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
| @@ -210,12 +210,19 @@ out: | |||
| 210 | * @vma: target vma | 210 | * @vma: target vma |
| 211 | * @start: start address | 211 | * @start: start address |
| 212 | * @end: end address | 212 | * @end: end address |
| 213 | * @nonblocking: if non-NULL, mmap_sem may be released; set to 0 when it is | ||
| 213 | * | 214 | * |
| 214 | * This takes care of making the pages present too. | 215 | * This takes care of making the pages present too. |
| 215 | * | 216 | * |
| 216 | * return 0 on success, negative error code on error. | 217 | * return 0 on success, negative error code on error. |
| 217 | * | 218 | * |
| 218 | * vma->vm_mm->mmap_sem must be held for at least read. | 219 | * vma->vm_mm->mmap_sem must be held. |
| 220 | * | ||
| 221 | * If @nonblocking is NULL, it may be held for read or write and will | ||
| 222 | * be unperturbed. | ||
| 223 | * | ||
| 224 | * If @nonblocking is non-NULL, it must be held for read only and may be | ||
| 225 | * released. If it's released, *@nonblocking will be set to 0. | ||
| 219 | */ | 226 | */ |
| 220 | long __mlock_vma_pages_range(struct vm_area_struct *vma, | 227 | long __mlock_vma_pages_range(struct vm_area_struct *vma, |
| 221 | unsigned long start, unsigned long end, int *nonblocking) | 228 | unsigned long start, unsigned long end, int *nonblocking) |
