path: root/mm
author		Dominik Dingel <dingel@linux.vnet.ibm.com>	2016-01-15 19:57:04 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-15 20:56:32 -0500
commit		4a9e1cda274893eca7d178d7dc265503ccb9d87a (patch)
tree		ef92b43a0c3c38edeb554beb64ca8962d795eb3b /mm
parent		c046c321cb4a0bdac9fb922db3859893ca556d27 (diff)
mm: bring in additional flag for fixup_user_fault to signal unlock
During Jason's work on postcopy migration support for s390, a problem regarding gmap faults was discovered. The gmap code calls fixup_user_fault, which always ends up in handle_mm_fault. Until now we never cared about retries, but since the userfaultfd code relies on them, this needs fixing. This patchset does not take care of the futex code; I will look at that more closely next.

This patch (of 2):

With the introduction of userfaultfd, kvm on s390 needs fixup_user_fault to pass in FAULT_FLAG_ALLOW_RETRY and to report whether mmap_sem was ever unlocked while handling the fault.

This patch brings in the logic to handle retries and also cleans up the current documentation. fixup_user_fault did not have the same semantics as filemap_fault: it never indicated whether a retry happened, so a caller was unable to handle that case. The behaviour is therefore changed to retry internally, re-taking mmap_sem, so the function never returns with mmap_sem unlocked.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: "Jason J. Herne" <jjherne@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Eric B Munson <emunson@akamai.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Dominik Dingel <dingel@linux.vnet.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
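To illustrate the new calling convention, a minimal caller-side sketch follows. It is hypothetical: resolve_guest_fault() and its surrounding locking are made up for illustration, and only the fixup_user_fault() signature and flag semantics come from this patch. Callers that cannot tolerate the lock being dropped simply pass NULL for 'unlocked' and keep the old behaviour.

/*
 * Hypothetical helper (not part of this patch): resolve a fault on a
 * user address and learn whether mmap_sem was dropped on the way.
 */
static int resolve_guest_fault(struct mm_struct *mm, unsigned long addr,
			       bool write)
{
	unsigned int fault_flags = write ? FAULT_FLAG_WRITE : 0;
	bool unlocked = false;
	int rc;

	down_read(&mm->mmap_sem);
	/*
	 * A non-NULL 'unlocked' lets fixup_user_fault() set
	 * FAULT_FLAG_ALLOW_RETRY internally and handle the retry itself;
	 * it always returns with mmap_sem held again.
	 */
	rc = fixup_user_fault(current, mm, addr, fault_flags, &unlocked);
	if (unlocked) {
		/*
		 * mmap_sem was dropped and re-taken during the fault:
		 * revalidate anything computed under the previous lock.
		 */
	}
	up_read(&mm->mmap_sem);
	return rc;
}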
Diffstat (limited to 'mm')
-rw-r--r--	mm/gup.c	30
1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index aa21c4b865a5..b64a36175884 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -618,6 +618,8 @@ EXPORT_SYMBOL(__get_user_pages);
  * @mm:		mm_struct of target mm
  * @address:	user address
  * @fault_flags:flags to pass down to handle_mm_fault()
+ * @unlocked:	did we unlock the mmap_sem while retrying, maybe NULL if caller
+ *		does not allow retry
  *
  * This is meant to be called in the specific scenario where for locking reasons
  * we try to access user memory in atomic context (within a pagefault_disable()
@@ -629,22 +631,28 @@ EXPORT_SYMBOL(__get_user_pages);
  * The main difference with get_user_pages() is that this function will
  * unconditionally call handle_mm_fault() which will in turn perform all the
  * necessary SW fixup of the dirty and young bits in the PTE, while
- * handle_mm_fault() only guarantees to update these in the struct page.
+ * get_user_pages() only guarantees to update these in the struct page.
  *
  * This is important for some architectures where those bits also gate the
  * access permission to the page because they are maintained in software.  On
  * such architectures, gup() will not be enough to make a subsequent access
  * succeed.
  *
- * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
+ * This function will not return with an unlocked mmap_sem. So it has not the
+ * same semantics wrt the @mm->mmap_sem as does filemap_fault().
  */
 int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
-		     unsigned long address, unsigned int fault_flags)
+		     unsigned long address, unsigned int fault_flags,
+		     bool *unlocked)
 {
 	struct vm_area_struct *vma;
 	vm_flags_t vm_flags;
-	int ret;
+	int ret, major = 0;
+
+	if (unlocked)
+		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
 
+retry:
 	vma = find_extend_vma(mm, address);
 	if (!vma || address < vma->vm_start)
 		return -EFAULT;
@@ -654,6 +662,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 		return -EFAULT;
 
 	ret = handle_mm_fault(mm, vma, address, fault_flags);
+	major |= ret & VM_FAULT_MAJOR;
 	if (ret & VM_FAULT_ERROR) {
 		if (ret & VM_FAULT_OOM)
 			return -ENOMEM;
@@ -663,8 +672,19 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 			return -EFAULT;
 		BUG();
 	}
+
+	if (ret & VM_FAULT_RETRY) {
+		down_read(&mm->mmap_sem);
+		if (!(fault_flags & FAULT_FLAG_TRIED)) {
+			*unlocked = true;
+			fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			fault_flags |= FAULT_FLAG_TRIED;
+			goto retry;
+		}
+	}
+
 	if (tsk) {
-		if (ret & VM_FAULT_MAJOR)
+		if (major)
 			tsk->maj_flt++;
 		else
 			tsk->min_flt++;