Diffstat (limited to 'mm/gup.c')
-rw-r--r--	mm/gup.c	246
1 file changed, 205 insertions(+), 41 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index a900759cc807..a6e24e246f86 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -55,7 +55,7 @@ retry:
 		 */
 		if (likely(!(flags & FOLL_MIGRATION)))
 			goto no_page;
-		if (pte_none(pte) || pte_file(pte))
+		if (pte_none(pte))
 			goto no_page;
 		entry = pte_to_swp_entry(pte);
 		if (!is_migration_entry(entry))
@@ -64,7 +64,7 @@ retry:
 		migration_entry_wait(mm, pmd, address);
 		goto retry;
 	}
-	if ((flags & FOLL_NUMA) && pte_numa(pte))
+	if ((flags & FOLL_NUMA) && pte_protnone(pte))
 		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte)) {
 		pte_unmap_unlock(ptep, ptl);
@@ -167,10 +167,10 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	if (pud_none(*pud))
 		return no_page_table(vma, flags);
 	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
-		if (flags & FOLL_GET)
-			return NULL;
-		page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
-		return page;
+		page = follow_huge_pud(mm, address, pud, flags);
+		if (page)
+			return page;
+		return no_page_table(vma, flags);
 	}
 	if (unlikely(pud_bad(*pud)))
 		return no_page_table(vma, flags);
@@ -179,21 +179,12 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	if (pmd_none(*pmd))
 		return no_page_table(vma, flags);
 	if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
-		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
-		if (flags & FOLL_GET) {
-			/*
-			 * Refcount on tail pages are not well-defined and
-			 * shouldn't be taken. The caller should handle a NULL
-			 * return when trying to follow tail pages.
-			 */
-			if (PageHead(page))
-				get_page(page);
-			else
-				page = NULL;
-		}
-		return page;
+		page = follow_huge_pmd(mm, address, pmd, flags);
+		if (page)
+			return page;
+		return no_page_table(vma, flags);
 	}
-	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
 		return no_page_table(vma, flags);
 	if (pmd_trans_huge(*pmd)) {
 		if (flags & FOLL_SPLIT) {
@@ -296,7 +287,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 			return -ENOMEM;
 		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
 			return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
-		if (ret & VM_FAULT_SIGBUS)
+		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
 			return -EFAULT;
 		BUG();
 	}
@@ -571,7 +562,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 			return -ENOMEM;
 		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
 			return -EHWPOISON;
-		if (ret & VM_FAULT_SIGBUS)
+		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
 			return -EFAULT;
 		BUG();
 	}
@@ -584,6 +575,185 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 	return 0;
 }
 
+static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
+						struct mm_struct *mm,
+						unsigned long start,
+						unsigned long nr_pages,
+						int write, int force,
+						struct page **pages,
+						struct vm_area_struct **vmas,
+						int *locked, bool notify_drop,
+						unsigned int flags)
+{
+	long ret, pages_done;
+	bool lock_dropped;
+
+	if (locked) {
+		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
+		BUG_ON(vmas);
+		/* check caller initialized locked */
+		BUG_ON(*locked != 1);
+	}
+
+	if (pages)
+		flags |= FOLL_GET;
+	if (write)
+		flags |= FOLL_WRITE;
+	if (force)
+		flags |= FOLL_FORCE;
+
+	pages_done = 0;
+	lock_dropped = false;
+	for (;;) {
+		ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
+				       vmas, locked);
+		if (!locked)
+			/* VM_FAULT_RETRY couldn't trigger, bypass */
+			return ret;
+
+		/* VM_FAULT_RETRY cannot return errors */
+		if (!*locked) {
+			BUG_ON(ret < 0);
+			BUG_ON(ret >= nr_pages);
+		}
+
+		if (!pages)
+			/* If it's a prefault don't insist harder */
+			return ret;
+
+		if (ret > 0) {
+			nr_pages -= ret;
+			pages_done += ret;
+			if (!nr_pages)
+				break;
+		}
+		if (*locked) {
+			/* VM_FAULT_RETRY didn't trigger */
+			if (!pages_done)
+				pages_done = ret;
+			break;
+		}
+		/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
+		pages += ret;
+		start += ret << PAGE_SHIFT;
+
+		/*
+		 * Repeat on the address that fired VM_FAULT_RETRY
+		 * without FAULT_FLAG_ALLOW_RETRY but with
+		 * FAULT_FLAG_TRIED.
+		 */
+		*locked = 1;
+		lock_dropped = true;
+		down_read(&mm->mmap_sem);
+		ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
+				       pages, NULL, NULL);
+		if (ret != 1) {
+			BUG_ON(ret > 1);
+			if (!pages_done)
+				pages_done = ret;
+			break;
+		}
+		nr_pages--;
+		pages_done++;
+		if (!nr_pages)
+			break;
+		pages++;
+		start += PAGE_SIZE;
+	}
+	if (notify_drop && lock_dropped && *locked) {
+		/*
+		 * We must let the caller know we temporarily dropped the lock
+		 * and so the critical section protected by it was lost.
+		 */
+		up_read(&mm->mmap_sem);
+		*locked = 0;
+	}
+	return pages_done;
+}
+
+/*
+ * We can leverage the VM_FAULT_RETRY functionality in the page fault
+ * paths better by using either get_user_pages_locked() or
+ * get_user_pages_unlocked().
+ *
+ * get_user_pages_locked() is suitable to replace the form:
+ *
+ *      down_read(&mm->mmap_sem);
+ *      do_something()
+ *      get_user_pages(tsk, mm, ..., pages, NULL);
+ *      up_read(&mm->mmap_sem);
+ *
+ *  to:
+ *
+ *      int locked = 1;
+ *      down_read(&mm->mmap_sem);
+ *      do_something()
+ *      get_user_pages_locked(tsk, mm, ..., pages, &locked);
+ *      if (locked)
+ *          up_read(&mm->mmap_sem);
+ */
+long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
+			   unsigned long start, unsigned long nr_pages,
+			   int write, int force, struct page **pages,
+			   int *locked)
+{
+	return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
+				       pages, NULL, locked, true, FOLL_TOUCH);
+}
+EXPORT_SYMBOL(get_user_pages_locked);
+
+/*
+ * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to
+ * pass additional gup_flags as last parameter (like FOLL_HWPOISON).
+ *
+ * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
+ * caller if required (just like with __get_user_pages). "FOLL_GET",
+ * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
+ * according to the parameters "pages", "write", "force"
+ * respectively.
+ */
+__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+					       unsigned long start, unsigned long nr_pages,
+					       int write, int force, struct page **pages,
+					       unsigned int gup_flags)
+{
+	long ret;
+	int locked = 1;
+	down_read(&mm->mmap_sem);
+	ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
+				      pages, NULL, &locked, false, gup_flags);
+	if (locked)
+		up_read(&mm->mmap_sem);
+	return ret;
+}
+EXPORT_SYMBOL(__get_user_pages_unlocked);
+
+/*
+ * get_user_pages_unlocked() is suitable to replace the form:
+ *
+ *      down_read(&mm->mmap_sem);
+ *      get_user_pages(tsk, mm, ..., pages, NULL);
+ *      up_read(&mm->mmap_sem);
+ *
+ *  with:
+ *
+ *      get_user_pages_unlocked(tsk, mm, ..., pages);
+ *
+ * It is functionally equivalent to get_user_pages_fast so
+ * get_user_pages_fast should be used instead, if the two parameters
+ * "tsk" and "mm" are respectively equal to current and current->mm,
+ * or if "force" shall be set to 1 (get_user_pages_fast misses the
+ * "force" parameter).
+ */
+long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+			     unsigned long start, unsigned long nr_pages,
+			     int write, int force, struct page **pages)
+{
+	return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write,
+					 force, pages, FOLL_TOUCH);
+}
+EXPORT_SYMBOL(get_user_pages_unlocked);
+
 /*
  * get_user_pages() - pin user pages in memory
  * @tsk:	the task_struct to use for page fault accounting, or
@@ -633,22 +803,18 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
  * use the correct cache flushing APIs.
  *
  * See also get_user_pages_fast, for performance critical applications.
+ *
+ * get_user_pages should be phased out in favor of
+ * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing
+ * should use get_user_pages because it cannot pass
+ * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
  */
 long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, unsigned long nr_pages, int write,
 		int force, struct page **pages, struct vm_area_struct **vmas)
 {
-	int flags = FOLL_TOUCH;
-
-	if (pages)
-		flags |= FOLL_GET;
-	if (write)
-		flags |= FOLL_WRITE;
-	if (force)
-		flags |= FOLL_FORCE;
-
-	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
-				NULL);
+	return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
+				       pages, vmas, NULL, false, FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -740,10 +906,10 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
 		/*
 		 * Similar to the PMD case below, NUMA hinting must take slow
-		 * path
+		 * path using the pte_protnone check.
 		 */
 		if (!pte_present(pte) || pte_special(pte) ||
-		    pte_numa(pte) || (write && !pte_write(pte)))
+		    pte_protnone(pte) || (write && !pte_write(pte)))
 			goto pte_unmap;
 
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -926,7 +1092,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 
 	pmdp = pmd_offset(&pud, addr);
 	do {
-		pmd_t pmd = ACCESS_ONCE(*pmdp);
+		pmd_t pmd = READ_ONCE(*pmdp);
 
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
@@ -938,7 +1104,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			 * slowpath for accounting purposes and so that they
 			 * can be serialised against THP migration.
 			 */
-			if (pmd_numa(pmd))
+			if (pmd_protnone(pmd))
 				return 0;
 
 			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
@@ -1077,10 +1243,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		start += nr << PAGE_SHIFT;
 		pages += nr;
 
-		down_read(&mm->mmap_sem);
-		ret = get_user_pages(current, mm, start,
-				     nr_pages - nr, write, 0, pages, NULL);
-		up_read(&mm->mmap_sem);
+		ret = get_user_pages_unlocked(current, mm, start,
+					      nr_pages - nr, write, 0, pages);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
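
Usage illustration (not part of the diff): a minimal caller sketch of the conversion described by the kernel-doc comment added above, from the plain get_user_pages() pattern to get_user_pages_locked(). The caller name my_pin_user_buf and the constant MY_NR_PAGES are hypothetical; only the get_user_pages_locked() signature comes from the patch itself.

/*
 * Hypothetical caller sketch: pin MY_NR_PAGES user pages for writing
 * via get_user_pages_locked(), so the fault path may run with
 * FAULT_FLAG_ALLOW_RETRY and drop mmap_sem on VM_FAULT_RETRY.
 */
#include <linux/mm.h>
#include <linux/sched.h>

#define MY_NR_PAGES 16	/* hypothetical buffer size, in pages */

static long my_pin_user_buf(unsigned long start, struct page **pages)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages_locked(current, mm, start, MY_NR_PAGES,
				    1 /* write */, 0 /* force */,
				    pages, &locked);
	/* "locked" is cleared if the helper had to drop mmap_sem. */
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;
}

Where the caller holds mmap_sem only for the pin itself, get_user_pages_unlocked(tsk, mm, start, nr_pages, write, force, pages) removes the explicit down_read()/up_read() pair entirely, as the comment above that function notes.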