author		Christoph Hellwig <hch@lst.de>	2019-08-14 03:59:19 -0400
committer	Jason Gunthorpe <jgg@mellanox.com>	2019-08-20 08:35:02 -0400
commit		a7d1f22bb74f32cf3cd93f52776007e161f1a738 (patch)
tree		d3cbbf0483499da1f858f3b2facda271670e8d69 /mm/migrate.c
parent		f4fb3b9c1971ec210b30845a9f62dc823c5242d0 (diff)
mm: turn migrate_vma upside down
There isn't any good reason to pass callbacks to migrate_vma.  Instead
we can just export the three steps done by this function to drivers and
let them sequence the operation without callbacks.  This removes a lot
of boilerplate code as-is, and will allow the drivers to drastically
improve code flow and error handling further on.

Link: https://lore.kernel.org/r/20190814075928.23766-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
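For illustration, here is a minimal sketch of the driver-side flow this change enables: the driver sequences the three exported steps itself instead of passing callbacks. The helpers my_alloc_and_copy() and my_update_device_ptes() are hypothetical placeholders for driver-specific allocation/copying and device PTE updates; they are not part of this patch.

/*
 * Illustrative sketch only: sequence migrate_vma_setup(), migrate_vma_pages()
 * and migrate_vma_finalize() from a driver.  my_alloc_and_copy() and
 * my_update_device_ptes() are hypothetical driver-specific helpers.
 */
static int my_driver_migrate(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end,
			     unsigned long *src, unsigned long *dst)
{
	struct migrate_vma args = {
		.vma	= vma,
		.start	= start,
		.end	= end,
		.src	= src,
		.dst	= dst,
	};
	int ret;

	ret = migrate_vma_setup(&args);
	if (ret)
		return ret;

	if (args.cpages) {
		/* Fill args.dst with locked destination pages (see kernel-doc). */
		my_alloc_and_copy(&args);
		migrate_vma_pages(&args);
		/* Optional: update device page tables while pages are still locked. */
		my_update_device_ptes(&args);
		migrate_vma_finalize(&args);
	}
	return 0;
}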
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	244
1 file changed, 110 insertions(+), 134 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 8992741f10aa..8111e031fa2b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2118,16 +2118,6 @@ out_unlock:
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_MIGRATE_VMA_HELPER)
-struct migrate_vma {
-	struct vm_area_struct	*vma;
-	unsigned long		*dst;
-	unsigned long		*src;
-	unsigned long		cpages;
-	unsigned long		npages;
-	unsigned long		start;
-	unsigned long		end;
-};
-
 static int migrate_vma_collect_hole(unsigned long start,
 				    unsigned long end,
 				    struct mm_walk *walk)
@@ -2578,6 +2568,110 @@ restore:
 	}
 }
 
+/**
+ * migrate_vma_setup() - prepare to migrate a range of memory
+ * @args: contains the vma, start, end, and pfns arrays for the migration
+ *
+ * Returns: negative errno on failures, 0 when 0 or more pages were migrated
+ * without an error.
+ *
+ * Prepare to migrate a range of virtual addresses by collecting all of
+ * the pages backing each virtual address in the range, saving them inside the
+ * src array. Then lock those pages and unmap them. Once the pages are locked
+ * and unmapped, check whether each page is pinned or not. Pages that aren't
+ * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
+ * corresponding src array entry. Then restore any pages that are pinned, by
+ * remapping and unlocking those pages.
+ *
+ * The caller should then allocate destination memory and copy source memory to
+ * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
+ * flag set). Once these are allocated and copied, the caller must update each
+ * corresponding entry in the dst array with the pfn value of the destination
+ * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
+ * (destination pages must have their struct pages locked, via lock_page()).
+ *
+ * Note that the caller does not have to migrate all the pages that are marked
+ * with the MIGRATE_PFN_MIGRATE flag in the src array unless this is a migration
+ * from device memory to system memory. If the caller cannot migrate a device
+ * page back to system memory, then it must return VM_FAULT_SIGBUS, which has
+ * severe consequences for the userspace process, so it must be avoided if at
+ * all possible.
+ *
+ * For empty entries inside the CPU page table (pte_none() or pmd_none() is
+ * true) we do set the MIGRATE_PFN_MIGRATE flag in the corresponding src array
+ * entry, thus allowing the caller to allocate device memory for those unbacked
+ * virtual addresses. For this the caller simply has to allocate device memory
+ * and properly set the destination entry like for regular migration. Note that
+ * this can still fail, so the device driver must check whether the migration
+ * was successful for those entries after calling migrate_vma_pages(), just
+ * like for regular migration.
+ *
+ * After that, the caller must call migrate_vma_pages() to go over each entry
+ * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
+ * set. If the corresponding entry in the dst array has the MIGRATE_PFN_VALID
+ * flag set, then migrate_vma_pages() migrates the struct page information from
+ * the source struct page to the destination struct page. If it fails to
+ * migrate the struct page information, then it clears the MIGRATE_PFN_MIGRATE
+ * flag in the src array.
+ *
+ * At this point all successfully migrated pages have an entry in the src
+ * array with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flags set and the
+ * dst array entry with the MIGRATE_PFN_VALID flag set.
+ *
+ * Once migrate_vma_pages() returns, the caller may inspect which pages were
+ * successfully migrated, and which were not. Successfully migrated pages will
+ * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
+ *
+ * It is safe to update the device page table after migrate_vma_pages() because
+ * both the destination and source pages are still locked, and the mmap_sem is
+ * held in read mode (hence no one can unmap the range being migrated).
+ *
+ * Once the caller is done cleaning up and updating its page table (if it
+ * chose to do so; this is not an obligation), it finally calls
+ * migrate_vma_finalize() to update the CPU page table to point to new pages
+ * for successfully migrated pages, or otherwise restore the CPU page table to
+ * point to the original source pages.
+ */
+int migrate_vma_setup(struct migrate_vma *args)
+{
+	long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
+
+	args->start &= PAGE_MASK;
+	args->end &= PAGE_MASK;
+	if (!args->vma || is_vm_hugetlb_page(args->vma) ||
+	    (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
+		return -EINVAL;
+	if (nr_pages <= 0)
+		return -EINVAL;
+	if (args->start < args->vma->vm_start ||
+	    args->start >= args->vma->vm_end)
+		return -EINVAL;
+	if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
+		return -EINVAL;
+	if (!args->src || !args->dst)
+		return -EINVAL;
+
+	memset(args->src, 0, sizeof(*args->src) * nr_pages);
+	args->cpages = 0;
+	args->npages = 0;
+
+	migrate_vma_collect(args);
+
+	if (args->cpages)
+		migrate_vma_prepare(args);
+	if (args->cpages)
+		migrate_vma_unmap(args);
+
+	/*
+	 * At this point pages are locked and unmapped, and thus they have
+	 * stable content and can safely be copied to destination memory that
+	 * is allocated by the drivers.
+	 */
+	return 0;
+
+}
+EXPORT_SYMBOL(migrate_vma_setup);
+
 static void migrate_vma_insert_page(struct migrate_vma *migrate,
 				    unsigned long addr,
 				    struct page *page,
@@ -2709,7 +2803,7 @@ abort:
 	*src &= ~MIGRATE_PFN_MIGRATE;
 }
 
-/*
+/**
  * migrate_vma_pages() - migrate meta-data from src page to dst page
  * @migrate: migrate struct containing all migration information
  *
@@ -2717,7 +2811,7 @@ abort:
  * struct page. This effectively finishes the migration from source page to the
  * destination page.
  */
-static void migrate_vma_pages(struct migrate_vma *migrate)
+void migrate_vma_pages(struct migrate_vma *migrate)
 {
 	const unsigned long npages = migrate->npages;
 	const unsigned long start = migrate->start;
@@ -2791,8 +2885,9 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
 	if (notified)
 		mmu_notifier_invalidate_range_only_end(&range);
 }
+EXPORT_SYMBOL(migrate_vma_pages);
 
-/*
+/**
  * migrate_vma_finalize() - restore CPU page table entry
  * @migrate: migrate struct containing all migration information
  *
@@ -2803,7 +2898,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
  * This also unlocks the pages and puts them back on the lru, or drops the extra
  * refcount, for device pages.
  */
-static void migrate_vma_finalize(struct migrate_vma *migrate)
+void migrate_vma_finalize(struct migrate_vma *migrate)
 {
 	const unsigned long npages = migrate->npages;
 	unsigned long i;
@@ -2846,124 +2941,5 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
 		}
 	}
 }
-
-/*
- * migrate_vma() - migrate a range of memory inside vma
- *
- * @ops: migration callback for allocating destination memory and copying
- * @vma: virtual memory area containing the range to be migrated
- * @start: start address of the range to migrate (inclusive)
- * @end: end address of the range to migrate (exclusive)
- * @src: array of hmm_pfn_t containing source pfns
- * @dst: array of hmm_pfn_t containing destination pfns
- * @private: pointer passed back to each of the callback
- * Returns: 0 on success, error code otherwise
- *
- * This function tries to migrate a range of memory virtual address range, using
- * callbacks to allocate and copy memory from source to destination. First it
- * collects all the pages backing each virtual address in the range, saving this
- * inside the src array. Then it locks those pages and unmaps them. Once the pages
- * are locked and unmapped, it checks whether each page is pinned or not. Pages
- * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function)
- * in the corresponding src array entry. It then restores any pages that are
- * pinned, by remapping and unlocking those pages.
- *
- * At this point it calls the alloc_and_copy() callback. For documentation on
- * what is expected from that callback, see struct migrate_vma_ops comments in
- * include/linux/migrate.h
- *
- * After the alloc_and_copy() callback, this function goes over each entry in
- * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
- * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
- * then the function tries to migrate struct page information from the source
- * struct page to the destination struct page. If it fails to migrate the struct
- * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src
- * array.
- *
- * At this point all successfully migrated pages have an entry in the src
- * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
- * array entry with MIGRATE_PFN_VALID flag set.
- *
- * It then calls the finalize_and_map() callback. See comments for "struct
- * migrate_vma_ops", in include/linux/migrate.h for details about
- * finalize_and_map() behavior.
- *
- * After the finalize_and_map() callback, for successfully migrated pages, this
- * function updates the CPU page table to point to new pages, otherwise it
- * restores the CPU page table to point to the original source pages.
- *
- * Function returns 0 after the above steps, even if no pages were migrated
- * (The function only returns an error if any of the arguments are invalid.)
- *
- * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT
- * unsigned long entries.
- */
-int migrate_vma(const struct migrate_vma_ops *ops,
-		struct vm_area_struct *vma,
-		unsigned long start,
-		unsigned long end,
-		unsigned long *src,
-		unsigned long *dst,
-		void *private)
-{
-	struct migrate_vma migrate;
-
-	/* Sanity check the arguments */
-	start &= PAGE_MASK;
-	end &= PAGE_MASK;
-	if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
-	    vma_is_dax(vma))
-		return -EINVAL;
-	if (start < vma->vm_start || start >= vma->vm_end)
-		return -EINVAL;
-	if (end <= vma->vm_start || end > vma->vm_end)
-		return -EINVAL;
-	if (!ops || !src || !dst || start >= end)
-		return -EINVAL;
-
-	memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
-	migrate.src = src;
-	migrate.dst = dst;
-	migrate.start = start;
-	migrate.npages = 0;
-	migrate.cpages = 0;
-	migrate.end = end;
-	migrate.vma = vma;
-
-	/* Collect, and try to unmap source pages */
-	migrate_vma_collect(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/* Lock and isolate page */
-	migrate_vma_prepare(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/* Unmap pages */
-	migrate_vma_unmap(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/*
-	 * At this point pages are locked and unmapped, and thus they have
-	 * stable content and can safely be copied to destination memory that
-	 * is allocated by the callback.
-	 *
-	 * Note that migration can fail in migrate_vma_struct_page() for each
-	 * individual page.
-	 */
-	ops->alloc_and_copy(vma, src, dst, start, end, private);
-
-	/* This does the real migration of struct page */
-	migrate_vma_pages(&migrate);
-
-	ops->finalize_and_map(vma, src, dst, start, end, private);
-
-	/* Unlock and remap pages */
-	migrate_vma_finalize(&migrate);
-
-	return 0;
-}
-EXPORT_SYMBOL(migrate_vma);
+EXPORT_SYMBOL(migrate_vma_finalize);
 #endif /* defined(MIGRATE_VMA_HELPER) */
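As a companion to the migrate_vma_setup() kernel-doc above, here is a minimal sketch of the allocate-and-copy step a driver performs between migrate_vma_setup() and migrate_vma_pages(). The allocator my_device_alloc_page() and copy helper my_copy_to_device() are hypothetical stand-ins for driver-specific code (real drivers typically allocate device memory and DMA the data); only the flag handling follows the documented protocol.

/*
 * Illustrative sketch only: fill the dst array as described by the
 * migrate_vma_setup() kernel-doc.  my_device_alloc_page() and
 * my_copy_to_device() are hypothetical driver-specific helpers.
 */
static void my_alloc_and_copy(struct migrate_vma *args)
{
	unsigned long i;

	for (i = 0; i < args->npages; i++) {
		struct page *spage = migrate_pfn_to_page(args->src[i]);
		struct page *dpage;

		args->dst[i] = 0;

		/* Only entries flagged by migrate_vma_setup() may be migrated. */
		if (!(args->src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = my_device_alloc_page(args, i);
		if (!dpage)
			continue;	/* a zero dst entry simply skips this page */

		/* Destination pages must be locked before migrate_vma_pages(). */
		lock_page(dpage);

		/* spage is NULL for pte_none()/pmd_none() holes; nothing to copy. */
		if (spage)
			my_copy_to_device(dpage, spage);

		args->dst[i] = migrate_pfn(page_to_pfn(dpage)) |
			       MIGRATE_PFN_VALID | MIGRATE_PFN_LOCKED;
	}
}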