author		Christoph Hellwig <hch@lst.de>		2019-08-14 03:59:19 -0400
committer	Jason Gunthorpe <jgg@mellanox.com>	2019-08-20 08:35:02 -0400
commit		a7d1f22bb74f32cf3cd93f52776007e161f1a738
tree		d3cbbf0483499da1f858f3b2facda271670e8d69	/mm/migrate.c
parent		f4fb3b9c1971ec210b30845a9f62dc823c5242d0
mm: turn migrate_vma upside down
There isn't any good reason to pass callbacks to migrate_vma. Instead
we can just export the three steps done by this function to drivers and
let them sequence the operation without callbacks. This removes a lot
of boilerplate code as-is, and will allow the drivers to drastically
improve code flow and error handling further on.
Link: https://lore.kernel.org/r/20190814075928.23766-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
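[Editor's note: to illustrate the flow this commit enables, here is a hedged sketch, not part of the patch, of how a driver might sequence the exported steps. example_migrate_range(), alloc_dst_page() and copy_to_dst_page() are hypothetical placeholders for device-specific code; the migrate_vma struct and the MIGRATE_PFN_* flags are the ones documented in the new kernel-doc below.]

/*
 * Hedged sketch, not part of this patch: one possible driver-side
 * sequence using the newly exported steps.  alloc_dst_page() and
 * copy_to_dst_page() are hypothetical device-specific helpers.
 */
static int example_migrate_range(struct vm_area_struct *vma,
				 unsigned long start, unsigned long end)
{
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	struct migrate_vma args = {
		.vma	= vma,
		.start	= start,
		.end	= end,
	};
	unsigned long i;
	int ret = -ENOMEM;

	args.src = kcalloc(npages, sizeof(*args.src), GFP_KERNEL);
	args.dst = kcalloc(npages, sizeof(*args.dst), GFP_KERNEL);
	if (!args.src || !args.dst)
		goto out_free;

	/* Step 1: collect, lock and unmap the source pages. */
	ret = migrate_vma_setup(&args);
	if (ret)
		goto out_free;

	/* Step 2: allocate destination pages and copy the data over. */
	for (i = 0; i < args.npages; i++) {
		struct page *dpage;

		if (!(args.src[i] & MIGRATE_PFN_MIGRATE))
			continue;
		dpage = alloc_dst_page();		/* hypothetical */
		if (!dpage)
			continue;
		lock_page(dpage);
		copy_to_dst_page(dpage, args.src[i]);	/* hypothetical */
		args.dst[i] = migrate_pfn(page_to_pfn(dpage)) |
			      MIGRATE_PFN_LOCKED;
	}

	/* Step 3: move struct page metadata to the destination pages. */
	migrate_vma_pages(&args);

	/* Step 4: restore CPU page tables, unlock and release the pages. */
	migrate_vma_finalize(&args);
	ret = 0;

out_free:
	kfree(args.dst);
	kfree(args.src);
	return ret;
}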
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	| 244
1 file changed, 110 insertions(+), 134 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 8992741f10aa..8111e031fa2b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2118,16 +2118,6 @@ out_unlock:
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_MIGRATE_VMA_HELPER)
-struct migrate_vma {
-	struct vm_area_struct	*vma;
-	unsigned long		*dst;
-	unsigned long		*src;
-	unsigned long		cpages;
-	unsigned long		npages;
-	unsigned long		start;
-	unsigned long		end;
-};
-
 static int migrate_vma_collect_hole(unsigned long start,
 				    unsigned long end,
 				    struct mm_walk *walk)
@@ -2578,6 +2568,110 @@ restore:
 	}
 }
 
+/**
+ * migrate_vma_setup() - prepare to migrate a range of memory
+ * @args: contains the vma, start, and pfns arrays for the migration
+ *
+ * Returns: negative errno on failures, 0 when 0 or more pages were migrated
+ * without an error.
+ *
+ * Prepare to migrate a virtual address range by collecting all
+ * the pages backing each virtual address in the range, saving them inside the
+ * src array. Then lock those pages and unmap them. Once the pages are locked
+ * and unmapped, check whether each page is pinned or not. Pages that aren't
+ * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
+ * corresponding src array entry. Then restore any pages that are pinned, by
+ * remapping and unlocking those pages.
+ *
+ * The caller should then allocate destination memory and copy source memory to
+ * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
+ * flag set). Once these are allocated and copied, the caller must update each
+ * corresponding entry in the dst array with the pfn value of the destination
+ * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
+ * (destination pages must have their struct pages locked, via lock_page()).
+ *
+ * Note that the caller does not have to migrate all the pages that are marked
+ * with the MIGRATE_PFN_MIGRATE flag in the src array unless this is a migration
+ * from device memory to system memory. If the caller cannot migrate a device
+ * page back to system memory, then it must return VM_FAULT_SIGBUS, which has
+ * severe consequences for the userspace process, so it must be avoided if at
+ * all possible.
+ *
+ * For empty entries inside the CPU page table (pte_none() or pmd_none() is true)
+ * we do set the MIGRATE_PFN_MIGRATE flag inside the corresponding source array
+ * entry, thus allowing the caller to allocate device memory for those unbacked
+ * virtual addresses. For this the caller simply has to allocate device memory
+ * and properly set the destination entry like for regular migration. Note that
+ * this can still fail, and thus the device driver must check whether the
+ * migration was successful for those entries after calling migrate_vma_pages(),
+ * just like for regular migration.
+ *
+ * After that, the caller must call migrate_vma_pages() to go over each entry
+ * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
+ * set. If the corresponding entry in the dst array has the MIGRATE_PFN_VALID
+ * flag set, then migrate_vma_pages() migrates the struct page information from
+ * the source struct page to the destination struct page. If it fails to migrate
+ * the struct page information, then it clears the MIGRATE_PFN_MIGRATE flag in
+ * the src array.
+ *
+ * At this point all successfully migrated pages have an entry in the src
+ * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
+ * array entry with MIGRATE_PFN_VALID flag set.
+ *
+ * Once migrate_vma_pages() returns, the caller may inspect which pages were
+ * successfully migrated, and which were not. Successfully migrated pages will
+ * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
+ *
+ * It is safe to update the device page table after migrate_vma_pages() because
+ * both destination and source pages are still locked, and the mmap_sem is held
+ * in read mode (hence no one can unmap the range being migrated).
+ *
+ * Once the caller is done cleaning up and updating its page table (if it
+ * chose to do so, this is not an obligation) it finally calls
+ * migrate_vma_finalize() to update the CPU page table to point to new pages
+ * for successfully migrated pages, or otherwise restore the CPU page table to
+ * point to the original source pages.
+ */
+int migrate_vma_setup(struct migrate_vma *args)
+{
+	long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
+
+	args->start &= PAGE_MASK;
+	args->end &= PAGE_MASK;
+	if (!args->vma || is_vm_hugetlb_page(args->vma) ||
+	    (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
+		return -EINVAL;
+	if (nr_pages <= 0)
+		return -EINVAL;
+	if (args->start < args->vma->vm_start ||
+	    args->start >= args->vma->vm_end)
+		return -EINVAL;
+	if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
+		return -EINVAL;
+	if (!args->src || !args->dst)
+		return -EINVAL;
+
+	memset(args->src, 0, sizeof(*args->src) * nr_pages);
+	args->cpages = 0;
+	args->npages = 0;
+
+	migrate_vma_collect(args);
+
+	if (args->cpages)
+		migrate_vma_prepare(args);
+	if (args->cpages)
+		migrate_vma_unmap(args);
+
+	/*
+	 * At this point pages are locked and unmapped, and thus they have
+	 * stable content and can safely be copied to destination memory that
+	 * is allocated by the drivers.
+	 */
+	return 0;
+
+}
+EXPORT_SYMBOL(migrate_vma_setup);
+
 static void migrate_vma_insert_page(struct migrate_vma *migrate,
 				    unsigned long addr,
 				    struct page *page,
@@ -2709,7 +2803,7 @@ abort:
 	*src &= ~MIGRATE_PFN_MIGRATE;
 }
 
-/*
+/**
  * migrate_vma_pages() - migrate meta-data from src page to dst page
  * @migrate: migrate struct containing all migration information
  *
@@ -2717,7 +2811,7 @@ abort:
  * struct page. This effectively finishes the migration from source page to the
  * destination page.
  */
-static void migrate_vma_pages(struct migrate_vma *migrate)
+void migrate_vma_pages(struct migrate_vma *migrate)
 {
 	const unsigned long npages = migrate->npages;
 	const unsigned long start = migrate->start;
@@ -2791,8 +2885,9 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
 	if (notified)
 		mmu_notifier_invalidate_range_only_end(&range);
 }
+EXPORT_SYMBOL(migrate_vma_pages);
 
-/*
+/**
  * migrate_vma_finalize() - restore CPU page table entry
  * @migrate: migrate struct containing all migration information
  *
@@ -2803,7 +2898,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
  * This also unlocks the pages and puts them back on the lru, or drops the extra
  * refcount, for device pages.
  */
-static void migrate_vma_finalize(struct migrate_vma *migrate)
+void migrate_vma_finalize(struct migrate_vma *migrate)
 {
 	const unsigned long npages = migrate->npages;
 	unsigned long i;
@@ -2846,124 +2941,5 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
 		}
 	}
 }
-
-/*
- * migrate_vma() - migrate a range of memory inside vma
- *
- * @ops: migration callback for allocating destination memory and copying
- * @vma: virtual memory area containing the range to be migrated
- * @start: start address of the range to migrate (inclusive)
- * @end: end address of the range to migrate (exclusive)
- * @src: array of hmm_pfn_t containing source pfns
- * @dst: array of hmm_pfn_t containing destination pfns
- * @private: pointer passed back to each of the callback
- * Returns: 0 on success, error code otherwise
- *
- * This function tries to migrate a range of memory virtual address range, using
- * callbacks to allocate and copy memory from source to destination. First it
- * collects all the pages backing each virtual address in the range, saving this
- * inside the src array. Then it locks those pages and unmaps them. Once the pages
- * are locked and unmapped, it checks whether each page is pinned or not. Pages
- * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function)
- * in the corresponding src array entry. It then restores any pages that are
- * pinned, by remapping and unlocking those pages.
- *
- * At this point it calls the alloc_and_copy() callback. For documentation on
- * what is expected from that callback, see struct migrate_vma_ops comments in
- * include/linux/migrate.h
- *
- * After the alloc_and_copy() callback, this function goes over each entry in
- * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
- * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
- * then the function tries to migrate struct page information from the source
- * struct page to the destination struct page. If it fails to migrate the struct
- * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src
- * array.
- *
- * At this point all successfully migrated pages have an entry in the src
- * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
- * array entry with MIGRATE_PFN_VALID flag set.
- *
- * It then calls the finalize_and_map() callback. See comments for "struct
- * migrate_vma_ops", in include/linux/migrate.h for details about
- * finalize_and_map() behavior.
- *
- * After the finalize_and_map() callback, for successfully migrated pages, this
- * function updates the CPU page table to point to new pages, otherwise it
- * restores the CPU page table to point to the original source pages.
- *
- * Function returns 0 after the above steps, even if no pages were migrated
- * (The function only returns an error if any of the arguments are invalid.)
- *
- * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT
- * unsigned long entries.
- */
-int migrate_vma(const struct migrate_vma_ops *ops,
-		struct vm_area_struct *vma,
-		unsigned long start,
-		unsigned long end,
-		unsigned long *src,
-		unsigned long *dst,
-		void *private)
-{
-	struct migrate_vma migrate;
-
-	/* Sanity check the arguments */
-	start &= PAGE_MASK;
-	end &= PAGE_MASK;
-	if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
-			vma_is_dax(vma))
-		return -EINVAL;
-	if (start < vma->vm_start || start >= vma->vm_end)
-		return -EINVAL;
-	if (end <= vma->vm_start || end > vma->vm_end)
-		return -EINVAL;
-	if (!ops || !src || !dst || start >= end)
-		return -EINVAL;
-
-	memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
-	migrate.src = src;
-	migrate.dst = dst;
-	migrate.start = start;
-	migrate.npages = 0;
-	migrate.cpages = 0;
-	migrate.end = end;
-	migrate.vma = vma;
-
-	/* Collect, and try to unmap source pages */
-	migrate_vma_collect(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/* Lock and isolate page */
-	migrate_vma_prepare(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/* Unmap pages */
-	migrate_vma_unmap(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/*
-	 * At this point pages are locked and unmapped, and thus they have
-	 * stable content and can safely be copied to destination memory that
-	 * is allocated by the callback.
-	 *
-	 * Note that migration can fail in migrate_vma_struct_page() for each
-	 * individual page.
-	 */
-	ops->alloc_and_copy(vma, src, dst, start, end, private);
-
-	/* This does the real migration of struct page */
-	migrate_vma_pages(&migrate);
-
-	ops->finalize_and_map(vma, src, dst, start, end, private);
-
-	/* Unlock and remap pages */
-	migrate_vma_finalize(&migrate);
-
-	return 0;
-}
-EXPORT_SYMBOL(migrate_vma);
+EXPORT_SYMBOL(migrate_vma_finalize);
 #endif /* defined(MIGRATE_VMA_HELPER) */