author     Christoph Hellwig <hch@lst.de>        2019-08-14 03:59:19 -0400
committer  Jason Gunthorpe <jgg@mellanox.com>    2019-08-20 08:35:02 -0400
commit     a7d1f22bb74f32cf3cd93f52776007e161f1a738 (patch)
tree       d3cbbf0483499da1f858f3b2facda271670e8d69
parent     f4fb3b9c1971ec210b30845a9f62dc823c5242d0 (diff)
mm: turn migrate_vma upside down
There isn't any good reason to pass callbacks to migrate_vma.  Instead we
can just export the three steps done by this function to drivers and let
them sequence the operation without callbacks.  This removes a lot of
boilerplate code as-is, and will allow the drivers to drastically improve
code flow and error handling further on.

Link: https://lore.kernel.org/r/20190814075928.23766-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--  Documentation/vm/hmm.rst                |  54
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_dmem.c  | 122
-rw-r--r--  include/linux/migrate.h                 | 118
-rw-r--r--  mm/migrate.c                            | 244
4 files changed, 194 insertions, 344 deletions
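To make the new flow concrete before the per-file diffs: a minimal sketch of how a
driver's CPU-fault path sequences the exported helpers after this change, mirroring
the nouveau conversion below. my_alloc_and_copy() is a hypothetical driver routine,
not an interface added by this patch.

#include <linux/mm.h>
#include <linux/migrate.h>

/* Hypothetical: allocate destination pages, copy data, fill args->dst[]. */
static void my_alloc_and_copy(struct migrate_vma *args);

static vm_fault_t my_migrate_to_ram(struct vm_fault *vmf)
{
        unsigned long src[1] = { 0 }, dst[1] = { 0 };
        struct migrate_vma args = {
                .vma   = vmf->vma,
                .start = vmf->address,
                .end   = vmf->address + PAGE_SIZE,
                .src   = src,
                .dst   = dst,
        };

        /* Step 1: collect, lock and unmap the source page(s). */
        if (migrate_vma_setup(&args) < 0)
                return VM_FAULT_SIGBUS;
        if (!args.cpages)
                return 0;       /* nothing could be collected, nothing to do */

        /* Step 2: the driver allocates destination memory and copies the data. */
        my_alloc_and_copy(&args);

        /* Step 3: migrate struct page metadata, then restore the CPU page tables. */
        migrate_vma_pages(&args);
        migrate_vma_finalize(&args);

        return dst[0] == MIGRATE_PFN_ERROR ? VM_FAULT_SIGBUS : 0;
}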
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index e63c11f7e0e0..0a5960beccf7 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -339,58 +339,8 @@ Migration to and from device memory
 ===================================
 
 Because the CPU cannot access device memory, migration must use the device DMA
-engine to perform copy from and to device memory. For this we need a new
-migration helper::
-
-   int migrate_vma(const struct migrate_vma_ops *ops,
-                   struct vm_area_struct *vma,
-                   unsigned long mentries,
-                   unsigned long start,
-                   unsigned long end,
-                   unsigned long *src,
-                   unsigned long *dst,
-                   void *private);
-
-Unlike other migration functions it works on a range of virtual address, there
-are two reasons for that. First, device DMA copy has a high setup overhead cost
-and thus batching multiple pages is needed as otherwise the migration overhead
-makes the whole exercise pointless. The second reason is because the
-migration might be for a range of addresses the device is actively accessing.
-
-The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
-controls destination memory allocation and copy operation. Second one is there
-to allow the device driver to perform cleanup operations after migration::
-
-   struct migrate_vma_ops {
-       void (*alloc_and_copy)(struct vm_area_struct *vma,
-                              const unsigned long *src,
-                              unsigned long *dst,
-                              unsigned long start,
-                              unsigned long end,
-                              void *private);
-       void (*finalize_and_map)(struct vm_area_struct *vma,
-                                const unsigned long *src,
-                                const unsigned long *dst,
-                                unsigned long start,
-                                unsigned long end,
-                                void *private);
-   };
-
-It is important to stress that these migration helpers allow for holes in the
-virtual address range. Some pages in the range might not be migrated for all
-the usual reasons (page is pinned, page is locked, ...). This helper does not
-fail but just skips over those pages.
-
-The alloc_and_copy() might decide to not migrate all pages in the
-range (for reasons under the callback control). For those, the callback just
-has to leave the corresponding dst entry empty.
-
-Finally, the migration of the struct page might fail (for file backed page) for
-various reasons (failure to freeze reference, or update page cache, ...). If
-that happens, then the finalize_and_map() can catch any pages that were not
-migrated. Note those pages were still copied to a new page and thus we wasted
-bandwidth but this is considered as a rare event and a price that we are
-willing to pay to keep all the code simpler.
+engine to perform copy from and to device memory. For this we need to use
+migrate_vma_setup(), migrate_vma_pages(), and migrate_vma_finalize() helpers.
 
 
 Memory cgroup (memcg) and rss accounting
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 345c63cb752a..38416798abd4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -131,9 +131,8 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
131 unsigned long *dst_pfns, 131 unsigned long *dst_pfns,
132 unsigned long start, 132 unsigned long start,
133 unsigned long end, 133 unsigned long end,
134 void *private) 134 struct nouveau_dmem_fault *fault)
135{ 135{
136 struct nouveau_dmem_fault *fault = private;
137 struct nouveau_drm *drm = fault->drm; 136 struct nouveau_drm *drm = fault->drm;
138 struct device *dev = drm->dev->dev; 137 struct device *dev = drm->dev->dev;
139 unsigned long addr, i, npages = 0; 138 unsigned long addr, i, npages = 0;
@@ -230,14 +229,9 @@ error:
230 } 229 }
231} 230}
232 231
233void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, 232static void
234 const unsigned long *src_pfns, 233nouveau_dmem_fault_finalize_and_map(struct nouveau_dmem_fault *fault)
235 const unsigned long *dst_pfns,
236 unsigned long start,
237 unsigned long end,
238 void *private)
239{ 234{
240 struct nouveau_dmem_fault *fault = private;
241 struct nouveau_drm *drm = fault->drm; 235 struct nouveau_drm *drm = fault->drm;
242 236
243 if (fault->fence) { 237 if (fault->fence) {
@@ -257,29 +251,35 @@ void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
257 kfree(fault->dma); 251 kfree(fault->dma);
258} 252}
259 253
260static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
261 .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy,
262 .finalize_and_map = nouveau_dmem_fault_finalize_and_map,
263};
264
265static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) 254static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
266{ 255{
267 struct nouveau_dmem *dmem = page_to_dmem(vmf->page); 256 struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
268 unsigned long src[1] = {0}, dst[1] = {0}; 257 unsigned long src[1] = {0}, dst[1] = {0};
258 struct migrate_vma args = {
259 .vma = vmf->vma,
260 .start = vmf->address,
261 .end = vmf->address + PAGE_SIZE,
262 .src = src,
263 .dst = dst,
264 };
269 struct nouveau_dmem_fault fault = { .drm = dmem->drm }; 265 struct nouveau_dmem_fault fault = { .drm = dmem->drm };
270 int ret;
271 266
272 /* 267 /*
273 * FIXME what we really want is to find some heuristic to migrate more 268 * FIXME what we really want is to find some heuristic to migrate more
274 * than just one page on CPU fault. When such fault happens it is very 269 * than just one page on CPU fault. When such fault happens it is very
275 * likely that more surrounding page will CPU fault too. 270 * likely that more surrounding page will CPU fault too.
276 */ 271 */
277 ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma, 272 if (migrate_vma_setup(&args) < 0)
278 vmf->address, vmf->address + PAGE_SIZE,
279 src, dst, &fault);
280 if (ret)
281 return VM_FAULT_SIGBUS; 273 return VM_FAULT_SIGBUS;
274 if (!args.cpages)
275 return 0;
276
277 nouveau_dmem_fault_alloc_and_copy(args.vma, src, dst, args.start,
278 args.end, &fault);
279 migrate_vma_pages(&args);
280 nouveau_dmem_fault_finalize_and_map(&fault);
282 281
282 migrate_vma_finalize(&args);
283 if (dst[0] == MIGRATE_PFN_ERROR) 283 if (dst[0] == MIGRATE_PFN_ERROR)
284 return VM_FAULT_SIGBUS; 284 return VM_FAULT_SIGBUS;
285 285
@@ -648,9 +648,8 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
648 unsigned long *dst_pfns, 648 unsigned long *dst_pfns,
649 unsigned long start, 649 unsigned long start,
650 unsigned long end, 650 unsigned long end,
651 void *private) 651 struct nouveau_migrate *migrate)
652{ 652{
653 struct nouveau_migrate *migrate = private;
654 struct nouveau_drm *drm = migrate->drm; 653 struct nouveau_drm *drm = migrate->drm;
655 struct device *dev = drm->dev->dev; 654 struct device *dev = drm->dev->dev;
656 unsigned long addr, i, npages = 0; 655 unsigned long addr, i, npages = 0;
@@ -747,14 +746,9 @@ error:
747 } 746 }
748} 747}
749 748
750void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, 749static void
751 const unsigned long *src_pfns, 750nouveau_dmem_migrate_finalize_and_map(struct nouveau_migrate *migrate)
752 const unsigned long *dst_pfns,
753 unsigned long start,
754 unsigned long end,
755 void *private)
756{ 751{
757 struct nouveau_migrate *migrate = private;
758 struct nouveau_drm *drm = migrate->drm; 752 struct nouveau_drm *drm = migrate->drm;
759 753
760 if (migrate->fence) { 754 if (migrate->fence) {
@@ -779,10 +773,15 @@ void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
779 */ 773 */
780} 774}
781 775
782static const struct migrate_vma_ops nouveau_dmem_migrate_ops = { 776static void nouveau_dmem_migrate_chunk(struct migrate_vma *args,
783 .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy, 777 struct nouveau_migrate *migrate)
784 .finalize_and_map = nouveau_dmem_migrate_finalize_and_map, 778{
785}; 779 nouveau_dmem_migrate_alloc_and_copy(args->vma, args->src, args->dst,
780 args->start, args->end, migrate);
781 migrate_vma_pages(args);
782 nouveau_dmem_migrate_finalize_and_map(migrate);
783 migrate_vma_finalize(args);
784}
786 785
787int 786int
788nouveau_dmem_migrate_vma(struct nouveau_drm *drm, 787nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
@@ -790,40 +789,45 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
790 unsigned long start, 789 unsigned long start,
791 unsigned long end) 790 unsigned long end)
792{ 791{
793 unsigned long *src_pfns, *dst_pfns, npages; 792 unsigned long npages = (end - start) >> PAGE_SHIFT;
794 struct nouveau_migrate migrate = {0}; 793 unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages);
795 unsigned long i, c, max; 794 struct migrate_vma args = {
796 int ret = 0; 795 .vma = vma,
797 796 .start = start,
798 npages = (end - start) >> PAGE_SHIFT; 797 };
799 max = min(SG_MAX_SINGLE_ALLOC, npages); 798 struct nouveau_migrate migrate = {
800 src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); 799 .drm = drm,
801 if (src_pfns == NULL) 800 .vma = vma,
802 return -ENOMEM; 801 .npages = npages,
803 dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); 802 };
804 if (dst_pfns == NULL) { 803 unsigned long c, i;
805 kfree(src_pfns); 804 int ret = -ENOMEM;
806 return -ENOMEM; 805
807 } 806 args.src = kzalloc(sizeof(long) * max, GFP_KERNEL);
807 if (!args.src)
808 goto out;
809 args.dst = kzalloc(sizeof(long) * max, GFP_KERNEL);
810 if (!args.dst)
811 goto out_free_src;
808 812
809 migrate.drm = drm;
810 migrate.vma = vma;
811 migrate.npages = npages;
812 for (i = 0; i < npages; i += c) { 813 for (i = 0; i < npages; i += c) {
813 unsigned long next;
814
815 c = min(SG_MAX_SINGLE_ALLOC, npages); 814 c = min(SG_MAX_SINGLE_ALLOC, npages);
816 next = start + (c << PAGE_SHIFT); 815 args.end = start + (c << PAGE_SHIFT);
817 ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start, 816 ret = migrate_vma_setup(&args);
818 next, src_pfns, dst_pfns, &migrate);
819 if (ret) 817 if (ret)
820 goto out; 818 goto out_free_dst;
821 start = next; 819
820 if (args.cpages)
821 nouveau_dmem_migrate_chunk(&args, &migrate);
822 args.start = args.end;
822 } 823 }
823 824
825 ret = 0;
826out_free_dst:
827 kfree(args.dst);
828out_free_src:
829 kfree(args.src);
824out: 830out:
825 kfree(dst_pfns);
826 kfree(src_pfns);
827 return ret; 831 return ret;
828} 832}
829 833
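For larger ranges, the rewritten nouveau_dmem_migrate_vma() above shows the chunking
pattern the exported helpers allow: one pair of pfn arrays is reused while
args.start/args.end walk the range one chunk at a time. A reduced sketch of that loop,
assuming src and dst were allocated with at least min(SG_MAX_SINGLE_ALLOC, npages)
entries; my_migrate_chunk() is a hypothetical stand-in for the driver's copy step plus
migrate_vma_pages()/migrate_vma_finalize().

#include <linux/migrate.h>
#include <linux/scatterlist.h>          /* SG_MAX_SINGLE_ALLOC */

/* Hypothetical: copy one chunk, then call migrate_vma_pages()/_finalize(). */
static void my_migrate_chunk(struct migrate_vma *args);

static int my_migrate_range(struct migrate_vma *args, unsigned long npages)
{
        unsigned long i, c;
        int ret;

        for (i = 0; i < npages; i += c) {
                /* Never hand the core more entries than the arrays can hold. */
                c = min(SG_MAX_SINGLE_ALLOC, npages - i);
                args->end = args->start + (c << PAGE_SHIFT);

                ret = migrate_vma_setup(args);
                if (ret)
                        return ret;

                /* cpages == 0 means nothing in this chunk was collected. */
                if (args->cpages)
                        my_migrate_chunk(args);

                args->start = args->end;
        }
        return 0;
}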
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 7f04754c7f2b..18156d379ebf 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -182,107 +182,27 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
182 return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID; 182 return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
183} 183}
184 184
185/* 185struct migrate_vma {
186 * struct migrate_vma_ops - migrate operation callback 186 struct vm_area_struct *vma;
187 * 187 /*
188 * @alloc_and_copy: alloc destination memory and copy source memory to it 188 * Both src and dst array must be big enough for
189 * @finalize_and_map: allow caller to map the successfully migrated pages 189 * (end - start) >> PAGE_SHIFT entries.
190 * 190 *
191 * 191 * The src array must not be modified by the caller after
192 * The alloc_and_copy() callback happens once all source pages have been locked, 192 * migrate_vma_setup(), and must not change the dst array after
193 * unmapped and checked (checked whether pinned or not). All pages that can be 193 * migrate_vma_pages() returns.
194 * migrated will have an entry in the src array set with the pfn value of the 194 */
195 * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other 195 unsigned long *dst;
196 * flags might be set but should be ignored by the callback). 196 unsigned long *src;
197 * 197 unsigned long cpages;
198 * The alloc_and_copy() callback can then allocate destination memory and copy 198 unsigned long npages;
199 * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and 199 unsigned long start;
200 * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the 200 unsigned long end;
201 * callback must update each corresponding entry in the dst array with the pfn
202 * value of the destination page and with the MIGRATE_PFN_VALID and
203 * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages
204 * locked, via lock_page()).
205 *
206 * At this point the alloc_and_copy() callback is done and returns.
207 *
208 * Note that the callback does not have to migrate all the pages that are
209 * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration
210 * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also
211 * set in the src array entry). If the device driver cannot migrate a device
212 * page back to system memory, then it must set the corresponding dst array
213 * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if CPU tries to
214 * access any of the virtual addresses originally backed by this page. Because
215 * a SIGBUS is such a severe result for the userspace process, the device
216 * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an
217 * unrecoverable state.
218 *
219 * For empty entry inside CPU page table (pte_none() or pmd_none() is true) we
220 * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
221 * allowing device driver to allocate device memory for those unback virtual
222 * address. For this the device driver simply have to allocate device memory
223 * and properly set the destination entry like for regular migration. Note that
224 * this can still fails and thus inside the device driver must check if the
225 * migration was successful for those entry inside the finalize_and_map()
226 * callback just like for regular migration.
227 *
228 * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES
229 * OR BAD THINGS WILL HAPPEN !
230 *
231 *
232 * The finalize_and_map() callback happens after struct page migration from
233 * source to destination (destination struct pages are the struct pages for the
234 * memory allocated by the alloc_and_copy() callback). Migration can fail, and
235 * thus the finalize_and_map() allows the driver to inspect which pages were
236 * successfully migrated, and which were not. Successfully migrated pages will
237 * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
238 *
239 * It is safe to update device page table from within the finalize_and_map()
240 * callback because both destination and source page are still locked, and the
241 * mmap_sem is held in read mode (hence no one can unmap the range being
242 * migrated).
243 *
244 * Once callback is done cleaning up things and updating its page table (if it
245 * chose to do so, this is not an obligation) then it returns. At this point,
246 * the HMM core will finish up the final steps, and the migration is complete.
247 *
248 * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY
249 * ENTRIES OR BAD THINGS WILL HAPPEN !
250 */
251struct migrate_vma_ops {
252 void (*alloc_and_copy)(struct vm_area_struct *vma,
253 const unsigned long *src,
254 unsigned long *dst,
255 unsigned long start,
256 unsigned long end,
257 void *private);
258 void (*finalize_and_map)(struct vm_area_struct *vma,
259 const unsigned long *src,
260 const unsigned long *dst,
261 unsigned long start,
262 unsigned long end,
263 void *private);
264}; 201};
265 202
266#if defined(CONFIG_MIGRATE_VMA_HELPER) 203int migrate_vma_setup(struct migrate_vma *args);
267int migrate_vma(const struct migrate_vma_ops *ops, 204void migrate_vma_pages(struct migrate_vma *migrate);
268 struct vm_area_struct *vma, 205void migrate_vma_finalize(struct migrate_vma *migrate);
269 unsigned long start,
270 unsigned long end,
271 unsigned long *src,
272 unsigned long *dst,
273 void *private);
274#else
275static inline int migrate_vma(const struct migrate_vma_ops *ops,
276 struct vm_area_struct *vma,
277 unsigned long start,
278 unsigned long end,
279 unsigned long *src,
280 unsigned long *dst,
281 void *private)
282{
283 return -EINVAL;
284}
285#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */
286 206
287#endif /* CONFIG_MIGRATION */ 207#endif /* CONFIG_MIGRATION */
288 208
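Between migrate_vma_setup() and migrate_vma_pages() the caller fills the dst array
using the migrate_pfn() encoding shown above. A hedged sketch of that per-entry step;
alloc_dst_page() and copy_to_dst() are hypothetical driver routines, and the
destination page is locked because the kerneldoc added to migrate_vma_setup() in
mm/migrate.c below expects dst entries to carry MIGRATE_PFN_LOCKED.

static void my_fill_dst(struct migrate_vma *args)
{
        unsigned long i;

        for (i = 0; i < args->npages; i++) {
                struct page *spage = migrate_pfn_to_page(args->src[i]);
                struct page *dpage;

                /* Entries without MIGRATE_PFN_MIGRATE cannot be migrated. */
                if (!(args->src[i] & MIGRATE_PFN_MIGRATE))
                        continue;

                dpage = alloc_dst_page(args, i);        /* hypothetical allocator */
                if (!dpage)
                        continue;       /* leaving dst[i] empty just skips this page */

                lock_page(dpage);
                if (spage)                              /* NULL for CPU page table holes */
                        copy_to_dst(dpage, spage);      /* hypothetical copy */
                args->dst[i] = migrate_pfn(page_to_pfn(dpage)) |
                               MIGRATE_PFN_LOCKED;
        }
}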
diff --git a/mm/migrate.c b/mm/migrate.c
index 8992741f10aa..8111e031fa2b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2118,16 +2118,6 @@ out_unlock:
2118#endif /* CONFIG_NUMA */ 2118#endif /* CONFIG_NUMA */
2119 2119
2120#if defined(CONFIG_MIGRATE_VMA_HELPER) 2120#if defined(CONFIG_MIGRATE_VMA_HELPER)
2121struct migrate_vma {
2122 struct vm_area_struct *vma;
2123 unsigned long *dst;
2124 unsigned long *src;
2125 unsigned long cpages;
2126 unsigned long npages;
2127 unsigned long start;
2128 unsigned long end;
2129};
2130
2131static int migrate_vma_collect_hole(unsigned long start, 2121static int migrate_vma_collect_hole(unsigned long start,
2132 unsigned long end, 2122 unsigned long end,
2133 struct mm_walk *walk) 2123 struct mm_walk *walk)
@@ -2578,6 +2568,110 @@ restore:
2578 } 2568 }
2579} 2569}
2580 2570
2571/**
2572 * migrate_vma_setup() - prepare to migrate a range of memory
2573 * @args: contains the vma, start, and and pfns arrays for the migration
2574 *
2575 * Returns: negative errno on failures, 0 when 0 or more pages were migrated
2576 * without an error.
2577 *
2578 * Prepare to migrate a range of memory virtual address range by collecting all
2579 * the pages backing each virtual address in the range, saving them inside the
2580 * src array. Then lock those pages and unmap them. Once the pages are locked
2581 * and unmapped, check whether each page is pinned or not. Pages that aren't
2582 * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
2583 * corresponding src array entry. Then restores any pages that are pinned, by
2584 * remapping and unlocking those pages.
2585 *
2586 * The caller should then allocate destination memory and copy source memory to
2587 * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
2588 * flag set). Once these are allocated and copied, the caller must update each
2589 * corresponding entry in the dst array with the pfn value of the destination
2590 * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
2591 * (destination pages must have their struct pages locked, via lock_page()).
2592 *
2593 * Note that the caller does not have to migrate all the pages that are marked
2594 * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
2595 * device memory to system memory. If the caller cannot migrate a device page
2596 * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe
2597 * consequences for the userspace process, so it must be avoided if at all
2598 * possible.
2599 *
2600 * For empty entries inside CPU page table (pte_none() or pmd_none() is true) we
2601 * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
2602 * allowing the caller to allocate device memory for those unback virtual
2603 * address. For this the caller simply has to allocate device memory and
2604 * properly set the destination entry like for regular migration. Note that
2605 * this can still fails and thus inside the device driver must check if the
2606 * migration was successful for those entries after calling migrate_vma_pages()
2607 * just like for regular migration.
2608 *
2609 * After that, the callers must call migrate_vma_pages() to go over each entry
2610 * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
2611 * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
2612 * then migrate_vma_pages() to migrate struct page information from the source
2613 * struct page to the destination struct page. If it fails to migrate the
2614 * struct page information, then it clears the MIGRATE_PFN_MIGRATE flag in the
2615 * src array.
2616 *
2617 * At this point all successfully migrated pages have an entry in the src
2618 * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
2619 * array entry with MIGRATE_PFN_VALID flag set.
2620 *
2621 * Once migrate_vma_pages() returns the caller may inspect which pages were
2622 * successfully migrated, and which were not. Successfully migrated pages will
2623 * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
2624 *
2625 * It is safe to update device page table after migrate_vma_pages() because
2626 * both destination and source page are still locked, and the mmap_sem is held
2627 * in read mode (hence no one can unmap the range being migrated).
2628 *
2629 * Once the caller is done cleaning up things and updating its page table (if it
2630 * chose to do so, this is not an obligation) it finally calls
2631 * migrate_vma_finalize() to update the CPU page table to point to new pages
2632 * for successfully migrated pages or otherwise restore the CPU page table to
2633 * point to the original source pages.
2634 */
2635int migrate_vma_setup(struct migrate_vma *args)
2636{
2637 long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
2638
2639 args->start &= PAGE_MASK;
2640 args->end &= PAGE_MASK;
2641 if (!args->vma || is_vm_hugetlb_page(args->vma) ||
2642 (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
2643 return -EINVAL;
2644 if (nr_pages <= 0)
2645 return -EINVAL;
2646 if (args->start < args->vma->vm_start ||
2647 args->start >= args->vma->vm_end)
2648 return -EINVAL;
2649 if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
2650 return -EINVAL;
2651 if (!args->src || !args->dst)
2652 return -EINVAL;
2653
2654 memset(args->src, 0, sizeof(*args->src) * nr_pages);
2655 args->cpages = 0;
2656 args->npages = 0;
2657
2658 migrate_vma_collect(args);
2659
2660 if (args->cpages)
2661 migrate_vma_prepare(args);
2662 if (args->cpages)
2663 migrate_vma_unmap(args);
2664
2665 /*
2666 * At this point pages are locked and unmapped, and thus they have
2667 * stable content and can safely be copied to destination memory that
2668 * is allocated by the drivers.
2669 */
2670 return 0;
2671
2672}
2673EXPORT_SYMBOL(migrate_vma_setup);
2674
2581static void migrate_vma_insert_page(struct migrate_vma *migrate, 2675static void migrate_vma_insert_page(struct migrate_vma *migrate,
2582 unsigned long addr, 2676 unsigned long addr,
2583 struct page *page, 2677 struct page *page,
@@ -2709,7 +2803,7 @@ abort:
2709 *src &= ~MIGRATE_PFN_MIGRATE; 2803 *src &= ~MIGRATE_PFN_MIGRATE;
2710} 2804}
2711 2805
2712/* 2806/**
2713 * migrate_vma_pages() - migrate meta-data from src page to dst page 2807 * migrate_vma_pages() - migrate meta-data from src page to dst page
2714 * @migrate: migrate struct containing all migration information 2808 * @migrate: migrate struct containing all migration information
2715 * 2809 *
@@ -2717,7 +2811,7 @@ abort:
2717 * struct page. This effectively finishes the migration from source page to the 2811 * struct page. This effectively finishes the migration from source page to the
2718 * destination page. 2812 * destination page.
2719 */ 2813 */
2720static void migrate_vma_pages(struct migrate_vma *migrate) 2814void migrate_vma_pages(struct migrate_vma *migrate)
2721{ 2815{
2722 const unsigned long npages = migrate->npages; 2816 const unsigned long npages = migrate->npages;
2723 const unsigned long start = migrate->start; 2817 const unsigned long start = migrate->start;
@@ -2791,8 +2885,9 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
2791 if (notified) 2885 if (notified)
2792 mmu_notifier_invalidate_range_only_end(&range); 2886 mmu_notifier_invalidate_range_only_end(&range);
2793} 2887}
2888EXPORT_SYMBOL(migrate_vma_pages);
2794 2889
2795/* 2890/**
2796 * migrate_vma_finalize() - restore CPU page table entry 2891 * migrate_vma_finalize() - restore CPU page table entry
2797 * @migrate: migrate struct containing all migration information 2892 * @migrate: migrate struct containing all migration information
2798 * 2893 *
@@ -2803,7 +2898,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
2803 * This also unlocks the pages and puts them back on the lru, or drops the extra 2898 * This also unlocks the pages and puts them back on the lru, or drops the extra
2804 * refcount, for device pages. 2899 * refcount, for device pages.
2805 */ 2900 */
2806static void migrate_vma_finalize(struct migrate_vma *migrate) 2901void migrate_vma_finalize(struct migrate_vma *migrate)
2807{ 2902{
2808 const unsigned long npages = migrate->npages; 2903 const unsigned long npages = migrate->npages;
2809 unsigned long i; 2904 unsigned long i;
@@ -2846,124 +2941,5 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
2846 } 2941 }
2847 } 2942 }
2848} 2943}
2849 2944EXPORT_SYMBOL(migrate_vma_finalize);
2850/*
2851 * migrate_vma() - migrate a range of memory inside vma
2852 *
2853 * @ops: migration callback for allocating destination memory and copying
2854 * @vma: virtual memory area containing the range to be migrated
2855 * @start: start address of the range to migrate (inclusive)
2856 * @end: end address of the range to migrate (exclusive)
2857 * @src: array of hmm_pfn_t containing source pfns
2858 * @dst: array of hmm_pfn_t containing destination pfns
2859 * @private: pointer passed back to each of the callback
2860 * Returns: 0 on success, error code otherwise
2861 *
2862 * This function tries to migrate a range of memory virtual address range, using
2863 * callbacks to allocate and copy memory from source to destination. First it
2864 * collects all the pages backing each virtual address in the range, saving this
2865 * inside the src array. Then it locks those pages and unmaps them. Once the pages
2866 * are locked and unmapped, it checks whether each page is pinned or not. Pages
2867 * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function)
2868 * in the corresponding src array entry. It then restores any pages that are
2869 * pinned, by remapping and unlocking those pages.
2870 *
2871 * At this point it calls the alloc_and_copy() callback. For documentation on
2872 * what is expected from that callback, see struct migrate_vma_ops comments in
2873 * include/linux/migrate.h
2874 *
2875 * After the alloc_and_copy() callback, this function goes over each entry in
2876 * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
2877 * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
2878 * then the function tries to migrate struct page information from the source
2879 * struct page to the destination struct page. If it fails to migrate the struct
2880 * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src
2881 * array.
2882 *
2883 * At this point all successfully migrated pages have an entry in the src
2884 * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
2885 * array entry with MIGRATE_PFN_VALID flag set.
2886 *
2887 * It then calls the finalize_and_map() callback. See comments for "struct
2888 * migrate_vma_ops", in include/linux/migrate.h for details about
2889 * finalize_and_map() behavior.
2890 *
2891 * After the finalize_and_map() callback, for successfully migrated pages, this
2892 * function updates the CPU page table to point to new pages, otherwise it
2893 * restores the CPU page table to point to the original source pages.
2894 *
2895 * Function returns 0 after the above steps, even if no pages were migrated
2896 * (The function only returns an error if any of the arguments are invalid.)
2897 *
2898 * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT
2899 * unsigned long entries.
2900 */
2901int migrate_vma(const struct migrate_vma_ops *ops,
2902 struct vm_area_struct *vma,
2903 unsigned long start,
2904 unsigned long end,
2905 unsigned long *src,
2906 unsigned long *dst,
2907 void *private)
2908{
2909 struct migrate_vma migrate;
2910
2911 /* Sanity check the arguments */
2912 start &= PAGE_MASK;
2913 end &= PAGE_MASK;
2914 if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
2915 vma_is_dax(vma))
2916 return -EINVAL;
2917 if (start < vma->vm_start || start >= vma->vm_end)
2918 return -EINVAL;
2919 if (end <= vma->vm_start || end > vma->vm_end)
2920 return -EINVAL;
2921 if (!ops || !src || !dst || start >= end)
2922 return -EINVAL;
2923
2924 memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
2925 migrate.src = src;
2926 migrate.dst = dst;
2927 migrate.start = start;
2928 migrate.npages = 0;
2929 migrate.cpages = 0;
2930 migrate.end = end;
2931 migrate.vma = vma;
2932
2933 /* Collect, and try to unmap source pages */
2934 migrate_vma_collect(&migrate);
2935 if (!migrate.cpages)
2936 return 0;
2937
2938 /* Lock and isolate page */
2939 migrate_vma_prepare(&migrate);
2940 if (!migrate.cpages)
2941 return 0;
2942
2943 /* Unmap pages */
2944 migrate_vma_unmap(&migrate);
2945 if (!migrate.cpages)
2946 return 0;
2947
2948 /*
2949 * At this point pages are locked and unmapped, and thus they have
2950 * stable content and can safely be copied to destination memory that
2951 * is allocated by the callback.
2952 *
2953 * Note that migration can fail in migrate_vma_struct_page() for each
2954 * individual page.
2955 */
2956 ops->alloc_and_copy(vma, src, dst, start, end, private);
2957
2958 /* This does the real migration of struct page */
2959 migrate_vma_pages(&migrate);
2960
2961 ops->finalize_and_map(vma, src, dst, start, end, private);
2962
2963 /* Unlock and remap pages */
2964 migrate_vma_finalize(&migrate);
2965
2966 return 0;
2967}
2968EXPORT_SYMBOL(migrate_vma);
2969#endif /* defined(MIGRATE_VMA_HELPER) */ 2945#endif /* defined(MIGRATE_VMA_HELPER) */
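Tying the sketches together: the kerneldoc above notes that device page tables may be
updated after migrate_vma_pages() and before migrate_vma_finalize(), while source and
destination pages are still locked. A hypothetical my_migrate_chunk() (the stand-in
used in the earlier loop sketch) could therefore look like this, with
my_map_into_device() as an assumed driver hook.

static void my_migrate_chunk(struct migrate_vma *args)
{
        unsigned long i;

        my_fill_dst(args);              /* allocate destinations and copy, as sketched above */
        migrate_vma_pages(args);        /* migrate struct page metadata */

        /*
         * Only entries that still have MIGRATE_PFN_MIGRATE set after
         * migrate_vma_pages() were actually migrated; map just those into
         * the device while both pages remain locked.
         */
        for (i = 0; i < args->npages; i++) {
                if (!(args->src[i] & MIGRATE_PFN_MIGRATE))
                        continue;
                my_map_into_device(args, i);    /* hypothetical device mapping */
        }

        migrate_vma_finalize(args);     /* unlock pages and fix up CPU page tables */
}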