author     Christoph Hellwig <hch@lst.de>      2019-08-14 03:59:19 -0400
committer  Jason Gunthorpe <jgg@mellanox.com>  2019-08-20 08:35:02 -0400
commit     a7d1f22bb74f32cf3cd93f52776007e161f1a738
tree       d3cbbf0483499da1f858f3b2facda271670e8d69
parent     f4fb3b9c1971ec210b30845a9f62dc823c5242d0
mm: turn migrate_vma upside down
There isn't any good reason to pass callbacks to migrate_vma. Instead
we can just export the three steps done by this function to drivers and
let them sequence the operation without callbacks. This removes a lot
of boilerplate code as-is, and will allow the drivers to drastically
improve code flow and error handling further on.
Link: https://lore.kernel.org/r/20190814075928.23766-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--   Documentation/vm/hmm.rst                |  54
-rw-r--r--   drivers/gpu/drm/nouveau/nouveau_dmem.c  | 122
-rw-r--r--   include/linux/migrate.h                 | 118
-rw-r--r--   mm/migrate.c                            | 244
4 files changed, 194 insertions(+), 344 deletions(-)
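
For orientation only (this sketch is not part of the patch), the driver-side
sequence the commit message describes could look roughly as follows for
migrating a single device page back to system memory on a CPU fault;
my_copy_from_device() is a hypothetical stand-in for the driver's DMA copy
helper, and the pattern simply mirrors the nouveau conversion below::

    /* Hypothetical driver hook, not a real API: */
    static int my_copy_from_device(struct page *dpage, struct page *spage);

    static vm_fault_t my_migrate_to_ram(struct vm_fault *vmf)
    {
            unsigned long src = 0, dst = 0;
            struct migrate_vma args = {
                    .vma   = vmf->vma,
                    .start = vmf->address,
                    .end   = vmf->address + PAGE_SIZE,
                    .src   = &src,
                    .dst   = &dst,
            };
            struct page *spage, *dpage;

            /* 1) Collect, lock and unmap the faulting page. */
            if (migrate_vma_setup(&args) < 0)
                    return VM_FAULT_SIGBUS;
            if (!args.cpages)
                    return 0;

            /* 2) Allocate system memory and copy the device page into it. */
            spage = migrate_pfn_to_page(src);
            if (spage && (src & MIGRATE_PFN_MIGRATE)) {
                    dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma,
                                           vmf->address);
                    if (dpage && !my_copy_from_device(dpage, spage)) {
                            lock_page(dpage);
                            dst = migrate_pfn(page_to_pfn(dpage)) |
                                  MIGRATE_PFN_LOCKED;
                    } else {
                            dst = MIGRATE_PFN_ERROR;
                    }
            }

            /* 3) Move struct page metadata, then fix up the CPU page table. */
            migrate_vma_pages(&args);
            migrate_vma_finalize(&args);

            return dst == MIGRATE_PFN_ERROR ? VM_FAULT_SIGBUS : 0;
    }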
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index e63c11f7e0e0..0a5960beccf7 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -339,58 +339,8 @@ Migration to and from device memory
 ===================================
 
 Because the CPU cannot access device memory, migration must use the device DMA
-engine to perform copy from and to device memory. For this we need a new
-migration helper::
-
-   int migrate_vma(const struct migrate_vma_ops *ops,
-                   struct vm_area_struct *vma,
-                   unsigned long mentries,
-                   unsigned long start,
-                   unsigned long end,
-                   unsigned long *src,
-                   unsigned long *dst,
-                   void *private);
-
-Unlike other migration functions it works on a range of virtual address, there
-are two reasons for that. First, device DMA copy has a high setup overhead cost
-and thus batching multiple pages is needed as otherwise the migration overhead
-makes the whole exercise pointless. The second reason is because the
-migration might be for a range of addresses the device is actively accessing.
-
-The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
-controls destination memory allocation and copy operation. Second one is there
-to allow the device driver to perform cleanup operations after migration::
-
-   struct migrate_vma_ops {
-       void (*alloc_and_copy)(struct vm_area_struct *vma,
-                              const unsigned long *src,
-                              unsigned long *dst,
-                              unsigned long start,
-                              unsigned long end,
-                              void *private);
-       void (*finalize_and_map)(struct vm_area_struct *vma,
-                                const unsigned long *src,
-                                const unsigned long *dst,
-                                unsigned long start,
-                                unsigned long end,
-                                void *private);
-   };
-
-It is important to stress that these migration helpers allow for holes in the
-virtual address range. Some pages in the range might not be migrated for all
-the usual reasons (page is pinned, page is locked, ...). This helper does not
-fail but just skips over those pages.
-
-The alloc_and_copy() might decide to not migrate all pages in the
-range (for reasons under the callback control). For those, the callback just
-has to leave the corresponding dst entry empty.
-
-Finally, the migration of the struct page might fail (for file backed page) for
-various reasons (failure to freeze reference, or update page cache, ...). If
-that happens, then the finalize_and_map() can catch any pages that were not
-migrated. Note those pages were still copied to a new page and thus we wasted
-bandwidth but this is considered as a rare event and a price that we are
-willing to pay to keep all the code simpler.
+engine to perform copy from and to device memory. For this we need to use
+migrate_vma_setup(), migrate_vma_pages(), and migrate_vma_finalize() helpers.
 
 
 Memory cgroup (memcg) and rss accounting
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 345c63cb752a..38416798abd4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -131,9 +131,8 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
                                   unsigned long *dst_pfns,
                                   unsigned long start,
                                   unsigned long end,
-                                  void *private)
+                                  struct nouveau_dmem_fault *fault)
 {
-        struct nouveau_dmem_fault *fault = private;
         struct nouveau_drm *drm = fault->drm;
         struct device *dev = drm->dev->dev;
         unsigned long addr, i, npages = 0;
@@ -230,14 +229,9 @@ error:
         }
 }
 
-void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
-                                         const unsigned long *src_pfns,
-                                         const unsigned long *dst_pfns,
-                                         unsigned long start,
-                                         unsigned long end,
-                                         void *private)
+static void
+nouveau_dmem_fault_finalize_and_map(struct nouveau_dmem_fault *fault)
 {
-        struct nouveau_dmem_fault *fault = private;
         struct nouveau_drm *drm = fault->drm;
 
         if (fault->fence) {
@@ -257,29 +251,35 @@ void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
         kfree(fault->dma);
 }
 
-static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
-        .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy,
-        .finalize_and_map = nouveau_dmem_fault_finalize_and_map,
-};
-
 static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 {
         struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
         unsigned long src[1] = {0}, dst[1] = {0};
+        struct migrate_vma args = {
+                .vma = vmf->vma,
+                .start = vmf->address,
+                .end = vmf->address + PAGE_SIZE,
+                .src = src,
+                .dst = dst,
+        };
         struct nouveau_dmem_fault fault = { .drm = dmem->drm };
-        int ret;
 
         /*
          * FIXME what we really want is to find some heuristic to migrate more
          * than just one page on CPU fault. When such fault happens it is very
          * likely that more surrounding page will CPU fault too.
          */
-        ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma,
-                        vmf->address, vmf->address + PAGE_SIZE,
-                        src, dst, &fault);
-        if (ret)
+        if (migrate_vma_setup(&args) < 0)
                 return VM_FAULT_SIGBUS;
+        if (!args.cpages)
+                return 0;
+
+        nouveau_dmem_fault_alloc_and_copy(args.vma, src, dst, args.start,
+                        args.end, &fault);
+        migrate_vma_pages(&args);
+        nouveau_dmem_fault_finalize_and_map(&fault);
 
+        migrate_vma_finalize(&args);
         if (dst[0] == MIGRATE_PFN_ERROR)
                 return VM_FAULT_SIGBUS;
 
@@ -648,9 +648,8 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
                                     unsigned long *dst_pfns,
                                     unsigned long start,
                                     unsigned long end,
-                                    void *private)
+                                    struct nouveau_migrate *migrate)
 {
-        struct nouveau_migrate *migrate = private;
         struct nouveau_drm *drm = migrate->drm;
         struct device *dev = drm->dev->dev;
         unsigned long addr, i, npages = 0;
@@ -747,14 +746,9 @@ error:
         }
 }
 
-void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
-                                           const unsigned long *src_pfns,
-                                           const unsigned long *dst_pfns,
-                                           unsigned long start,
-                                           unsigned long end,
-                                           void *private)
+static void
+nouveau_dmem_migrate_finalize_and_map(struct nouveau_migrate *migrate)
 {
-        struct nouveau_migrate *migrate = private;
         struct nouveau_drm *drm = migrate->drm;
 
         if (migrate->fence) {
@@ -779,10 +773,15 @@ void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
          */
 }
 
-static const struct migrate_vma_ops nouveau_dmem_migrate_ops = {
-        .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy,
-        .finalize_and_map = nouveau_dmem_migrate_finalize_and_map,
-};
+static void nouveau_dmem_migrate_chunk(struct migrate_vma *args,
+                struct nouveau_migrate *migrate)
+{
+        nouveau_dmem_migrate_alloc_and_copy(args->vma, args->src, args->dst,
+                        args->start, args->end, migrate);
+        migrate_vma_pages(args);
+        nouveau_dmem_migrate_finalize_and_map(migrate);
+        migrate_vma_finalize(args);
+}
 
 int
 nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
@@ -790,40 +789,45 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
                          unsigned long start,
                          unsigned long end)
 {
-        unsigned long *src_pfns, *dst_pfns, npages;
-        struct nouveau_migrate migrate = {0};
-        unsigned long i, c, max;
-        int ret = 0;
-
-        npages = (end - start) >> PAGE_SHIFT;
-        max = min(SG_MAX_SINGLE_ALLOC, npages);
-        src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
-        if (src_pfns == NULL)
-                return -ENOMEM;
-        dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
-        if (dst_pfns == NULL) {
-                kfree(src_pfns);
-                return -ENOMEM;
-        }
+        unsigned long npages = (end - start) >> PAGE_SHIFT;
+        unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages);
+        struct migrate_vma args = {
+                .vma = vma,
+                .start = start,
+        };
+        struct nouveau_migrate migrate = {
+                .drm = drm,
+                .vma = vma,
+                .npages = npages,
+        };
+        unsigned long c, i;
+        int ret = -ENOMEM;
+
+        args.src = kzalloc(sizeof(long) * max, GFP_KERNEL);
+        if (!args.src)
+                goto out;
+        args.dst = kzalloc(sizeof(long) * max, GFP_KERNEL);
+        if (!args.dst)
+                goto out_free_src;
 
-        migrate.drm = drm;
-        migrate.vma = vma;
-        migrate.npages = npages;
         for (i = 0; i < npages; i += c) {
-                unsigned long next;
-
                 c = min(SG_MAX_SINGLE_ALLOC, npages);
-                next = start + (c << PAGE_SHIFT);
-                ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start,
-                                  next, src_pfns, dst_pfns, &migrate);
+                args.end = start + (c << PAGE_SHIFT);
+                ret = migrate_vma_setup(&args);
                 if (ret)
-                        goto out;
-                start = next;
+                        goto out_free_dst;
+
+                if (args.cpages)
+                        nouveau_dmem_migrate_chunk(&args, &migrate);
+                args.start = args.end;
         }
 
+        ret = 0;
+out_free_dst:
+        kfree(args.dst);
+out_free_src:
+        kfree(args.src);
 out:
-        kfree(dst_pfns);
-        kfree(src_pfns);
         return ret;
 }
 
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 7f04754c7f2b..18156d379ebf 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -182,107 +182,27 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
         return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
 }
 
-/*
- * struct migrate_vma_ops - migrate operation callback
- *
- * @alloc_and_copy: alloc destination memory and copy source memory to it
- * @finalize_and_map: allow caller to map the successfully migrated pages
- *
- *
- * The alloc_and_copy() callback happens once all source pages have been locked,
- * unmapped and checked (checked whether pinned or not). All pages that can be
- * migrated will have an entry in the src array set with the pfn value of the
- * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other
- * flags might be set but should be ignored by the callback).
- *
- * The alloc_and_copy() callback can then allocate destination memory and copy
- * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and
- * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the
- * callback must update each corresponding entry in the dst array with the pfn
- * value of the destination page and with the MIGRATE_PFN_VALID and
- * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages
- * locked, via lock_page()).
- *
- * At this point the alloc_and_copy() callback is done and returns.
- *
- * Note that the callback does not have to migrate all the pages that are
- * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration
- * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also
- * set in the src array entry). If the device driver cannot migrate a device
- * page back to system memory, then it must set the corresponding dst array
- * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if CPU tries to
- * access any of the virtual addresses originally backed by this page. Because
- * a SIGBUS is such a severe result for the userspace process, the device
- * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an
- * unrecoverable state.
- *
- * For empty entry inside CPU page table (pte_none() or pmd_none() is true) we
- * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
- * allowing device driver to allocate device memory for those unback virtual
- * address. For this the device driver simply have to allocate device memory
- * and properly set the destination entry like for regular migration. Note that
- * this can still fails and thus inside the device driver must check if the
- * migration was successful for those entry inside the finalize_and_map()
- * callback just like for regular migration.
- *
- * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES
- * OR BAD THINGS WILL HAPPEN !
- *
- *
- * The finalize_and_map() callback happens after struct page migration from
- * source to destination (destination struct pages are the struct pages for the
- * memory allocated by the alloc_and_copy() callback). Migration can fail, and
- * thus the finalize_and_map() allows the driver to inspect which pages were
- * successfully migrated, and which were not. Successfully migrated pages will
- * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
- *
- * It is safe to update device page table from within the finalize_and_map()
- * callback because both destination and source page are still locked, and the
- * mmap_sem is held in read mode (hence no one can unmap the range being
- * migrated).
- *
- * Once callback is done cleaning up things and updating its page table (if it
- * chose to do so, this is not an obligation) then it returns. At this point,
- * the HMM core will finish up the final steps, and the migration is complete.
- *
- * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY
- * ENTRIES OR BAD THINGS WILL HAPPEN !
- */
-struct migrate_vma_ops {
-        void (*alloc_and_copy)(struct vm_area_struct *vma,
-                               const unsigned long *src,
-                               unsigned long *dst,
-                               unsigned long start,
-                               unsigned long end,
-                               void *private);
-        void (*finalize_and_map)(struct vm_area_struct *vma,
-                                 const unsigned long *src,
-                                 const unsigned long *dst,
-                                 unsigned long start,
-                                 unsigned long end,
-                                 void *private);
+struct migrate_vma {
+        struct vm_area_struct   *vma;
+        /*
+         * Both src and dst array must be big enough for
+         * (end - start) >> PAGE_SHIFT entries.
+         *
+         * The src array must not be modified by the caller after
+         * migrate_vma_setup(), and must not change the dst array after
+         * migrate_vma_pages() returns.
+         */
+        unsigned long           *dst;
+        unsigned long           *src;
+        unsigned long           cpages;
+        unsigned long           npages;
+        unsigned long           start;
+        unsigned long           end;
 };
 
-#if defined(CONFIG_MIGRATE_VMA_HELPER)
-int migrate_vma(const struct migrate_vma_ops *ops,
-                struct vm_area_struct *vma,
-                unsigned long start,
-                unsigned long end,
-                unsigned long *src,
-                unsigned long *dst,
-                void *private);
-#else
-static inline int migrate_vma(const struct migrate_vma_ops *ops,
-                              struct vm_area_struct *vma,
-                              unsigned long start,
-                              unsigned long end,
-                              unsigned long *src,
-                              unsigned long *dst,
-                              void *private)
-{
-        return -EINVAL;
-}
-#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */
+int migrate_vma_setup(struct migrate_vma *args);
+void migrate_vma_pages(struct migrate_vma *migrate);
+void migrate_vma_finalize(struct migrate_vma *migrate);
 
 #endif /* CONFIG_MIGRATION */
 
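
As a usage note (again not part of the patch), a driver migrating a larger
range with the interface declared above can reuse one pair of src/dst arrays
and walk the range in fixed-size chunks, much as the nouveau conversion does;
my_alloc_and_copy_chunk() and MY_CHUNK_PAGES are placeholders for the driver's
allocate-and-copy step and chunk size, and one possible shape for the former is
sketched after the mm/migrate.c hunk below::

    /* Placeholder for the driver's allocate-and-copy step: */
    static void my_alloc_and_copy_chunk(struct migrate_vma *args);

    #define MY_CHUNK_PAGES  64UL    /* arbitrary chunk size for the sketch */

    static int my_migrate_range(struct vm_area_struct *vma,
                                unsigned long start, unsigned long end)
    {
            unsigned long npages = (end - start) >> PAGE_SHIFT;
            unsigned long max = min(MY_CHUNK_PAGES, npages);
            struct migrate_vma args = {
                    .vma   = vma,
                    .start = start,
            };
            unsigned long i, c;
            int ret = -ENOMEM;

            args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
            if (!args.src)
                    return ret;
            args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL);
            if (!args.dst)
                    goto out_free_src;

            ret = 0;
            for (i = 0; i < npages; i += c) {
                    c = min(max, npages - i);
                    args.end = args.start + (c << PAGE_SHIFT);

                    ret = migrate_vma_setup(&args);
                    if (ret)
                            break;

                    if (args.cpages) {
                            my_alloc_and_copy_chunk(&args); /* fill args.dst */
                            migrate_vma_pages(&args);
                            migrate_vma_finalize(&args);
                    }
                    args.start = args.end;
            }

            kfree(args.dst);
    out_free_src:
            kfree(args.src);
            return ret;
    }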
diff --git a/mm/migrate.c b/mm/migrate.c
index 8992741f10aa..8111e031fa2b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2118,16 +2118,6 @@ out_unlock:
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_MIGRATE_VMA_HELPER)
-struct migrate_vma {
-        struct vm_area_struct   *vma;
-        unsigned long           *dst;
-        unsigned long           *src;
-        unsigned long           cpages;
-        unsigned long           npages;
-        unsigned long           start;
-        unsigned long           end;
-};
-
 static int migrate_vma_collect_hole(unsigned long start,
                                     unsigned long end,
                                     struct mm_walk *walk)
@@ -2578,6 +2568,110 @@ restore:
         }
 }
 
+/**
+ * migrate_vma_setup() - prepare to migrate a range of memory
+ * @args: contains the vma, start, and and pfns arrays for the migration
+ *
+ * Returns: negative errno on failures, 0 when 0 or more pages were migrated
+ * without an error.
+ *
+ * Prepare to migrate a range of memory virtual address range by collecting all
+ * the pages backing each virtual address in the range, saving them inside the
+ * src array. Then lock those pages and unmap them. Once the pages are locked
+ * and unmapped, check whether each page is pinned or not. Pages that aren't
+ * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
+ * corresponding src array entry. Then restores any pages that are pinned, by
+ * remapping and unlocking those pages.
+ *
+ * The caller should then allocate destination memory and copy source memory to
+ * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
+ * flag set). Once these are allocated and copied, the caller must update each
+ * corresponding entry in the dst array with the pfn value of the destination
+ * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
+ * (destination pages must have their struct pages locked, via lock_page()).
+ *
+ * Note that the caller does not have to migrate all the pages that are marked
+ * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
+ * device memory to system memory. If the caller cannot migrate a device page
+ * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe
+ * consequences for the userspace process, so it must be avoided if at all
+ * possible.
+ *
+ * For empty entries inside CPU page table (pte_none() or pmd_none() is true) we
+ * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
+ * allowing the caller to allocate device memory for those unback virtual
+ * address. For this the caller simply has to allocate device memory and
+ * properly set the destination entry like for regular migration. Note that
+ * this can still fails and thus inside the device driver must check if the
+ * migration was successful for those entries after calling migrate_vma_pages()
+ * just like for regular migration.
+ *
+ * After that, the callers must call migrate_vma_pages() to go over each entry
+ * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
+ * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
+ * then migrate_vma_pages() to migrate struct page information from the source
+ * struct page to the destination struct page. If it fails to migrate the
+ * struct page information, then it clears the MIGRATE_PFN_MIGRATE flag in the
+ * src array.
+ *
+ * At this point all successfully migrated pages have an entry in the src
+ * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
+ * array entry with MIGRATE_PFN_VALID flag set.
+ *
+ * Once migrate_vma_pages() returns the caller may inspect which pages were
+ * successfully migrated, and which were not. Successfully migrated pages will
+ * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
+ *
+ * It is safe to update device page table after migrate_vma_pages() because
+ * both destination and source page are still locked, and the mmap_sem is held
+ * in read mode (hence no one can unmap the range being migrated).
+ *
+ * Once the caller is done cleaning up things and updating its page table (if it
+ * chose to do so, this is not an obligation) it finally calls
+ * migrate_vma_finalize() to update the CPU page table to point to new pages
+ * for successfully migrated pages or otherwise restore the CPU page table to
+ * point to the original source pages.
+ */
+int migrate_vma_setup(struct migrate_vma *args)
+{
+        long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
+
+        args->start &= PAGE_MASK;
+        args->end &= PAGE_MASK;
+        if (!args->vma || is_vm_hugetlb_page(args->vma) ||
+            (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
+                return -EINVAL;
+        if (nr_pages <= 0)
+                return -EINVAL;
+        if (args->start < args->vma->vm_start ||
+            args->start >= args->vma->vm_end)
+                return -EINVAL;
+        if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
+                return -EINVAL;
+        if (!args->src || !args->dst)
+                return -EINVAL;
+
+        memset(args->src, 0, sizeof(*args->src) * nr_pages);
+        args->cpages = 0;
+        args->npages = 0;
+
+        migrate_vma_collect(args);
+
+        if (args->cpages)
+                migrate_vma_prepare(args);
+        if (args->cpages)
+                migrate_vma_unmap(args);
+
+        /*
+         * At this point pages are locked and unmapped, and thus they have
+         * stable content and can safely be copied to destination memory that
+         * is allocated by the drivers.
+         */
+        return 0;
+
+}
+EXPORT_SYMBOL(migrate_vma_setup);
+
 static void migrate_vma_insert_page(struct migrate_vma *migrate,
                                     unsigned long addr,
                                     struct page *page,
@@ -2709,7 +2803,7 @@ abort:
         *src &= ~MIGRATE_PFN_MIGRATE;
 }
 
-/*
+/**
  * migrate_vma_pages() - migrate meta-data from src page to dst page
  * @migrate: migrate struct containing all migration information
  *
@@ -2717,7 +2811,7 @@ abort:
  * struct page. This effectively finishes the migration from source page to the
  * destination page.
  */
-static void migrate_vma_pages(struct migrate_vma *migrate)
+void migrate_vma_pages(struct migrate_vma *migrate)
 {
         const unsigned long npages = migrate->npages;
         const unsigned long start = migrate->start;
@@ -2791,8 +2885,9 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
         if (notified)
                 mmu_notifier_invalidate_range_only_end(&range);
 }
+EXPORT_SYMBOL(migrate_vma_pages);
 
-/*
+/**
  * migrate_vma_finalize() - restore CPU page table entry
  * @migrate: migrate struct containing all migration information
  *
@@ -2803,7 +2898,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
  * This also unlocks the pages and puts them back on the lru, or drops the extra
  * refcount, for device pages.
  */
-static void migrate_vma_finalize(struct migrate_vma *migrate)
+void migrate_vma_finalize(struct migrate_vma *migrate)
 {
         const unsigned long npages = migrate->npages;
         unsigned long i;
@@ -2846,124 +2941,5 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
                 }
         }
 }
-
-/*
- * migrate_vma() - migrate a range of memory inside vma
- *
- * @ops: migration callback for allocating destination memory and copying
- * @vma: virtual memory area containing the range to be migrated
- * @start: start address of the range to migrate (inclusive)
- * @end: end address of the range to migrate (exclusive)
- * @src: array of hmm_pfn_t containing source pfns
- * @dst: array of hmm_pfn_t containing destination pfns
- * @private: pointer passed back to each of the callback
- * Returns: 0 on success, error code otherwise
- *
- * This function tries to migrate a range of memory virtual address range, using
- * callbacks to allocate and copy memory from source to destination. First it
- * collects all the pages backing each virtual address in the range, saving this
- * inside the src array. Then it locks those pages and unmaps them. Once the pages
- * are locked and unmapped, it checks whether each page is pinned or not. Pages
- * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function)
- * in the corresponding src array entry. It then restores any pages that are
- * pinned, by remapping and unlocking those pages.
- *
- * At this point it calls the alloc_and_copy() callback. For documentation on
- * what is expected from that callback, see struct migrate_vma_ops comments in
- * include/linux/migrate.h
- *
- * After the alloc_and_copy() callback, this function goes over each entry in
- * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
- * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
- * then the function tries to migrate struct page information from the source
- * struct page to the destination struct page. If it fails to migrate the struct
- * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src
- * array.
- *
- * At this point all successfully migrated pages have an entry in the src
- * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
- * array entry with MIGRATE_PFN_VALID flag set.
- *
- * It then calls the finalize_and_map() callback. See comments for "struct
- * migrate_vma_ops", in include/linux/migrate.h for details about
- * finalize_and_map() behavior.
- *
- * After the finalize_and_map() callback, for successfully migrated pages, this
- * function updates the CPU page table to point to new pages, otherwise it
- * restores the CPU page table to point to the original source pages.
- *
- * Function returns 0 after the above steps, even if no pages were migrated
- * (The function only returns an error if any of the arguments are invalid.)
- *
- * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT
- * unsigned long entries.
- */
-int migrate_vma(const struct migrate_vma_ops *ops,
-                struct vm_area_struct *vma,
-                unsigned long start,
-                unsigned long end,
-                unsigned long *src,
-                unsigned long *dst,
-                void *private)
-{
-        struct migrate_vma migrate;
-
-        /* Sanity check the arguments */
-        start &= PAGE_MASK;
-        end &= PAGE_MASK;
-        if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
-                        vma_is_dax(vma))
-                return -EINVAL;
-        if (start < vma->vm_start || start >= vma->vm_end)
-                return -EINVAL;
-        if (end <= vma->vm_start || end > vma->vm_end)
-                return -EINVAL;
-        if (!ops || !src || !dst || start >= end)
-                return -EINVAL;
-
-        memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
-        migrate.src = src;
-        migrate.dst = dst;
-        migrate.start = start;
-        migrate.npages = 0;
-        migrate.cpages = 0;
-        migrate.end = end;
-        migrate.vma = vma;
-
-        /* Collect, and try to unmap source pages */
-        migrate_vma_collect(&migrate);
-        if (!migrate.cpages)
-                return 0;
-
-        /* Lock and isolate page */
-        migrate_vma_prepare(&migrate);
-        if (!migrate.cpages)
-                return 0;
-
-        /* Unmap pages */
-        migrate_vma_unmap(&migrate);
-        if (!migrate.cpages)
-                return 0;
-
-        /*
-         * At this point pages are locked and unmapped, and thus they have
-         * stable content and can safely be copied to destination memory that
-         * is allocated by the callback.
-         *
-         * Note that migration can fail in migrate_vma_struct_page() for each
-         * individual page.
-         */
-        ops->alloc_and_copy(vma, src, dst, start, end, private);
-
-        /* This does the real migration of struct page */
-        migrate_vma_pages(&migrate);
-
-        ops->finalize_and_map(vma, src, dst, start, end, private);
-
-        /* Unlock and remap pages */
-        migrate_vma_finalize(&migrate);
-
-        return 0;
-}
-EXPORT_SYMBOL(migrate_vma);
+EXPORT_SYMBOL(migrate_vma_finalize);
 #endif /* defined(MIGRATE_VMA_HELPER) */
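
To make the caller contract in the migrate_vma_setup() kerneldoc above
concrete, the per-chunk allocate-and-copy placeholder used in the earlier
sketch could, under the same assumptions, look like this;
my_alloc_device_page() and my_dma_copy() are hypothetical driver hooks, and any
entry that cannot be migrated is simply left at zero so the core skips it::

    /* Hypothetical driver hooks, not a real API: */
    static struct page *my_alloc_device_page(void);
    static void my_dma_copy(struct page *dpage, struct page *spage);

    static void my_alloc_and_copy_chunk(struct migrate_vma *args)
    {
            unsigned long addr, i;

            for (i = 0, addr = args->start; addr < args->end;
                 i++, addr += PAGE_SIZE) {
                    struct page *spage = migrate_pfn_to_page(args->src[i]);
                    struct page *dpage;

                    /*
                     * Only entries flagged by migrate_vma_setup() may move;
                     * holes (no source page) are skipped in this sketch.
                     */
                    if (!spage || !(args->src[i] & MIGRATE_PFN_MIGRATE))
                            continue;

                    dpage = my_alloc_device_page();
                    if (!dpage)
                            continue;   /* dst stays 0: page not migrated */

                    lock_page(dpage);   /* destination pages must be locked */
                    my_dma_copy(dpage, spage);

                    /* migrate_pfn() already sets MIGRATE_PFN_VALID. */
                    args->dst[i] = migrate_pfn(page_to_pfn(dpage)) |
                                   MIGRATE_PFN_LOCKED;
            }
    }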