Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c  492
1 file changed, 492 insertions, 0 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 71de36cfb673..991e8886093f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -428,6 +428,14 @@ int migrate_page_move_mapping(struct address_space *mapping,
         int expected_count = 1 + extra_count;
         void **pslot;
 
+        /*
+         * ZONE_DEVICE pages have 1 refcount always held by their device
+         *
+         * Note that DAX memory will never reach that point as it does not have
+         * the MEMORY_DEVICE_ALLOW_MIGRATE flag set (see memory_hotplug.h).
+         */
+        expected_count += is_zone_device_page(page);
+
         if (!mapping) {
                 /* Anonymous page without mapping */
                 if (page_count(page) != expected_count)
@@ -2106,3 +2114,487 @@ out_unlock:
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* CONFIG_NUMA */
+
+
+struct migrate_vma {
+        struct vm_area_struct *vma;
+        unsigned long *dst;
+        unsigned long *src;
+        unsigned long cpages;
+        unsigned long npages;
+        unsigned long start;
+        unsigned long end;
+};
+
+static int migrate_vma_collect_hole(unsigned long start,
+                                    unsigned long end,
+                                    struct mm_walk *walk)
+{
+        struct migrate_vma *migrate = walk->private;
+        unsigned long addr;
+
+        for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+                migrate->dst[migrate->npages] = 0;
+                migrate->src[migrate->npages++] = 0;
+        }
+
+        return 0;
+}
+
+static int migrate_vma_collect_pmd(pmd_t *pmdp,
+                                   unsigned long start,
+                                   unsigned long end,
+                                   struct mm_walk *walk)
+{
+        struct migrate_vma *migrate = walk->private;
+        struct vm_area_struct *vma = walk->vma;
+        struct mm_struct *mm = vma->vm_mm;
+        unsigned long addr = start;
+        spinlock_t *ptl;
+        pte_t *ptep;
+
+again:
+        if (pmd_none(*pmdp))
+                return migrate_vma_collect_hole(start, end, walk);
+
+        if (pmd_trans_huge(*pmdp)) {
+                struct page *page;
+
+                ptl = pmd_lock(mm, pmdp);
+                if (unlikely(!pmd_trans_huge(*pmdp))) {
+                        spin_unlock(ptl);
+                        goto again;
+                }
+
+                page = pmd_page(*pmdp);
+                if (is_huge_zero_page(page)) {
+                        spin_unlock(ptl);
+                        split_huge_pmd(vma, pmdp, addr);
+                        if (pmd_trans_unstable(pmdp))
+                                return migrate_vma_collect_hole(start, end,
+                                                                walk);
+                } else {
+                        int ret;
+
+                        get_page(page);
+                        spin_unlock(ptl);
+                        if (unlikely(!trylock_page(page)))
+                                return migrate_vma_collect_hole(start, end,
+                                                                walk);
+                        ret = split_huge_page(page);
+                        unlock_page(page);
+                        put_page(page);
+                        if (ret || pmd_none(*pmdp))
+                                return migrate_vma_collect_hole(start, end,
+                                                                walk);
+                }
+        }
+
+        if (unlikely(pmd_bad(*pmdp)))
+                return migrate_vma_collect_hole(start, end, walk);
+
+        ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+        for (; addr < end; addr += PAGE_SIZE, ptep++) {
+                unsigned long mpfn, pfn;
+                struct page *page;
+                pte_t pte;
+
+                pte = *ptep;
+                pfn = pte_pfn(pte);
+
+                if (!pte_present(pte)) {
+                        mpfn = pfn = 0;
+                        goto next;
+                }
+
+                /* FIXME support THP */
+                page = vm_normal_page(migrate->vma, addr, pte);
+                if (!page || !page->mapping || PageTransCompound(page)) {
+                        mpfn = pfn = 0;
+                        goto next;
+                }
+
+                /*
+                 * By getting a reference on the page we pin it and that blocks
+                 * any kind of migration. A side effect is that it "freezes"
+                 * the pte.
+                 *
+                 * We drop this reference after isolating the page from the lru
+                 * for non-device pages (device pages are not on the lru and
+                 * thus cannot be dropped from it).
+                 */
+                get_page(page);
+                migrate->cpages++;
+                mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
+                mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
+
+next:
+                migrate->src[migrate->npages++] = mpfn;
+        }
+        pte_unmap_unlock(ptep - 1, ptl);
+
+        return 0;
+}
+
+/*
+ * migrate_vma_collect() - collect pages over a range of virtual addresses
+ * @migrate: migrate struct containing all migration information
+ *
+ * This will walk the CPU page table. For each virtual address backed by a
+ * valid page, it updates the src array and takes a reference on the page, in
+ * order to pin the page until we lock it and unmap it.
+ */
+static void migrate_vma_collect(struct migrate_vma *migrate)
+{
+        struct mm_walk mm_walk;
+
+        mm_walk.pmd_entry = migrate_vma_collect_pmd;
+        mm_walk.pte_entry = NULL;
+        mm_walk.pte_hole = migrate_vma_collect_hole;
+        mm_walk.hugetlb_entry = NULL;
+        mm_walk.test_walk = NULL;
+        mm_walk.vma = migrate->vma;
+        mm_walk.mm = migrate->vma->vm_mm;
+        mm_walk.private = migrate;
+
+        walk_page_range(migrate->start, migrate->end, &mm_walk);
+
+        migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
+}
+
+/*
+ * migrate_vma_check_page() - check if page is pinned or not
+ * @page: struct page to check
+ *
+ * Pinned pages cannot be migrated. This is the same test as in
+ * migrate_page_move_mapping(), except that here we allow migration of a
+ * ZONE_DEVICE page.
+ */
+static bool migrate_vma_check_page(struct page *page)
+{
+        /*
+         * One extra ref because caller holds an extra reference, either from
+         * isolate_lru_page() for a regular page, or migrate_vma_collect() for
+         * a device page.
+         */
+        int extra = 1;
+
+        /*
+         * FIXME support THP (transparent huge page), it is a bit more complex
+         * to check them than regular pages, because they can be mapped with a
+         * pmd or with a pte (split pte mapping).
+         */
+        if (PageCompound(page))
+                return false;
+
+        if ((page_count(page) - extra) > page_mapcount(page))
+                return false;
+
+        return true;
+}
+
+/*
+ * migrate_vma_prepare() - lock pages and isolate them from the lru
+ * @migrate: migrate struct containing all migration information
+ *
+ * This locks pages that have been collected by migrate_vma_collect(). Once each
+ * page is locked it is isolated from the lru (for non-device pages). Finally,
+ * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be
+ * migrated by concurrent kernel threads.
+ */
+static void migrate_vma_prepare(struct migrate_vma *migrate)
+{
+        const unsigned long npages = migrate->npages;
+        bool allow_drain = true;
+        unsigned long i;
+
+        lru_add_drain();
+
+        for (i = 0; (i < npages) && migrate->cpages; i++) {
+                struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+                if (!page)
+                        continue;
+
+                /*
+                 * Because we are migrating several pages there can be a
+                 * deadlock between two concurrent migrations where each is
+                 * waiting on the other's page lock.
+                 *
+                 * Make migrate_vma() a best-effort thing and back off for any
+                 * page we cannot lock right away.
+                 */
+                if (!trylock_page(page)) {
+                        migrate->src[i] = 0;
+                        migrate->cpages--;
+                        put_page(page);
+                        continue;
+                }
+                migrate->src[i] |= MIGRATE_PFN_LOCKED;
+
+                if (!PageLRU(page) && allow_drain) {
+                        /* Drain CPU's pagevec */
+                        lru_add_drain_all();
+                        allow_drain = false;
+                }
+
+                if (isolate_lru_page(page)) {
+                        migrate->src[i] = 0;
+                        unlock_page(page);
+                        migrate->cpages--;
+                        put_page(page);
+                        continue;
+                }
+
+                if (!migrate_vma_check_page(page)) {
+                        migrate->src[i] = 0;
+                        unlock_page(page);
+                        migrate->cpages--;
+
+                        putback_lru_page(page);
+                }
+        }
+}
+
+/*
+ * migrate_vma_unmap() - replace page mapping with special migration pte entry
+ * @migrate: migrate struct containing all migration information
+ *
+ * Replace page mapping (CPU page table pte) with a special migration pte entry
+ * and check again if it has been pinned. Pinned pages are restored because we
+ * cannot migrate them.
+ *
+ * This is the last step before we call the device driver callback to allocate
+ * destination memory and copy contents of original page over to new page.
+ */
+static void migrate_vma_unmap(struct migrate_vma *migrate)
+{
+        int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+        const unsigned long npages = migrate->npages;
+        const unsigned long start = migrate->start;
+        unsigned long addr, i, restore = 0;
+
+        for (i = 0; i < npages; i++) {
+                struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+                if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+                        continue;
+
+                try_to_unmap(page, flags);
+                if (page_mapped(page) || !migrate_vma_check_page(page)) {
+                        migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+                        migrate->cpages--;
+                        restore++;
+                }
+        }
+
+        for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
+                struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+                if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
+                        continue;
+
+                remove_migration_ptes(page, page, false);
+
+                migrate->src[i] = 0;
+                unlock_page(page);
+                restore--;
+
+                putback_lru_page(page);
+        }
+}
+
+/*
+ * migrate_vma_pages() - migrate meta-data from src page to dst page
+ * @migrate: migrate struct containing all migration information
+ *
+ * This migrates struct page meta-data from source struct page to destination
+ * struct page. This effectively finishes the migration from source page to the
+ * destination page.
+ */
+static void migrate_vma_pages(struct migrate_vma *migrate)
+{
+        const unsigned long npages = migrate->npages;
+        const unsigned long start = migrate->start;
+        unsigned long addr, i;
+
+        for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
+                struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
+                struct page *page = migrate_pfn_to_page(migrate->src[i]);
+                struct address_space *mapping;
+                int r;
+
+                if (!page || !newpage)
+                        continue;
+                if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+                        continue;
+
+                mapping = page_mapping(page);
+
+                r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
+                if (r != MIGRATEPAGE_SUCCESS)
+                        migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+        }
+}
+
+/*
+ * migrate_vma_finalize() - restore CPU page table entry
+ * @migrate: migrate struct containing all migration information
+ *
+ * This replaces the special migration pte entry with either a mapping to the
+ * new page if migration was successful for that page, or to the original page
+ * otherwise.
+ *
+ * This also unlocks the pages and puts them back on the lru, or drops the extra
+ * refcount, for device pages.
+ */
+static void migrate_vma_finalize(struct migrate_vma *migrate)
+{
+        const unsigned long npages = migrate->npages;
+        unsigned long i;
+
+        for (i = 0; i < npages; i++) {
+                struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
+                struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+                if (!page)
+                        continue;
+                if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
+                        if (newpage) {
+                                unlock_page(newpage);
+                                put_page(newpage);
+                        }
+                        newpage = page;
+                }
+
+                remove_migration_ptes(page, newpage, false);
+                unlock_page(page);
+                migrate->cpages--;
+
+                putback_lru_page(page);
+
+                if (newpage != page) {
+                        unlock_page(newpage);
+                        putback_lru_page(newpage);
+                }
+        }
+}
+
+/*
+ * migrate_vma() - migrate a range of memory inside vma
+ *
+ * @ops: migration callback for allocating destination memory and copying
+ * @vma: virtual memory area containing the range to be migrated
+ * @start: start address of the range to migrate (inclusive)
+ * @end: end address of the range to migrate (exclusive)
+ * @src: array of unsigned long containing source pfns
+ * @dst: array of unsigned long containing destination pfns
+ * @private: pointer passed back to each of the callbacks
+ * Returns: 0 on success, error code otherwise
+ *
+ * This function tries to migrate a range of virtual addresses, using callbacks
+ * to allocate and copy memory from source to destination. First it collects
+ * all the pages backing each virtual address in the range, saving them inside
+ * the src array. Then it locks those pages and unmaps them. Once the pages are
+ * locked and unmapped, it checks whether each page is pinned or not. Pages
+ * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function)
+ * in the corresponding src array entry. It then restores any pages that are
+ * pinned, by remapping and unlocking those pages.
+ *
+ * At this point it calls the alloc_and_copy() callback. For documentation on
+ * what is expected from that callback, see the struct migrate_vma_ops comments
+ * in include/linux/migrate.h.
+ *
+ * After the alloc_and_copy() callback, this function goes over each entry in
+ * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flags
+ * set. If the corresponding entry in the dst array has the MIGRATE_PFN_VALID
+ * flag set, then the function tries to migrate struct page information from
+ * the source struct page to the destination struct page. If it fails to
+ * migrate the struct page information, then it clears the MIGRATE_PFN_MIGRATE
+ * flag in the src array entry.
+ *
+ * At this point all successfully migrated pages have an entry in the src
+ * array with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flags set and a
+ * dst array entry with the MIGRATE_PFN_VALID flag set.
+ *
+ * It then calls the finalize_and_map() callback. See the comments for "struct
+ * migrate_vma_ops" in include/linux/migrate.h for details about
+ * finalize_and_map() behavior.
+ *
+ * After the finalize_and_map() callback, for successfully migrated pages, this
+ * function updates the CPU page table to point to the new pages; otherwise it
+ * restores the CPU page table to point to the original source pages.
+ *
+ * The function returns 0 after the above steps, even if no pages were migrated
+ * (it only returns an error if any of the arguments are invalid).
+ *
+ * Both the src and dst arrays must be big enough for (end - start) >> PAGE_SHIFT
+ * unsigned long entries.
+ */
+int migrate_vma(const struct migrate_vma_ops *ops,
+                struct vm_area_struct *vma,
+                unsigned long start,
+                unsigned long end,
+                unsigned long *src,
+                unsigned long *dst,
+                void *private)
+{
+        struct migrate_vma migrate;
+
+        /* Sanity check the arguments */
+        start &= PAGE_MASK;
+        end &= PAGE_MASK;
+        if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL))
+                return -EINVAL;
+        if (start < vma->vm_start || start >= vma->vm_end)
+                return -EINVAL;
+        if (end <= vma->vm_start || end > vma->vm_end)
+                return -EINVAL;
+        if (!ops || !src || !dst || start >= end)
+                return -EINVAL;
+
+        memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
+        migrate.src = src;
+        migrate.dst = dst;
+        migrate.start = start;
+        migrate.npages = 0;
+        migrate.cpages = 0;
+        migrate.end = end;
+        migrate.vma = vma;
+
+        /* Collect, and try to unmap source pages */
+        migrate_vma_collect(&migrate);
+        if (!migrate.cpages)
+                return 0;
+
+        /* Lock and isolate page */
+        migrate_vma_prepare(&migrate);
+        if (!migrate.cpages)
+                return 0;
+
+        /* Unmap pages */
+        migrate_vma_unmap(&migrate);
+        if (!migrate.cpages)
+                return 0;
+
+        /*
+         * At this point pages are locked and unmapped, and thus they have
+         * stable content and can safely be copied to destination memory that
+         * is allocated by the callback.
+         *
+         * Note that migration can fail in migrate_vma_pages() for each
+         * individual page.
+         */
+        ops->alloc_and_copy(vma, src, dst, start, end, private);
+
+        /* This does the real migration of struct page */
+        migrate_vma_pages(&migrate);
+
+        ops->finalize_and_map(vma, src, dst, start, end, private);
+
+        /* Unlock and remap pages */
+        migrate_vma_finalize(&migrate);
+
+        return 0;
+}
+EXPORT_SYMBOL(migrate_vma);
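
For context, here is a hypothetical driver-side usage sketch (not part of the patch above) showing how the migrate_vma() API is meant to be driven. The struct migrate_vma_ops callback signatures are inferred from the ops->alloc_and_copy()/ops->finalize_and_map() call sites in this diff and from the struct migrate_vma_ops documentation the diff points to in include/linux/migrate.h; everything prefixed example_ is invented for illustration. The migrate_pfn(), migrate_pfn_to_page(), MIGRATE_PFN_MIGRATE and MIGRATE_PFN_LOCKED helpers are the same ones the patch itself uses for the src array; assuming the dst array uses the same encoding.

/* Hypothetical example, not part of this patch. */
#include <linux/migrate.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/slab.h>

static void example_alloc_and_copy(struct vm_area_struct *vma,
                                   const unsigned long *src,
                                   unsigned long *dst,
                                   unsigned long start,
                                   unsigned long end,
                                   void *private)
{
        unsigned long addr, i;

        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct page *spage = migrate_pfn_to_page(src[i]);
                struct page *dpage;

                /* Skip holes and pages migrate_vma() could not collect/unmap. */
                if (!spage || !(src[i] & MIGRATE_PFN_MIGRATE)) {
                        dst[i] = 0;
                        continue;
                }

                dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, addr);
                if (!dpage) {
                        dst[i] = 0;
                        continue;
                }

                /* Source content is stable: the page is locked and unmapped. */
                copy_highpage(dpage, spage);

                /*
                 * migrate_vma_finalize() above unlock_page()s the destination
                 * page, so hand it over locked; setting MIGRATE_PFN_LOCKED
                 * mirrors what migrate_vma_prepare() does for source entries
                 * and is assumed to be what migrate.h expects.
                 */
                lock_page(dpage);
                dst[i] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
        }
}

static void example_finalize_and_map(struct vm_area_struct *vma,
                                     const unsigned long *src,
                                     const unsigned long *dst,
                                     unsigned long start,
                                     unsigned long end,
                                     void *private)
{
        /*
         * Entries whose src[] slot still has MIGRATE_PFN_MIGRATE set were
         * migrated successfully; a real driver would update its own state
         * here, before the CPU page tables are pointed at the dst pages.
         */
}

static const struct migrate_vma_ops example_migrate_ops = {
        .alloc_and_copy   = example_alloc_and_copy,
        .finalize_and_map = example_finalize_and_map,
};

static int example_migrate_range(struct vm_area_struct *vma,
                                 unsigned long start, unsigned long end)
{
        unsigned long npages = (end - start) >> PAGE_SHIFT;
        unsigned long *pfns;
        int ret;

        /* One src and one dst slot per page in the range. */
        pfns = kcalloc(2 * npages, sizeof(*pfns), GFP_KERNEL);
        if (!pfns)
                return -ENOMEM;

        ret = migrate_vma(&example_migrate_ops, vma, start, end,
                          pfns, pfns + npages, NULL);

        kfree(pfns);
        return ret;
}

Note how the callback only fills dst[] for entries whose src[] slot still has MIGRATE_PFN_MIGRATE set, and how migration remains best effort: any page that could not be locked, isolated, or unmapped is simply left in place and skipped here.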