author    Jérôme Glisse <jglisse@redhat.com>    2017-09-08 19:12:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-09-08 21:26:46 -0400
commit    df6ad69838fc9dcdbee0dcf2fc2c6f1113f8d609 (patch)
tree      d5774eba9a9c2204123b8ca36d9cba90bfa9ad64 /mm/migrate.c
parent    8315ada7f095bfa2cae0cd1e915b95bf6226897d (diff)
mm/device-public-memory: device memory cache coherent with CPU
Platforms with an advanced system bus (such as CAPI or CCIX) allow device
memory to be accessed by the CPU in a cache-coherent fashion. Add a new type
of ZONE_DEVICE to represent such memory. The use cases are the same as for
un-addressable device memory, but without all the corner cases.
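For orientation, a minimal sketch of the distinction this patch introduces,
assuming the ZONE_DEVICE pgmap layout used by this series (page->pgmap->type
and the new MEMORY_DEVICE_PUBLIC type); the helper name is hypothetical and
not part of the patch:

/*
 * Illustrative sketch only. A device public page is an ordinary
 * ZONE_DEVICE page whose pgmap type is MEMORY_DEVICE_PUBLIC, so the
 * CPU can map it with a regular (devmap) pte instead of the special
 * swap entry used for MEMORY_DEVICE_PRIVATE pages.
 */
#include <linux/mm.h>
#include <linux/memremap.h>

static bool page_is_device_public_sketch(struct page *page)
{
	return is_zone_device_page(page) &&
	       page->pgmap->type == MEMORY_DEVICE_PUBLIC;
}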
Link: http://lkml.kernel.org/r/20170817000548.32038-19-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Nellans <dnellans@nvidia.com>
Cc: Evgeny Baskakov <ebaskakov@nvidia.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mark Hairgrove <mhairgrove@nvidia.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sherry Cheung <SCheung@nvidia.com>
Cc: Subhash Gutti <sgutti@nvidia.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Bob Liu <liubo95@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--    mm/migrate.c    57
1 file changed, 38 insertions(+), 19 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index e581253ef330..618aeb5e9cde 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
+#include <linux/pfn_t.h>
 #include <linux/memremap.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/balloon_compaction.h>
@@ -239,10 +240,14 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		if (is_write_migration_entry(entry))
 			pte = maybe_mkwrite(pte, vma);
 
-		if (unlikely(is_zone_device_page(new)) &&
-		    is_device_private_page(new)) {
-			entry = make_device_private_entry(new, pte_write(pte));
-			pte = swp_entry_to_pte(entry);
+		if (unlikely(is_zone_device_page(new))) {
+			if (is_device_private_page(new)) {
+				entry = make_device_private_entry(new, pte_write(pte));
+				pte = swp_entry_to_pte(entry);
+			} else if (is_device_public_page(new)) {
+				pte = pte_mkdevmap(pte);
+				flush_dcache_page(new);
+			}
 		} else
 			flush_dcache_page(new);
 
@@ -437,12 +442,11 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	void **pslot;
 
 	/*
-	 * ZONE_DEVICE pages have 1 refcount always held by their device
-	 *
-	 * Note that DAX memory will never reach that point as it does not have
-	 * the MEMORY_DEVICE_ALLOW_MIGRATE flag set (see memory_hotplug.h).
+	 * Device public or private pages have an extra refcount as they are
+	 * ZONE_DEVICE pages.
 	 */
-	expected_count += is_zone_device_page(page);
+	expected_count += is_device_private_page(page);
+	expected_count += is_device_public_page(page);
 
 	if (!mapping) {
 		/* Anonymous page without mapping */
@@ -2123,7 +2127,6 @@ out_unlock:
 
 #endif /* CONFIG_NUMA */
 
-
 struct migrate_vma {
 	struct vm_area_struct *vma;
 	unsigned long *dst;
@@ -2263,7 +2266,7 @@ again:
 				pfn = 0;
 				goto next;
 			}
-			page = vm_normal_page(migrate->vma, addr, pte);
+			page = _vm_normal_page(migrate->vma, addr, pte, true);
 			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
 			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 		}
@@ -2406,10 +2409,19 @@ static bool migrate_vma_check_page(struct page *page)
 		if (is_device_private_page(page))
 			return true;
 
-		/* Other ZONE_DEVICE memory type are not supported */
-		return false;
+		/*
+		 * Only allow device public page to be migrated and account for
+		 * the extra reference count imply by ZONE_DEVICE pages.
+		 */
+		if (!is_device_public_page(page))
+			return false;
+		extra++;
 	}
 
+	/* For file back page */
+	if (page_mapping(page))
+		extra += 1 + page_has_private(page);
+
 	if ((page_count(page) - extra) > page_mapcount(page))
 		return false;
 
@@ -2647,11 +2659,18 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 */
 	__SetPageUptodate(page);
 
-	if (is_zone_device_page(page) && is_device_private_page(page)) {
-		swp_entry_t swp_entry;
-
-		swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
-		entry = swp_entry_to_pte(swp_entry);
+	if (is_zone_device_page(page)) {
+		if (is_device_private_page(page)) {
+			swp_entry_t swp_entry;
+
+			swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
+			entry = swp_entry_to_pte(swp_entry);
+		} else if (is_device_public_page(page)) {
+			entry = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
+			if (vma->vm_flags & VM_WRITE)
+				entry = pte_mkwrite(pte_mkdirty(entry));
+			entry = pte_mkdevmap(entry);
+		}
 	} else {
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (vma->vm_flags & VM_WRITE)
@@ -2768,7 +2787,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
 					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
 					continue;
 				}
-			} else {
+			} else if (!is_device_public_page(newpage)) {
 				/*
 				 * Other types of ZONE_DEVICE page are not
 				 * supported.
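Taken together, the hunks above route coherent device memory through the
regular migration paths. Pulled out of migrate_vma_insert_page() for
readability, the pte built for a device public page reads as the sketch
below; the standalone function name is invented for exposition, but the
helper calls are exactly the ones the hunk adds:

/*
 * Sketch of the device-public branch added to migrate_vma_insert_page().
 * The wrapper function is hypothetical; the body mirrors the hunk above.
 */
static pte_t device_public_pte_sketch(struct page *page,
				      struct vm_area_struct *vma)
{
	pte_t entry;

	/* A coherent device page has a CPU-usable pfn: map it like RAM. */
	entry = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
	if (vma->vm_flags & VM_WRITE)
		entry = pte_mkwrite(pte_mkdirty(entry));
	/* Mark the pte devmap so the ZONE_DEVICE pfn is recognized later. */
	return pte_mkdevmap(entry);
}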