author	Jérôme Glisse <jglisse@redhat.com>	2017-09-08 19:12:24 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-08 21:26:46 -0400
commit	df6ad69838fc9dcdbee0dcf2fc2c6f1113f8d609 (patch)
tree	d5774eba9a9c2204123b8ca36d9cba90bfa9ad64 /mm/migrate.c
parent	8315ada7f095bfa2cae0cd1e915b95bf6226897d (diff)
mm/device-public-memory: device memory cache coherent with CPU
Platforms with an advanced system bus (like CAPI or CCIX) allow device memory to be accessible from the CPU in a cache-coherent fashion. Add a new type of ZONE_DEVICE to represent such memory. The use cases are the same as for un-addressable device memory, but without all the corner cases.

Link: http://lkml.kernel.org/r/20170817000548.32038-19-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Nellans <dnellans@nvidia.com>
Cc: Evgeny Baskakov <ebaskakov@nvidia.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mark Hairgrove <mhairgrove@nvidia.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sherry Cheung <SCheung@nvidia.com>
Cc: Subhash Gutti <sgutti@nvidia.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Bob Liu <liubo95@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
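The diff below distinguishes the two ZONE_DEVICE flavors whenever a page table entry is (re)built. As an illustrative sketch only (the helper zone_device_pte() is hypothetical and not part of the patch, though every call inside it appears in the diff), the contrast looks like this:

/*
 * Hypothetical helper, for illustration only: rebuild a PTE for a
 * ZONE_DEVICE page, mirroring the remove_migration_pte() change below.
 */
static pte_t zone_device_pte(struct page *new, pte_t pte)
{
	if (is_device_private_page(new)) {
		/*
		 * Un-addressable device memory: the CPU cannot touch it,
		 * so encode a special swap entry that faults to the driver.
		 */
		swp_entry_t entry;

		entry = make_device_private_entry(new, pte_write(pte));
		pte = swp_entry_to_pte(entry);
	} else if (is_device_public_page(new)) {
		/*
		 * Cache-coherent device memory: the CPU can map it like
		 * ordinary RAM, only marked as a devmap mapping.
		 */
		pte = pte_mkdevmap(pte);
		flush_dcache_page(new);
	}
	return pte;
}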
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	57
1 file changed, 38 insertions, 19 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index e581253ef330..618aeb5e9cde 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
+#include <linux/pfn_t.h>
 #include <linux/memremap.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/balloon_compaction.h>
@@ -239,10 +240,14 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		if (is_write_migration_entry(entry))
 			pte = maybe_mkwrite(pte, vma);
 
-		if (unlikely(is_zone_device_page(new)) &&
-		    is_device_private_page(new)) {
-			entry = make_device_private_entry(new, pte_write(pte));
-			pte = swp_entry_to_pte(entry);
+		if (unlikely(is_zone_device_page(new))) {
+			if (is_device_private_page(new)) {
+				entry = make_device_private_entry(new, pte_write(pte));
+				pte = swp_entry_to_pte(entry);
+			} else if (is_device_public_page(new)) {
+				pte = pte_mkdevmap(pte);
+				flush_dcache_page(new);
+			}
 		} else
 			flush_dcache_page(new);
 
@@ -437,12 +442,11 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	void **pslot;
 
 	/*
-	 * ZONE_DEVICE pages have 1 refcount always held by their device
-	 *
-	 * Note that DAX memory will never reach that point as it does not have
-	 * the MEMORY_DEVICE_ALLOW_MIGRATE flag set (see memory_hotplug.h).
+	 * Device public or private pages have an extra refcount as they are
+	 * ZONE_DEVICE pages.
 	 */
-	expected_count += is_zone_device_page(page);
+	expected_count += is_device_private_page(page);
+	expected_count += is_device_public_page(page);
 
 	if (!mapping) {
 		/* Anonymous page without mapping */
@@ -2123,7 +2127,6 @@ out_unlock:
 
 #endif /* CONFIG_NUMA */
 
-
 struct migrate_vma {
 	struct vm_area_struct *vma;
 	unsigned long *dst;
@@ -2263,7 +2266,7 @@ again:
 				pfn = 0;
 				goto next;
 			}
-			page = vm_normal_page(migrate->vma, addr, pte);
+			page = _vm_normal_page(migrate->vma, addr, pte, true);
 			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
 			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 		}
@@ -2406,10 +2409,19 @@ static bool migrate_vma_check_page(struct page *page)
 		if (is_device_private_page(page))
 			return true;
 
-		/* Other ZONE_DEVICE memory type are not supported */
-		return false;
+		/*
+		 * Only allow device public page to be migrated and account for
+		 * the extra reference count imply by ZONE_DEVICE pages.
+		 */
+		if (!is_device_public_page(page))
+			return false;
+		extra++;
 	}
 
+	/* For file back page */
+	if (page_mapping(page))
+		extra += 1 + page_has_private(page);
+
 	if ((page_count(page) - extra) > page_mapcount(page))
 		return false;
 
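As a hedged aside (not part of the patch): for a device public page mapped by a single anonymous VMA, the accounting in the hunk above works out as follows.

/*
 * Worked example, assumptions labeled: one anonymous PTE mapping,
 * no additional get_page() references held.
 *
 *   page_count(page)    == 2   one ref held by ZONE_DEVICE,
 *                              one ref for the PTE mapping
 *   extra               == 1   the ZONE_DEVICE reference
 *   page_mapcount(page) == 1   the single PTE mapping
 *
 * (page_count - extra) > page_mapcount  =>  (2 - 1) > 1  =>  false,
 * so the page is treated as unpinned and may be migrated. Any extra
 * reference (a GUP pin, for instance) makes the check fail.
 */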
@@ -2647,11 +2659,18 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	 */
 	__SetPageUptodate(page);
 
-	if (is_zone_device_page(page) && is_device_private_page(page)) {
-		swp_entry_t swp_entry;
-
-		swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
-		entry = swp_entry_to_pte(swp_entry);
+	if (is_zone_device_page(page)) {
+		if (is_device_private_page(page)) {
+			swp_entry_t swp_entry;
+
+			swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
+			entry = swp_entry_to_pte(swp_entry);
+		} else if (is_device_public_page(page)) {
+			entry = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
+			if (vma->vm_flags & VM_WRITE)
+				entry = pte_mkwrite(pte_mkdirty(entry));
+			entry = pte_mkdevmap(entry);
+		}
 	} else {
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (vma->vm_flags & VM_WRITE)
@@ -2768,7 +2787,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
 					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
 					continue;
 				}
-			} else {
+			} else if (!is_device_public_page(newpage)) {
 				/*
 				 * Other types of ZONE_DEVICE page are not
 				 * supported.