summaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
authorJérôme Glisse <jglisse@redhat.com>2017-09-08 19:12:24 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-08 21:26:46 -0400
commitdf6ad69838fc9dcdbee0dcf2fc2c6f1113f8d609 (patch)
treed5774eba9a9c2204123b8ca36d9cba90bfa9ad64 /mm/memory.c
parent8315ada7f095bfa2cae0cd1e915b95bf6226897d (diff)
mm/device-public-memory: device memory cache coherent with CPU
Platforms with an advanced system bus (like CAPI or CCIX) allow device memory to be accessible from the CPU in a cache-coherent fashion. Add a new type of ZONE_DEVICE to represent such memory. The use cases are the same as for un-addressable device memory but without all the corner cases. Link: http://lkml.kernel.org/r/20170817000548.32038-19-jglisse@redhat.com Signed-off-by: Jérôme Glisse <jglisse@redhat.com> Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Balbir Singh <bsingharora@gmail.com> Cc: David Nellans <dnellans@nvidia.com> Cc: Evgeny Baskakov <ebaskakov@nvidia.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Mark Hairgrove <mhairgrove@nvidia.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Sherry Cheung <SCheung@nvidia.com> Cc: Subhash Gutti <sgutti@nvidia.com> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Bob Liu <liubo95@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c46
1 files changed, 41 insertions, 5 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 079eeac0b009..ad0ea1af1f44 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -818,8 +818,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
818#else 818#else
819# define HAVE_PTE_SPECIAL 0 819# define HAVE_PTE_SPECIAL 0
820#endif 820#endif
821struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, 821struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
822 pte_t pte) 822 pte_t pte, bool with_public_device)
823{ 823{
824 unsigned long pfn = pte_pfn(pte); 824 unsigned long pfn = pte_pfn(pte);
825 825
@@ -830,8 +830,31 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
830 return vma->vm_ops->find_special_page(vma, addr); 830 return vma->vm_ops->find_special_page(vma, addr);
831 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) 831 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
832 return NULL; 832 return NULL;
833 if (!is_zero_pfn(pfn)) 833 if (is_zero_pfn(pfn))
834 print_bad_pte(vma, addr, pte, NULL); 834 return NULL;
835
836 /*
837 * Device public pages are special pages (they are ZONE_DEVICE
838 * pages but different from persistent memory). They behave
839 * almost like normal pages. The difference is that they are
840 * not on the lru and thus should never be involved with any-
841 * thing that involves lru manipulation (mlock, numa balancing,
842 * ...).
843 *
844 * This is why we still want to return NULL for such page from
845 * vm_normal_page() so that we do not have to special case all
846 * call site of vm_normal_page().
847 */
848 if (likely(pfn < highest_memmap_pfn)) {
849 struct page *page = pfn_to_page(pfn);
850
851 if (is_device_public_page(page)) {
852 if (with_public_device)
853 return page;
854 return NULL;
855 }
856 }
857 print_bad_pte(vma, addr, pte, NULL);
835 return NULL; 858 return NULL;
836 } 859 }
837 860
@@ -1012,6 +1035,19 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
1012 get_page(page); 1035 get_page(page);
1013 page_dup_rmap(page, false); 1036 page_dup_rmap(page, false);
1014 rss[mm_counter(page)]++; 1037 rss[mm_counter(page)]++;
1038 } else if (pte_devmap(pte)) {
1039 page = pte_page(pte);
1040
1041 /*
1042 * Cache coherent device memory behaves like a regular page and
1043 * not like a persistent memory page. For more information see
1044 * MEMORY_DEVICE_CACHE_COHERENT in memory_hotplug.h
1045 */
1046 if (is_device_public_page(page)) {
1047 get_page(page);
1048 page_dup_rmap(page, false);
1049 rss[mm_counter(page)]++;
1050 }
1015 } 1051 }
1016 1052
1017out_set_pte: 1053out_set_pte:
@@ -1267,7 +1303,7 @@ again:
1267 if (pte_present(ptent)) { 1303 if (pte_present(ptent)) {
1268 struct page *page; 1304 struct page *page;
1269 1305
1270 page = vm_normal_page(vma, addr, ptent); 1306 page = _vm_normal_page(vma, addr, ptent, true);
1271 if (unlikely(details) && page) { 1307 if (unlikely(details) && page) {
1272 /* 1308 /*
1273 * unmap_shared_mapping_pages() wants to 1309 * unmap_shared_mapping_pages() wants to