diff options
author | Jérôme Glisse <jglisse@redhat.com> | 2017-09-08 19:12:24 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-08 21:26:46 -0400 |
commit | df6ad69838fc9dcdbee0dcf2fc2c6f1113f8d609 (patch) | |
tree | d5774eba9a9c2204123b8ca36d9cba90bfa9ad64 /mm/memory.c | |
parent | 8315ada7f095bfa2cae0cd1e915b95bf6226897d (diff) |
mm/device-public-memory: device memory cache coherent with CPU
Platforms with an advanced system bus (like CAPI or CCIX) allow device memory
to be accessible from the CPU in a cache-coherent fashion. Add a new type of
ZONE_DEVICE to represent such memory. The use cases are the same as for
the un-addressable device memory but without all the corner cases.
Link: http://lkml.kernel.org/r/20170817000548.32038-19-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Nellans <dnellans@nvidia.com>
Cc: Evgeny Baskakov <ebaskakov@nvidia.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mark Hairgrove <mhairgrove@nvidia.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sherry Cheung <SCheung@nvidia.com>
Cc: Subhash Gutti <sgutti@nvidia.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Bob Liu <liubo95@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 46 |
1 files changed, 41 insertions, 5 deletions
diff --git a/mm/memory.c b/mm/memory.c index 079eeac0b009..ad0ea1af1f44 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -818,8 +818,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, | |||
818 | #else | 818 | #else |
819 | # define HAVE_PTE_SPECIAL 0 | 819 | # define HAVE_PTE_SPECIAL 0 |
820 | #endif | 820 | #endif |
821 | struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | 821 | struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, |
822 | pte_t pte) | 822 | pte_t pte, bool with_public_device) |
823 | { | 823 | { |
824 | unsigned long pfn = pte_pfn(pte); | 824 | unsigned long pfn = pte_pfn(pte); |
825 | 825 | ||
@@ -830,8 +830,31 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | |||
830 | return vma->vm_ops->find_special_page(vma, addr); | 830 | return vma->vm_ops->find_special_page(vma, addr); |
831 | if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) | 831 | if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) |
832 | return NULL; | 832 | return NULL; |
833 | if (!is_zero_pfn(pfn)) | 833 | if (is_zero_pfn(pfn)) |
834 | print_bad_pte(vma, addr, pte, NULL); | 834 | return NULL; |
835 | |||
836 | /* | ||
837 | * Device public pages are special pages (they are ZONE_DEVICE | ||
838 | * pages but different from persistent memory). They behave | ||
839 | * almost like normal pages. The difference is that they are | ||
840 | * not on the lru and thus should never be involved with any- | ||
841 | * thing that involves lru manipulation (mlock, numa balancing, | ||
842 | * ...). | ||
843 | * | ||
844 | * This is why we still want to return NULL for such page from | ||
845 | * vm_normal_page() so that we do not have to special case all | ||
846 | * call site of vm_normal_page(). | ||
847 | */ | ||
848 | if (likely(pfn < highest_memmap_pfn)) { | ||
849 | struct page *page = pfn_to_page(pfn); | ||
850 | |||
851 | if (is_device_public_page(page)) { | ||
852 | if (with_public_device) | ||
853 | return page; | ||
854 | return NULL; | ||
855 | } | ||
856 | } | ||
857 | print_bad_pte(vma, addr, pte, NULL); | ||
835 | return NULL; | 858 | return NULL; |
836 | } | 859 | } |
837 | 860 | ||
@@ -1012,6 +1035,19 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
1012 | get_page(page); | 1035 | get_page(page); |
1013 | page_dup_rmap(page, false); | 1036 | page_dup_rmap(page, false); |
1014 | rss[mm_counter(page)]++; | 1037 | rss[mm_counter(page)]++; |
1038 | } else if (pte_devmap(pte)) { | ||
1039 | page = pte_page(pte); | ||
1040 | |||
1041 | /* | ||
1042 | * Cache coherent device memory behave like regular page and | ||
1043 | * not like persistent memory page. For more informations see | ||
1044 | * MEMORY_DEVICE_CACHE_COHERENT in memory_hotplug.h | ||
1045 | */ | ||
1046 | if (is_device_public_page(page)) { | ||
1047 | get_page(page); | ||
1048 | page_dup_rmap(page, false); | ||
1049 | rss[mm_counter(page)]++; | ||
1050 | } | ||
1015 | } | 1051 | } |
1016 | 1052 | ||
1017 | out_set_pte: | 1053 | out_set_pte: |
@@ -1267,7 +1303,7 @@ again: | |||
1267 | if (pte_present(ptent)) { | 1303 | if (pte_present(ptent)) { |
1268 | struct page *page; | 1304 | struct page *page; |
1269 | 1305 | ||
1270 | page = vm_normal_page(vma, addr, ptent); | 1306 | page = _vm_normal_page(vma, addr, ptent, true); |
1271 | if (unlikely(details) && page) { | 1307 | if (unlikely(details) && page) { |
1272 | /* | 1308 | /* |
1273 | * unmap_shared_mapping_pages() wants to | 1309 | * unmap_shared_mapping_pages() wants to |