summaryrefslogtreecommitdiffstats
path: root/include/linux/swapops.h
diff options
context:
space:
mode:
authorJérôme Glisse <jglisse@redhat.com>2017-09-08 19:11:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-08 21:26:46 -0400
commit5042db43cc26f51eed51c56192e2c2317e44315f (patch)
treed4115135f5aad384fb173537a2c397170a86f29a /include/linux/swapops.h
parent3072e413e305e353cd4654f8a57d953b66e85bf3 (diff)
mm/ZONE_DEVICE: new type of ZONE_DEVICE for unaddressable memory
HMM (heterogeneous memory management) needs struct page to support migration from system main memory to device memory. The reasons for HMM and for migration to device memory are explained in the HMM core patch. This patch deals with device memory that is un-addressable (i.e. the CPU cannot access it). Hence we do not want those struct pages to be managed like regular memory. That is why we extend ZONE_DEVICE to support different types of memory. A persistent memory type is defined for existing users of ZONE_DEVICE, and a new device un-addressable type is added for the un-addressable memory type. There is a clear separation between what is expected from each memory type, and existing users of ZONE_DEVICE are unaffected by the new requirements and the new use of the un-addressable type. All specific code paths are protected with a test against the memory type. Because the memory is un-addressable, we use a new special swap type for when a page is migrated to device memory (this reduces the maximum number of swap files). The two main additions to ZONE_DEVICE, besides the memory type, are two callbacks. The first one, page_free(), is called whenever the page refcount reaches 1 (which means the page is free, as a ZONE_DEVICE page never reaches a refcount of 0). This allows the device driver to manage its memory and the associated struct pages. The second callback, page_fault(), is invoked when there is a CPU access to an address that is backed by a device page (which is un-addressable by the CPU). This callback is responsible for migrating the page back to system main memory. The device driver cannot block migration back to system memory; HMM makes sure that such pages cannot be pinned in device memory. If the device is in some error condition and cannot migrate memory back, then a CPU page fault to device memory should end with SIGBUS.
[arnd@arndb.de: fix warning] Link: http://lkml.kernel.org/r/20170823133213.712917-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20170817000548.32038-8-jglisse@redhat.com Signed-off-by: Jérôme Glisse <jglisse@redhat.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Dan Williams <dan.j.williams@intel.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Nellans <dnellans@nvidia.com> Cc: Evgeny Baskakov <ebaskakov@nvidia.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Mark Hairgrove <mhairgrove@nvidia.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Sherry Cheung <SCheung@nvidia.com> Cc: Subhash Gutti <sgutti@nvidia.com> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Bob Liu <liubo95@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/swapops.h')
-rw-r--r--include/linux/swapops.h68
1 files changed, 68 insertions, 0 deletions
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 45b092aa6419..291c4b534658 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -100,6 +100,74 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
100 return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); 100 return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
101} 101}
102 102
103#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
/*
 * Build a swap entry that refers to a device private page.
 *
 * The entry type is SWP_DEVICE_WRITE when @write is true and
 * SWP_DEVICE_READ otherwise; the entry offset is the pfn of @page.
 */
104static inline swp_entry_t make_device_private_entry(struct page *page, bool write)
105{
106 return swp_entry(write ? SWP_DEVICE_WRITE : SWP_DEVICE_READ,
107 page_to_pfn(page));
108}
109
/*
 * Return true when the swap type of @entry is one of the two device
 * private types (SWP_DEVICE_READ or SWP_DEVICE_WRITE).
 */
110static inline bool is_device_private_entry(swp_entry_t entry)
111{
112 int type = swp_type(entry);
113 return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE;
114}
115
/*
 * Rewrite *@entry in place as a SWP_DEVICE_READ entry, keeping the
 * offset it already had.
 */
116static inline void make_device_private_entry_read(swp_entry_t *entry)
117{
118 *entry = swp_entry(SWP_DEVICE_READ, swp_offset(*entry));
119}
120
/*
 * Return true when the swap type of @entry is SWP_DEVICE_WRITE.
 * The comparison is wrapped in unlikely(), i.e. the write case is
 * hinted to the compiler as the uncommon one.
 */
121static inline bool is_write_device_private_entry(swp_entry_t entry)
122{
123 return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
124}
125
/*
 * Convert the offset of @entry back to a struct page by treating it
 * as a pfn. No check is made here that @entry is actually a device
 * private entry.
 */
126static inline struct page *device_private_entry_to_page(swp_entry_t entry)
127{
128 return pfn_to_page(swp_offset(entry));
129}
130
/*
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): presumably called when a CPU fault hits a device
 * private swap entry, and presumably returns a VM_FAULT_* code (the
 * !CONFIG_DEVICE_PRIVATE stub returns VM_FAULT_SIGBUS) -- confirm
 * against the definition.
 */
131int device_private_entry_fault(struct vm_area_struct *vma,
132 unsigned long addr,
133 swp_entry_t entry,
134 unsigned int flags,
135 pmd_t *pmdp);
136#else /* CONFIG_DEVICE_PRIVATE */
/*
 * Stub for builds without CONFIG_DEVICE_PRIVATE: both arguments are
 * ignored and swp_entry(0, 0) is returned.
 */
137static inline swp_entry_t make_device_private_entry(struct page *page, bool write)
138{
139 return swp_entry(0, 0);
140}
141
/*
 * Stub for builds without CONFIG_DEVICE_PRIVATE: does nothing and
 * leaves *@entry untouched.
 */
142static inline void make_device_private_entry_read(swp_entry_t *entry)
143{
144}
145
/*
 * Stub for builds without CONFIG_DEVICE_PRIVATE: always returns
 * false.
 */
146static inline bool is_device_private_entry(swp_entry_t entry)
147{
148 return false;
149}
150
/*
 * Stub for builds without CONFIG_DEVICE_PRIVATE: always returns
 * false.
 */
151static inline bool is_write_device_private_entry(swp_entry_t entry)
152{
153 return false;
154}
155
/*
 * Stub for builds without CONFIG_DEVICE_PRIVATE: always returns
 * NULL, so callers must not dereference the result.
 */
156static inline struct page *device_private_entry_to_page(swp_entry_t entry)
157{
158 return NULL;
159}
160
/*
 * Stub for builds without CONFIG_DEVICE_PRIVATE: all arguments are
 * ignored and VM_FAULT_SIGBUS is returned unconditionally.
 */
161static inline int device_private_entry_fault(struct vm_area_struct *vma,
162 unsigned long addr,
163 swp_entry_t entry,
164 unsigned int flags,
165 pmd_t *pmdp)
166{
167 return VM_FAULT_SIGBUS;
168}
169#endif /* CONFIG_DEVICE_PRIVATE */
170
103#ifdef CONFIG_MIGRATION 171#ifdef CONFIG_MIGRATION
104static inline swp_entry_t make_migration_entry(struct page *page, int write) 172static inline swp_entry_t make_migration_entry(struct page *page, int write)
105{ 173{