aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-08 20:21:52 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-08 20:21:52 -0400
commit7d3bf613e99abbd96ac7b90ee3694a246c975021 (patch)
tree084e4d900025ce3459702d3a8c05ead860c67c64 /include/linux
parenta3818841bd5e9b4a7e0e732c19cf3a632fcb525e (diff)
parent930218affeadd1325ea17e053f0dcecf218f5a4f (diff)
Merge tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams: "This adds a user for the new 'bytes-remaining' updates to memcpy_mcsafe() that you already received through Ingo via the x86-dax-for-linus pull. Not included here, but still targeting this cycle, is support for handling memory media errors (poison) consumed via userspace dax mappings. Summary: - DAX broke a fundamental assumption of truncate of file mapped pages. The truncate path assumed that it is safe to disconnect a pinned page from a file and let the filesystem reclaim the physical block. With DAX the page is equivalent to the filesystem block. Introduce dax_layout_busy_page() to enable filesystems to wait for pinned DAX pages to be released. Without this wait a filesystem could allocate blocks under active device-DMA to a new file. - DAX arranges for the block layer to be bypassed and uses dax_direct_access() + copy_to_iter() to satisfy read(2) calls. However, the memcpy_mcsafe() facility is available through the pmem block driver. In order to safely handle media errors, via the DAX block-layer bypass, introduce copy_to_iter_mcsafe(). - Fix cache management policy relative to the ACPI NFIT Platform Capabilities Structure to properly elide cache flushes when they are not necessary. The table indicates whether CPU caches are power-fail protected. 
Clarify that a deep flush is always performed on REQ_{FUA,PREFLUSH} requests" * tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits) dax: Use dax_write_cache* helpers libnvdimm, pmem: Do not flush power-fail protected CPU caches libnvdimm, pmem: Unconditionally deep flush on *sync libnvdimm, pmem: Complete REQ_FLUSH => REQ_PREFLUSH acpi, nfit: Remove ecc_unit_size dax: dax_insert_mapping_entry always succeeds libnvdimm, e820: Register all pmem resources libnvdimm: Debug probe times linvdimm, pmem: Preserve read-only setting for pmem devices x86, nfit_test: Add unit test for memcpy_mcsafe() pmem: Switch to copy_to_iter_mcsafe() dax: Report bytes remaining in dax_iomap_actor() dax: Introduce a ->copy_to_iter dax operation uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation xfs, dax: introduce xfs_break_dax_layouts() xfs: prepare xfs_break_layouts() for another layout type xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL mm, fs, dax: handle layout changes to pinned dax mappings mm: fix __gup_device_huge vs unmap mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/dax.h12
-rw-r--r--include/linux/device-mapper.h5
-rw-r--r--include/linux/memremap.h36
-rw-r--r--include/linux/mm.h71
-rw-r--r--include/linux/uio.h2
5 files changed, 79 insertions, 47 deletions
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 88504e87cd6c..3855e3800f48 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -20,6 +20,9 @@ struct dax_operations {
20 /* copy_from_iter: required operation for fs-dax direct-i/o */ 20 /* copy_from_iter: required operation for fs-dax direct-i/o */
21 size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, 21 size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
22 struct iov_iter *); 22 struct iov_iter *);
23 /* copy_to_iter: required operation for fs-dax direct-i/o */
24 size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
25 struct iov_iter *);
23}; 26};
24 27
25extern struct attribute_group dax_attribute_group; 28extern struct attribute_group dax_attribute_group;
@@ -83,6 +86,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
83struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); 86struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
84int dax_writeback_mapping_range(struct address_space *mapping, 87int dax_writeback_mapping_range(struct address_space *mapping,
85 struct block_device *bdev, struct writeback_control *wbc); 88 struct block_device *bdev, struct writeback_control *wbc);
89
90struct page *dax_layout_busy_page(struct address_space *mapping);
86#else 91#else
87static inline bool bdev_dax_supported(struct block_device *bdev, 92static inline bool bdev_dax_supported(struct block_device *bdev,
88 int blocksize) 93 int blocksize)
@@ -104,6 +109,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
104 return NULL; 109 return NULL;
105} 110}
106 111
112static inline struct page *dax_layout_busy_page(struct address_space *mapping)
113{
114 return NULL;
115}
116
107static inline int dax_writeback_mapping_range(struct address_space *mapping, 117static inline int dax_writeback_mapping_range(struct address_space *mapping,
108 struct block_device *bdev, struct writeback_control *wbc) 118 struct block_device *bdev, struct writeback_control *wbc)
109{ 119{
@@ -119,6 +129,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
119 void **kaddr, pfn_t *pfn); 129 void **kaddr, pfn_t *pfn);
120size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 130size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
121 size_t bytes, struct iov_iter *i); 131 size_t bytes, struct iov_iter *i);
132size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
133 size_t bytes, struct iov_iter *i);
122void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); 134void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
123 135
124ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, 136ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 31fef7c34185..6fb0808e87c8 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -133,7 +133,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
133 */ 133 */
134typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, 134typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
135 long nr_pages, void **kaddr, pfn_t *pfn); 135 long nr_pages, void **kaddr, pfn_t *pfn);
136typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, 136typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
137 void *addr, size_t bytes, struct iov_iter *i); 137 void *addr, size_t bytes, struct iov_iter *i);
138#define PAGE_SECTORS (PAGE_SIZE / 512) 138#define PAGE_SECTORS (PAGE_SIZE / 512)
139 139
@@ -184,7 +184,8 @@ struct target_type {
184 dm_iterate_devices_fn iterate_devices; 184 dm_iterate_devices_fn iterate_devices;
185 dm_io_hints_fn io_hints; 185 dm_io_hints_fn io_hints;
186 dm_dax_direct_access_fn direct_access; 186 dm_dax_direct_access_fn direct_access;
187 dm_dax_copy_from_iter_fn dax_copy_from_iter; 187 dm_dax_copy_iter_fn dax_copy_from_iter;
188 dm_dax_copy_iter_fn dax_copy_to_iter;
188 189
189 /* For internal device-mapper use. */ 190 /* For internal device-mapper use. */
190 struct list_head list; 191 struct list_head list;
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 74ea5e2310a8..f91f9e763557 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -1,7 +1,6 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _LINUX_MEMREMAP_H_ 2#ifndef _LINUX_MEMREMAP_H_
3#define _LINUX_MEMREMAP_H_ 3#define _LINUX_MEMREMAP_H_
4#include <linux/mm.h>
5#include <linux/ioport.h> 4#include <linux/ioport.h>
6#include <linux/percpu-refcount.h> 5#include <linux/percpu-refcount.h>
7 6
@@ -30,13 +29,6 @@ struct vmem_altmap {
30 * Specialize ZONE_DEVICE memory into multiple types each having differents 29 * Specialize ZONE_DEVICE memory into multiple types each having differents
31 * usage. 30 * usage.
32 * 31 *
33 * MEMORY_DEVICE_HOST:
34 * Persistent device memory (pmem): struct page might be allocated in different
35 * memory and architecture might want to perform special actions. It is similar
36 * to regular memory, in that the CPU can access it transparently. However,
37 * it is likely to have different bandwidth and latency than regular memory.
38 * See Documentation/nvdimm/nvdimm.txt for more information.
39 *
40 * MEMORY_DEVICE_PRIVATE: 32 * MEMORY_DEVICE_PRIVATE:
41 * Device memory that is not directly addressable by the CPU: CPU can neither 33 * Device memory that is not directly addressable by the CPU: CPU can neither
42 * read nor write private memory. In this case, we do still have struct pages 34 * read nor write private memory. In this case, we do still have struct pages
@@ -53,11 +45,19 @@ struct vmem_altmap {
53 * driver can hotplug the device memory using ZONE_DEVICE and with that memory 45 * driver can hotplug the device memory using ZONE_DEVICE and with that memory
54 * type. Any page of a process can be migrated to such memory. However no one 46 * type. Any page of a process can be migrated to such memory. However no one
55 * should be allow to pin such memory so that it can always be evicted. 47 * should be allow to pin such memory so that it can always be evicted.
48 *
49 * MEMORY_DEVICE_FS_DAX:
50 * Host memory that has similar access semantics as System RAM i.e. DMA
51 * coherent and supports page pinning. In support of coordinating page
52 * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a
53 * wakeup event whenever a page is unpinned and becomes idle. This
54 * wakeup is used to coordinate physical address space management (ex:
55 * fs truncate/hole punch) vs pinned pages (ex: device dma).
56 */ 56 */
57enum memory_type { 57enum memory_type {
58 MEMORY_DEVICE_HOST = 0, 58 MEMORY_DEVICE_PRIVATE = 1,
59 MEMORY_DEVICE_PRIVATE,
60 MEMORY_DEVICE_PUBLIC, 59 MEMORY_DEVICE_PUBLIC,
60 MEMORY_DEVICE_FS_DAX,
61}; 61};
62 62
63/* 63/*
@@ -129,8 +129,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
129 129
130unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); 130unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
131void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); 131void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
132
133static inline bool is_zone_device_page(const struct page *page);
134#else 132#else
135static inline void *devm_memremap_pages(struct device *dev, 133static inline void *devm_memremap_pages(struct device *dev,
136 struct dev_pagemap *pgmap) 134 struct dev_pagemap *pgmap)
@@ -161,20 +159,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
161} 159}
162#endif /* CONFIG_ZONE_DEVICE */ 160#endif /* CONFIG_ZONE_DEVICE */
163 161
164#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
165static inline bool is_device_private_page(const struct page *page)
166{
167 return is_zone_device_page(page) &&
168 page->pgmap->type == MEMORY_DEVICE_PRIVATE;
169}
170
171static inline bool is_device_public_page(const struct page *page)
172{
173 return is_zone_device_page(page) &&
174 page->pgmap->type == MEMORY_DEVICE_PUBLIC;
175}
176#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
177
178static inline void put_dev_pagemap(struct dev_pagemap *pgmap) 162static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
179{ 163{
180 if (pgmap) 164 if (pgmap)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4c3881b44ef1..0e493884e6e1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -830,27 +830,65 @@ static inline bool is_zone_device_page(const struct page *page)
830} 830}
831#endif 831#endif
832 832
833#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) 833#ifdef CONFIG_DEV_PAGEMAP_OPS
834void put_zone_device_private_or_public_page(struct page *page); 834void dev_pagemap_get_ops(void);
835DECLARE_STATIC_KEY_FALSE(device_private_key); 835void dev_pagemap_put_ops(void);
836#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key) 836void __put_devmap_managed_page(struct page *page);
837static inline bool is_device_private_page(const struct page *page); 837DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
838static inline bool is_device_public_page(const struct page *page); 838static inline bool put_devmap_managed_page(struct page *page)
839#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ 839{
840static inline void put_zone_device_private_or_public_page(struct page *page) 840 if (!static_branch_unlikely(&devmap_managed_key))
841 return false;
842 if (!is_zone_device_page(page))
843 return false;
844 switch (page->pgmap->type) {
845 case MEMORY_DEVICE_PRIVATE:
846 case MEMORY_DEVICE_PUBLIC:
847 case MEMORY_DEVICE_FS_DAX:
848 __put_devmap_managed_page(page);
849 return true;
850 default:
851 break;
852 }
853 return false;
854}
855
856static inline bool is_device_private_page(const struct page *page)
841{ 857{
858 return is_zone_device_page(page) &&
859 page->pgmap->type == MEMORY_DEVICE_PRIVATE;
842} 860}
843#define IS_HMM_ENABLED 0 861
862static inline bool is_device_public_page(const struct page *page)
863{
864 return is_zone_device_page(page) &&
865 page->pgmap->type == MEMORY_DEVICE_PUBLIC;
866}
867
868#else /* CONFIG_DEV_PAGEMAP_OPS */
869static inline void dev_pagemap_get_ops(void)
870{
871}
872
873static inline void dev_pagemap_put_ops(void)
874{
875}
876
877static inline bool put_devmap_managed_page(struct page *page)
878{
879 return false;
880}
881
844static inline bool is_device_private_page(const struct page *page) 882static inline bool is_device_private_page(const struct page *page)
845{ 883{
846 return false; 884 return false;
847} 885}
886
848static inline bool is_device_public_page(const struct page *page) 887static inline bool is_device_public_page(const struct page *page)
849{ 888{
850 return false; 889 return false;
851} 890}
852#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ 891#endif /* CONFIG_DEV_PAGEMAP_OPS */
853
854 892
855static inline void get_page(struct page *page) 893static inline void get_page(struct page *page)
856{ 894{
@@ -868,16 +906,13 @@ static inline void put_page(struct page *page)
868 page = compound_head(page); 906 page = compound_head(page);
869 907
870 /* 908 /*
871 * For private device pages we need to catch refcount transition from 909 * For devmap managed pages we need to catch refcount transition from
872 * 2 to 1, when refcount reach one it means the private device page is 910 * 2 to 1, when refcount reach one it means the page is free and we
873 * free and we need to inform the device driver through callback. See 911 * need to inform the device driver through callback. See
874 * include/linux/memremap.h and HMM for details. 912 * include/linux/memremap.h and HMM for details.
875 */ 913 */
876 if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) || 914 if (put_devmap_managed_page(page))
877 unlikely(is_device_public_page(page)))) {
878 put_zone_device_private_or_public_page(page);
879 return; 915 return;
880 }
881 916
882 if (put_page_testzero(page)) 917 if (put_page_testzero(page))
883 __put_page(page); 918 __put_page(page);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index f5766e853a77..409c845d4cd3 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -155,7 +155,7 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
155#endif 155#endif
156 156
157#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE 157#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
158size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i); 158size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
159#else 159#else
160#define _copy_to_iter_mcsafe _copy_to_iter 160#define _copy_to_iter_mcsafe _copy_to_iter
161#endif 161#endif