diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-08 20:21:52 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-08 20:21:52 -0400 |
| commit | 7d3bf613e99abbd96ac7b90ee3694a246c975021 (patch) | |
| tree | 084e4d900025ce3459702d3a8c05ead860c67c64 /include/linux | |
| parent | a3818841bd5e9b4a7e0e732c19cf3a632fcb525e (diff) | |
| parent | 930218affeadd1325ea17e053f0dcecf218f5a4f (diff) | |
Merge tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"This adds a user for the new 'bytes-remaining' updates to
memcpy_mcsafe() that you already received through Ingo via the
x86-dax-for-linus pull.
Not included here, but still targeting this cycle, is support for
handling memory media errors (poison) consumed via userspace dax
mappings.
Summary:
- DAX broke a fundamental assumption of truncate of file mapped
pages. The truncate path assumed that it is safe to disconnect a
pinned page from a file and let the filesystem reclaim the physical
block. With DAX the page is equivalent to the filesystem block.
Introduce dax_layout_busy_page() to enable filesystems to wait for
pinned DAX pages to be released. Without this wait a filesystem
could allocate blocks under active device-DMA to a new file.
- DAX arranges for the block layer to be bypassed and uses
dax_direct_access() + copy_to_iter() to satisfy read(2) calls.
However, the memcpy_mcsafe() facility is available through the pmem
block driver. In order to safely handle media errors, via the DAX
block-layer bypass, introduce copy_to_iter_mcsafe().
- Fix cache management policy relative to the ACPI NFIT Platform
Capabilities Structure to properly elide cache flushes when they
are not necessary. The table indicates whether CPU caches are
power-fail protected. Clarify that a deep flush is always performed
on REQ_{FUA,PREFLUSH} requests"
* tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits)
dax: Use dax_write_cache* helpers
libnvdimm, pmem: Do not flush power-fail protected CPU caches
libnvdimm, pmem: Unconditionally deep flush on *sync
libnvdimm, pmem: Complete REQ_FLUSH => REQ_PREFLUSH
acpi, nfit: Remove ecc_unit_size
dax: dax_insert_mapping_entry always succeeds
libnvdimm, e820: Register all pmem resources
libnvdimm: Debug probe times
linvdimm, pmem: Preserve read-only setting for pmem devices
x86, nfit_test: Add unit test for memcpy_mcsafe()
pmem: Switch to copy_to_iter_mcsafe()
dax: Report bytes remaining in dax_iomap_actor()
dax: Introduce a ->copy_to_iter dax operation
uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation
xfs, dax: introduce xfs_break_dax_layouts()
xfs: prepare xfs_break_layouts() for another layout type
xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL
mm, fs, dax: handle layout changes to pinned dax mappings
mm: fix __gup_device_huge vs unmap
mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS
...
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/dax.h | 12 | ||||
| -rw-r--r-- | include/linux/device-mapper.h | 5 | ||||
| -rw-r--r-- | include/linux/memremap.h | 36 | ||||
| -rw-r--r-- | include/linux/mm.h | 71 | ||||
| -rw-r--r-- | include/linux/uio.h | 2 |
5 files changed, 79 insertions, 47 deletions
diff --git a/include/linux/dax.h b/include/linux/dax.h index 88504e87cd6c..3855e3800f48 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h | |||
| @@ -20,6 +20,9 @@ struct dax_operations { | |||
| 20 | /* copy_from_iter: required operation for fs-dax direct-i/o */ | 20 | /* copy_from_iter: required operation for fs-dax direct-i/o */ |
| 21 | size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, | 21 | size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, |
| 22 | struct iov_iter *); | 22 | struct iov_iter *); |
| 23 | /* copy_to_iter: required operation for fs-dax direct-i/o */ | ||
| 24 | size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, | ||
| 25 | struct iov_iter *); | ||
| 23 | }; | 26 | }; |
| 24 | 27 | ||
| 25 | extern struct attribute_group dax_attribute_group; | 28 | extern struct attribute_group dax_attribute_group; |
| @@ -83,6 +86,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev) | |||
| 83 | struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); | 86 | struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); |
| 84 | int dax_writeback_mapping_range(struct address_space *mapping, | 87 | int dax_writeback_mapping_range(struct address_space *mapping, |
| 85 | struct block_device *bdev, struct writeback_control *wbc); | 88 | struct block_device *bdev, struct writeback_control *wbc); |
| 89 | |||
| 90 | struct page *dax_layout_busy_page(struct address_space *mapping); | ||
| 86 | #else | 91 | #else |
| 87 | static inline bool bdev_dax_supported(struct block_device *bdev, | 92 | static inline bool bdev_dax_supported(struct block_device *bdev, |
| 88 | int blocksize) | 93 | int blocksize) |
| @@ -104,6 +109,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) | |||
| 104 | return NULL; | 109 | return NULL; |
| 105 | } | 110 | } |
| 106 | 111 | ||
| 112 | static inline struct page *dax_layout_busy_page(struct address_space *mapping) | ||
| 113 | { | ||
| 114 | return NULL; | ||
| 115 | } | ||
| 116 | |||
| 107 | static inline int dax_writeback_mapping_range(struct address_space *mapping, | 117 | static inline int dax_writeback_mapping_range(struct address_space *mapping, |
| 108 | struct block_device *bdev, struct writeback_control *wbc) | 118 | struct block_device *bdev, struct writeback_control *wbc) |
| 109 | { | 119 | { |
| @@ -119,6 +129,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, | |||
| 119 | void **kaddr, pfn_t *pfn); | 129 | void **kaddr, pfn_t *pfn); |
| 120 | size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, | 130 | size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, |
| 121 | size_t bytes, struct iov_iter *i); | 131 | size_t bytes, struct iov_iter *i); |
| 132 | size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, | ||
| 133 | size_t bytes, struct iov_iter *i); | ||
| 122 | void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); | 134 | void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); |
| 123 | 135 | ||
| 124 | ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, | 136 | ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, |
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 31fef7c34185..6fb0808e87c8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h | |||
| @@ -133,7 +133,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); | |||
| 133 | */ | 133 | */ |
| 134 | typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, | 134 | typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, |
| 135 | long nr_pages, void **kaddr, pfn_t *pfn); | 135 | long nr_pages, void **kaddr, pfn_t *pfn); |
| 136 | typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, | 136 | typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, |
| 137 | void *addr, size_t bytes, struct iov_iter *i); | 137 | void *addr, size_t bytes, struct iov_iter *i); |
| 138 | #define PAGE_SECTORS (PAGE_SIZE / 512) | 138 | #define PAGE_SECTORS (PAGE_SIZE / 512) |
| 139 | 139 | ||
| @@ -184,7 +184,8 @@ struct target_type { | |||
| 184 | dm_iterate_devices_fn iterate_devices; | 184 | dm_iterate_devices_fn iterate_devices; |
| 185 | dm_io_hints_fn io_hints; | 185 | dm_io_hints_fn io_hints; |
| 186 | dm_dax_direct_access_fn direct_access; | 186 | dm_dax_direct_access_fn direct_access; |
| 187 | dm_dax_copy_from_iter_fn dax_copy_from_iter; | 187 | dm_dax_copy_iter_fn dax_copy_from_iter; |
| 188 | dm_dax_copy_iter_fn dax_copy_to_iter; | ||
| 188 | 189 | ||
| 189 | /* For internal device-mapper use. */ | 190 | /* For internal device-mapper use. */ |
| 190 | struct list_head list; | 191 | struct list_head list; |
diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 74ea5e2310a8..f91f9e763557 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h | |||
| @@ -1,7 +1,6 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef _LINUX_MEMREMAP_H_ | 2 | #ifndef _LINUX_MEMREMAP_H_ |
| 3 | #define _LINUX_MEMREMAP_H_ | 3 | #define _LINUX_MEMREMAP_H_ |
| 4 | #include <linux/mm.h> | ||
| 5 | #include <linux/ioport.h> | 4 | #include <linux/ioport.h> |
| 6 | #include <linux/percpu-refcount.h> | 5 | #include <linux/percpu-refcount.h> |
| 7 | 6 | ||
| @@ -30,13 +29,6 @@ struct vmem_altmap { | |||
| 30 | * Specialize ZONE_DEVICE memory into multiple types each having differents | 29 | * Specialize ZONE_DEVICE memory into multiple types each having differents |
| 31 | * usage. | 30 | * usage. |
| 32 | * | 31 | * |
| 33 | * MEMORY_DEVICE_HOST: | ||
| 34 | * Persistent device memory (pmem): struct page might be allocated in different | ||
| 35 | * memory and architecture might want to perform special actions. It is similar | ||
| 36 | * to regular memory, in that the CPU can access it transparently. However, | ||
| 37 | * it is likely to have different bandwidth and latency than regular memory. | ||
| 38 | * See Documentation/nvdimm/nvdimm.txt for more information. | ||
| 39 | * | ||
| 40 | * MEMORY_DEVICE_PRIVATE: | 32 | * MEMORY_DEVICE_PRIVATE: |
| 41 | * Device memory that is not directly addressable by the CPU: CPU can neither | 33 | * Device memory that is not directly addressable by the CPU: CPU can neither |
| 42 | * read nor write private memory. In this case, we do still have struct pages | 34 | * read nor write private memory. In this case, we do still have struct pages |
| @@ -53,11 +45,19 @@ struct vmem_altmap { | |||
| 53 | * driver can hotplug the device memory using ZONE_DEVICE and with that memory | 45 | * driver can hotplug the device memory using ZONE_DEVICE and with that memory |
| 54 | * type. Any page of a process can be migrated to such memory. However no one | 46 | * type. Any page of a process can be migrated to such memory. However no one |
| 55 | * should be allow to pin such memory so that it can always be evicted. | 47 | * should be allow to pin such memory so that it can always be evicted. |
| 48 | * | ||
| 49 | * MEMORY_DEVICE_FS_DAX: | ||
| 50 | * Host memory that has similar access semantics as System RAM i.e. DMA | ||
| 51 | * coherent and supports page pinning. In support of coordinating page | ||
| 52 | * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a | ||
| 53 | * wakeup event whenever a page is unpinned and becomes idle. This | ||
| 54 | * wakeup is used to coordinate physical address space management (ex: | ||
| 55 | * fs truncate/hole punch) vs pinned pages (ex: device dma). | ||
| 56 | */ | 56 | */ |
| 57 | enum memory_type { | 57 | enum memory_type { |
| 58 | MEMORY_DEVICE_HOST = 0, | 58 | MEMORY_DEVICE_PRIVATE = 1, |
| 59 | MEMORY_DEVICE_PRIVATE, | ||
| 60 | MEMORY_DEVICE_PUBLIC, | 59 | MEMORY_DEVICE_PUBLIC, |
| 60 | MEMORY_DEVICE_FS_DAX, | ||
| 61 | }; | 61 | }; |
| 62 | 62 | ||
| 63 | /* | 63 | /* |
| @@ -129,8 +129,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, | |||
| 129 | 129 | ||
| 130 | unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); | 130 | unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); |
| 131 | void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); | 131 | void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); |
| 132 | |||
| 133 | static inline bool is_zone_device_page(const struct page *page); | ||
| 134 | #else | 132 | #else |
| 135 | static inline void *devm_memremap_pages(struct device *dev, | 133 | static inline void *devm_memremap_pages(struct device *dev, |
| 136 | struct dev_pagemap *pgmap) | 134 | struct dev_pagemap *pgmap) |
| @@ -161,20 +159,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap, | |||
| 161 | } | 159 | } |
| 162 | #endif /* CONFIG_ZONE_DEVICE */ | 160 | #endif /* CONFIG_ZONE_DEVICE */ |
| 163 | 161 | ||
| 164 | #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) | ||
| 165 | static inline bool is_device_private_page(const struct page *page) | ||
| 166 | { | ||
| 167 | return is_zone_device_page(page) && | ||
| 168 | page->pgmap->type == MEMORY_DEVICE_PRIVATE; | ||
| 169 | } | ||
| 170 | |||
| 171 | static inline bool is_device_public_page(const struct page *page) | ||
| 172 | { | ||
| 173 | return is_zone_device_page(page) && | ||
| 174 | page->pgmap->type == MEMORY_DEVICE_PUBLIC; | ||
| 175 | } | ||
| 176 | #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ | ||
| 177 | |||
| 178 | static inline void put_dev_pagemap(struct dev_pagemap *pgmap) | 162 | static inline void put_dev_pagemap(struct dev_pagemap *pgmap) |
| 179 | { | 163 | { |
| 180 | if (pgmap) | 164 | if (pgmap) |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 4c3881b44ef1..0e493884e6e1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -830,27 +830,65 @@ static inline bool is_zone_device_page(const struct page *page) | |||
| 830 | } | 830 | } |
| 831 | #endif | 831 | #endif |
| 832 | 832 | ||
| 833 | #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) | 833 | #ifdef CONFIG_DEV_PAGEMAP_OPS |
| 834 | void put_zone_device_private_or_public_page(struct page *page); | 834 | void dev_pagemap_get_ops(void); |
| 835 | DECLARE_STATIC_KEY_FALSE(device_private_key); | 835 | void dev_pagemap_put_ops(void); |
| 836 | #define IS_HMM_ENABLED static_branch_unlikely(&device_private_key) | 836 | void __put_devmap_managed_page(struct page *page); |
| 837 | static inline bool is_device_private_page(const struct page *page); | 837 | DECLARE_STATIC_KEY_FALSE(devmap_managed_key); |
| 838 | static inline bool is_device_public_page(const struct page *page); | 838 | static inline bool put_devmap_managed_page(struct page *page) |
| 839 | #else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ | 839 | { |
| 840 | static inline void put_zone_device_private_or_public_page(struct page *page) | 840 | if (!static_branch_unlikely(&devmap_managed_key)) |
| 841 | return false; | ||
| 842 | if (!is_zone_device_page(page)) | ||
| 843 | return false; | ||
| 844 | switch (page->pgmap->type) { | ||
| 845 | case MEMORY_DEVICE_PRIVATE: | ||
| 846 | case MEMORY_DEVICE_PUBLIC: | ||
| 847 | case MEMORY_DEVICE_FS_DAX: | ||
| 848 | __put_devmap_managed_page(page); | ||
| 849 | return true; | ||
| 850 | default: | ||
| 851 | break; | ||
| 852 | } | ||
| 853 | return false; | ||
| 854 | } | ||
| 855 | |||
| 856 | static inline bool is_device_private_page(const struct page *page) | ||
| 841 | { | 857 | { |
| 858 | return is_zone_device_page(page) && | ||
| 859 | page->pgmap->type == MEMORY_DEVICE_PRIVATE; | ||
| 842 | } | 860 | } |
| 843 | #define IS_HMM_ENABLED 0 | 861 | |
| 862 | static inline bool is_device_public_page(const struct page *page) | ||
| 863 | { | ||
| 864 | return is_zone_device_page(page) && | ||
| 865 | page->pgmap->type == MEMORY_DEVICE_PUBLIC; | ||
| 866 | } | ||
| 867 | |||
| 868 | #else /* CONFIG_DEV_PAGEMAP_OPS */ | ||
| 869 | static inline void dev_pagemap_get_ops(void) | ||
| 870 | { | ||
| 871 | } | ||
| 872 | |||
| 873 | static inline void dev_pagemap_put_ops(void) | ||
| 874 | { | ||
| 875 | } | ||
| 876 | |||
| 877 | static inline bool put_devmap_managed_page(struct page *page) | ||
| 878 | { | ||
| 879 | return false; | ||
| 880 | } | ||
| 881 | |||
| 844 | static inline bool is_device_private_page(const struct page *page) | 882 | static inline bool is_device_private_page(const struct page *page) |
| 845 | { | 883 | { |
| 846 | return false; | 884 | return false; |
| 847 | } | 885 | } |
| 886 | |||
| 848 | static inline bool is_device_public_page(const struct page *page) | 887 | static inline bool is_device_public_page(const struct page *page) |
| 849 | { | 888 | { |
| 850 | return false; | 889 | return false; |
| 851 | } | 890 | } |
| 852 | #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ | 891 | #endif /* CONFIG_DEV_PAGEMAP_OPS */ |
| 853 | |||
| 854 | 892 | ||
| 855 | static inline void get_page(struct page *page) | 893 | static inline void get_page(struct page *page) |
| 856 | { | 894 | { |
| @@ -868,16 +906,13 @@ static inline void put_page(struct page *page) | |||
| 868 | page = compound_head(page); | 906 | page = compound_head(page); |
| 869 | 907 | ||
| 870 | /* | 908 | /* |
| 871 | * For private device pages we need to catch refcount transition from | 909 | * For devmap managed pages we need to catch refcount transition from |
| 872 | * 2 to 1, when refcount reach one it means the private device page is | 910 | * 2 to 1, when refcount reach one it means the page is free and we |
| 873 | * free and we need to inform the device driver through callback. See | 911 | * need to inform the device driver through callback. See |
| 874 | * include/linux/memremap.h and HMM for details. | 912 | * include/linux/memremap.h and HMM for details. |
| 875 | */ | 913 | */ |
| 876 | if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) || | 914 | if (put_devmap_managed_page(page)) |
| 877 | unlikely(is_device_public_page(page)))) { | ||
| 878 | put_zone_device_private_or_public_page(page); | ||
| 879 | return; | 915 | return; |
| 880 | } | ||
| 881 | 916 | ||
| 882 | if (put_page_testzero(page)) | 917 | if (put_page_testzero(page)) |
| 883 | __put_page(page); | 918 | __put_page(page); |
diff --git a/include/linux/uio.h b/include/linux/uio.h index f5766e853a77..409c845d4cd3 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h | |||
| @@ -155,7 +155,7 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); | |||
| 155 | #endif | 155 | #endif |
| 156 | 156 | ||
| 157 | #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE | 157 | #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE |
| 158 | size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i); | 158 | size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i); |
| 159 | #else | 159 | #else |
| 160 | #define _copy_to_iter_mcsafe _copy_to_iter | 160 | #define _copy_to_iter_mcsafe _copy_to_iter |
| 161 | #endif | 161 | #endif |
