diff options
author | Dan Williams <dan.j.williams@intel.com> | 2016-01-15 19:56:19 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-15 20:56:32 -0500 |
commit | 9476df7d80dfc425b37bfecf1d89edf8ec81fcb6 (patch) | |
tree | c18eecb4a2f369c45e2c45053e02853640c9005c | |
parent | 260ae3f7db614a5c4aa4b773599f99adc1d9859e (diff) |
mm: introduce find_dev_pagemap()
There are several scenarios where we need to retrieve and update
metadata associated with a given devm_memremap_pages() mapping, and the
only lookup key available is a pfn in the range:
1/ We want to augment vmemmap_populate() (called via arch_add_memory())
to allocate memmap storage from pre-allocated pages reserved by the
device driver. At vmemmap_alloc_block_buf() time the memmap allocation is
satisfied from device pages rather than page allocator pages. This is in support of
devm_memremap_pages() mappings where the memmap is too large to fit in
main memory (i.e. large persistent memory devices).
2/ We need to take a reference against the mapping when inserting device pages
into the address_space radix tree of a given inode. This facilitates
unmap_mapping_range() and truncate_inode_pages() operations when the
driver is tearing down the mapping.
3/ get_user_pages() operations on ZONE_DEVICE memory require taking a
reference against the mapping so that the driver teardown path can
revoke and drain usage of device pages.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/nvdimm/pmem.c | 2 | ||||
-rw-r--r-- | include/linux/io.h | 15 | ||||
-rw-r--r-- | include/linux/memremap.h | 38 | ||||
-rw-r--r-- | kernel/memremap.c | 85 |
4 files changed, 116 insertions, 24 deletions
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 5def7f4ddbd2..904629b97c4f 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c | |||
@@ -21,9 +21,9 @@ | |||
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/platform_device.h> | 22 | #include <linux/platform_device.h> |
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/memory_hotplug.h> | ||
25 | #include <linux/moduleparam.h> | 24 | #include <linux/moduleparam.h> |
26 | #include <linux/badblocks.h> | 25 | #include <linux/badblocks.h> |
26 | #include <linux/memremap.h> | ||
27 | #include <linux/vmalloc.h> | 27 | #include <linux/vmalloc.h> |
28 | #include <linux/pfn_t.h> | 28 | #include <linux/pfn_t.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
diff --git a/include/linux/io.h b/include/linux/io.h index de64c1e53612..fffd88d7f426 100644 --- a/include/linux/io.h +++ b/include/linux/io.h | |||
@@ -89,21 +89,6 @@ void devm_memunmap(struct device *dev, void *addr); | |||
89 | 89 | ||
90 | void *__devm_memremap_pages(struct device *dev, struct resource *res); | 90 | void *__devm_memremap_pages(struct device *dev, struct resource *res); |
91 | 91 | ||
92 | #ifdef CONFIG_ZONE_DEVICE | ||
93 | void *devm_memremap_pages(struct device *dev, struct resource *res); | ||
94 | #else | ||
95 | static inline void *devm_memremap_pages(struct device *dev, struct resource *res) | ||
96 | { | ||
97 | /* | ||
98 | * Fail attempts to call devm_memremap_pages() without | ||
99 | * ZONE_DEVICE support enabled, this requires callers to fall | ||
100 | * back to plain devm_memremap() based on config | ||
101 | */ | ||
102 | WARN_ON_ONCE(1); | ||
103 | return ERR_PTR(-ENXIO); | ||
104 | } | ||
105 | #endif | ||
106 | |||
107 | /* | 92 | /* |
108 | * Some systems do not have legacy ISA devices. | 93 | * Some systems do not have legacy ISA devices. |
109 | * /dev/port is not a valid interface on these systems. | 94 | * /dev/port is not a valid interface on these systems. |
diff --git a/include/linux/memremap.h b/include/linux/memremap.h new file mode 100644 index 000000000000..d90721c178bb --- /dev/null +++ b/include/linux/memremap.h | |||
@@ -0,0 +1,38 @@ | |||
1 | #ifndef _LINUX_MEMREMAP_H_ | ||
2 | #define _LINUX_MEMREMAP_H_ | ||
3 | #include <linux/mm.h> | ||
4 | |||
5 | struct resource; | ||
6 | struct device; | ||
7 | /** | ||
8 | * struct dev_pagemap - metadata for ZONE_DEVICE mappings | ||
9 | * @dev: host device of the mapping for debug | ||
10 | */ | ||
11 | struct dev_pagemap { | ||
12 | /* TODO: vmem_altmap and percpu_ref count */ | ||
13 | struct device *dev; | ||
14 | }; | ||
15 | |||
16 | #ifdef CONFIG_ZONE_DEVICE | ||
17 | void *devm_memremap_pages(struct device *dev, struct resource *res); | ||
18 | struct dev_pagemap *find_dev_pagemap(resource_size_t phys); | ||
19 | #else | ||
20 | static inline void *devm_memremap_pages(struct device *dev, | ||
21 | struct resource *res) | ||
22 | { | ||
23 | /* | ||
24 | * Fail attempts to call devm_memremap_pages() without | ||
25 | * ZONE_DEVICE support enabled, this requires callers to fall | ||
26 | * back to plain devm_memremap() based on config | ||
27 | */ | ||
28 | WARN_ON_ONCE(1); | ||
29 | return ERR_PTR(-ENXIO); | ||
30 | } | ||
31 | |||
32 | static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) | ||
33 | { | ||
34 | return NULL; | ||
35 | } | ||
36 | #endif | ||
37 | |||
38 | #endif /* _LINUX_MEMREMAP_H_ */ | ||
diff --git a/kernel/memremap.c b/kernel/memremap.c index 449cb6a5d9a1..61cfbf4d3054 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c | |||
@@ -10,6 +10,8 @@ | |||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * General Public License for more details. | 11 | * General Public License for more details. |
12 | */ | 12 | */ |
13 | #include <linux/radix-tree.h> | ||
14 | #include <linux/memremap.h> | ||
13 | #include <linux/device.h> | 15 | #include <linux/device.h> |
14 | #include <linux/types.h> | 16 | #include <linux/types.h> |
15 | #include <linux/pfn_t.h> | 17 | #include <linux/pfn_t.h> |
@@ -155,22 +157,57 @@ pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags) | |||
155 | EXPORT_SYMBOL(phys_to_pfn_t); | 157 | EXPORT_SYMBOL(phys_to_pfn_t); |
156 | 158 | ||
157 | #ifdef CONFIG_ZONE_DEVICE | 159 | #ifdef CONFIG_ZONE_DEVICE |
160 | static DEFINE_MUTEX(pgmap_lock); | ||
161 | static RADIX_TREE(pgmap_radix, GFP_KERNEL); | ||
162 | #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) | ||
163 | #define SECTION_SIZE (1UL << PA_SECTION_SHIFT) | ||
164 | |||
158 | struct page_map { | 165 | struct page_map { |
159 | struct resource res; | 166 | struct resource res; |
167 | struct percpu_ref *ref; | ||
168 | struct dev_pagemap pgmap; | ||
160 | }; | 169 | }; |
161 | 170 | ||
162 | static void devm_memremap_pages_release(struct device *dev, void *res) | 171 | static void pgmap_radix_release(struct resource *res) |
172 | { | ||
173 | resource_size_t key; | ||
174 | |||
175 | mutex_lock(&pgmap_lock); | ||
176 | for (key = res->start; key <= res->end; key += SECTION_SIZE) | ||
177 | radix_tree_delete(&pgmap_radix, key >> PA_SECTION_SHIFT); | ||
178 | mutex_unlock(&pgmap_lock); | ||
179 | } | ||
180 | |||
181 | static void devm_memremap_pages_release(struct device *dev, void *data) | ||
163 | { | 182 | { |
164 | struct page_map *page_map = res; | 183 | struct page_map *page_map = data; |
184 | struct resource *res = &page_map->res; | ||
185 | resource_size_t align_start, align_size; | ||
186 | |||
187 | pgmap_radix_release(res); | ||
165 | 188 | ||
166 | /* pages are dead and unused, undo the arch mapping */ | 189 | /* pages are dead and unused, undo the arch mapping */ |
167 | arch_remove_memory(page_map->res.start, resource_size(&page_map->res)); | 190 | align_start = res->start & ~(SECTION_SIZE - 1); |
191 | align_size = ALIGN(resource_size(res), SECTION_SIZE); | ||
192 | arch_remove_memory(align_start, align_size); | ||
193 | } | ||
194 | |||
195 | /* assumes rcu_read_lock() held at entry */ | ||
196 | struct dev_pagemap *find_dev_pagemap(resource_size_t phys) | ||
197 | { | ||
198 | struct page_map *page_map; | ||
199 | |||
200 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
201 | |||
202 | page_map = radix_tree_lookup(&pgmap_radix, phys >> PA_SECTION_SHIFT); | ||
203 | return page_map ? &page_map->pgmap : NULL; | ||
168 | } | 204 | } |
169 | 205 | ||
170 | void *devm_memremap_pages(struct device *dev, struct resource *res) | 206 | void *devm_memremap_pages(struct device *dev, struct resource *res) |
171 | { | 207 | { |
172 | int is_ram = region_intersects(res->start, resource_size(res), | 208 | int is_ram = region_intersects(res->start, resource_size(res), |
173 | "System RAM"); | 209 | "System RAM"); |
210 | resource_size_t key, align_start, align_size; | ||
174 | struct page_map *page_map; | 211 | struct page_map *page_map; |
175 | int error, nid; | 212 | int error, nid; |
176 | 213 | ||
@@ -190,18 +227,50 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) | |||
190 | 227 | ||
191 | memcpy(&page_map->res, res, sizeof(*res)); | 228 | memcpy(&page_map->res, res, sizeof(*res)); |
192 | 229 | ||
230 | page_map->pgmap.dev = dev; | ||
231 | mutex_lock(&pgmap_lock); | ||
232 | error = 0; | ||
233 | for (key = res->start; key <= res->end; key += SECTION_SIZE) { | ||
234 | struct dev_pagemap *dup; | ||
235 | |||
236 | rcu_read_lock(); | ||
237 | dup = find_dev_pagemap(key); | ||
238 | rcu_read_unlock(); | ||
239 | if (dup) { | ||
240 | dev_err(dev, "%s: %pr collides with mapping for %s\n", | ||
241 | __func__, res, dev_name(dup->dev)); | ||
242 | error = -EBUSY; | ||
243 | break; | ||
244 | } | ||
245 | error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT, | ||
246 | page_map); | ||
247 | if (error) { | ||
248 | dev_err(dev, "%s: failed: %d\n", __func__, error); | ||
249 | break; | ||
250 | } | ||
251 | } | ||
252 | mutex_unlock(&pgmap_lock); | ||
253 | if (error) | ||
254 | goto err_radix; | ||
255 | |||
193 | nid = dev_to_node(dev); | 256 | nid = dev_to_node(dev); |
194 | if (nid < 0) | 257 | if (nid < 0) |
195 | nid = numa_mem_id(); | 258 | nid = numa_mem_id(); |
196 | 259 | ||
197 | error = arch_add_memory(nid, res->start, resource_size(res), true); | 260 | align_start = res->start & ~(SECTION_SIZE - 1); |
198 | if (error) { | 261 | align_size = ALIGN(resource_size(res), SECTION_SIZE); |
199 | devres_free(page_map); | 262 | error = arch_add_memory(nid, align_start, align_size, true); |
200 | return ERR_PTR(error); | 263 | if (error) |
201 | } | 264 | goto err_add_memory; |
202 | 265 | ||
203 | devres_add(dev, page_map); | 266 | devres_add(dev, page_map); |
204 | return __va(res->start); | 267 | return __va(res->start); |
268 | |||
269 | err_add_memory: | ||
270 | err_radix: | ||
271 | pgmap_radix_release(res); | ||
272 | devres_free(page_map); | ||
273 | return ERR_PTR(error); | ||
205 | } | 274 | } |
206 | EXPORT_SYMBOL(devm_memremap_pages); | 275 | EXPORT_SYMBOL(devm_memremap_pages); |
207 | #endif /* CONFIG_ZONE_DEVICE */ | 276 | #endif /* CONFIG_ZONE_DEVICE */ |