diff options
| author | Dan Williams <dan.j.williams@intel.com> | 2016-01-15 19:56:19 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-15 20:56:32 -0500 |
| commit | 9476df7d80dfc425b37bfecf1d89edf8ec81fcb6 (patch) | |
| tree | c18eecb4a2f369c45e2c45053e02853640c9005c /kernel | |
| parent | 260ae3f7db614a5c4aa4b773599f99adc1d9859e (diff) | |
mm: introduce find_dev_pagemap()
There are several scenarios where we need to retrieve and update
metadata associated with a given devm_memremap_pages() mapping, and the
only lookup key available is a pfn in the range:
1/ We want to augment vmemmap_populate() (called via arch_add_memory())
to allocate memmap storage from pre-allocated pages reserved by the
device driver. At vmemmap_alloc_block_buf() time it grabs device pages
rather than page allocator pages. This is in support of
devm_memremap_pages() mappings where the memmap is too large to fit in
main memory (i.e. large persistent memory devices).
2/ Taking a reference against the mapping when inserting device pages
into the address_space radix of a given inode. This facilitates
unmap_mapping_range() and truncate_inode_pages() operations when the
driver is tearing down the mapping.
3/ get_user_pages() operations on ZONE_DEVICE memory require taking a
reference against the mapping so that the driver teardown path can
revoke and drain usage of device pages.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/memremap.c | 85 |
1 files changed, 77 insertions, 8 deletions
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 449cb6a5d9a1..61cfbf4d3054 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
| @@ -10,6 +10,8 @@ | |||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 11 | * General Public License for more details. | 11 | * General Public License for more details. |
| 12 | */ | 12 | */ |
| 13 | #include <linux/radix-tree.h> | ||
| 14 | #include <linux/memremap.h> | ||
| 13 | #include <linux/device.h> | 15 | #include <linux/device.h> |
| 14 | #include <linux/types.h> | 16 | #include <linux/types.h> |
| 15 | #include <linux/pfn_t.h> | 17 | #include <linux/pfn_t.h> |
| @@ -155,22 +157,57 @@ pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags) | |||
| 155 | EXPORT_SYMBOL(phys_to_pfn_t); | 157 | EXPORT_SYMBOL(phys_to_pfn_t); |
| 156 | 158 | ||
| 157 | #ifdef CONFIG_ZONE_DEVICE | 159 | #ifdef CONFIG_ZONE_DEVICE |
| 160 | static DEFINE_MUTEX(pgmap_lock); | ||
| 161 | static RADIX_TREE(pgmap_radix, GFP_KERNEL); | ||
| 162 | #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) | ||
| 163 | #define SECTION_SIZE (1UL << PA_SECTION_SHIFT) | ||
| 164 | |||
| 158 | struct page_map { | 165 | struct page_map { |
| 159 | struct resource res; | 166 | struct resource res; |
| 167 | struct percpu_ref *ref; | ||
| 168 | struct dev_pagemap pgmap; | ||
| 160 | }; | 169 | }; |
| 161 | 170 | ||
| 162 | static void devm_memremap_pages_release(struct device *dev, void *res) | 171 | static void pgmap_radix_release(struct resource *res) |
| 172 | { | ||
| 173 | resource_size_t key; | ||
| 174 | |||
| 175 | mutex_lock(&pgmap_lock); | ||
| 176 | for (key = res->start; key <= res->end; key += SECTION_SIZE) | ||
| 177 | radix_tree_delete(&pgmap_radix, key >> PA_SECTION_SHIFT); | ||
| 178 | mutex_unlock(&pgmap_lock); | ||
| 179 | } | ||
| 180 | |||
| 181 | static void devm_memremap_pages_release(struct device *dev, void *data) | ||
| 163 | { | 182 | { |
| 164 | struct page_map *page_map = res; | 183 | struct page_map *page_map = data; |
| 184 | struct resource *res = &page_map->res; | ||
| 185 | resource_size_t align_start, align_size; | ||
| 186 | |||
| 187 | pgmap_radix_release(res); | ||
| 165 | 188 | ||
| 166 | /* pages are dead and unused, undo the arch mapping */ | 189 | /* pages are dead and unused, undo the arch mapping */ |
| 167 | arch_remove_memory(page_map->res.start, resource_size(&page_map->res)); | 190 | align_start = res->start & ~(SECTION_SIZE - 1); |
| 191 | align_size = ALIGN(resource_size(res), SECTION_SIZE); | ||
| 192 | arch_remove_memory(align_start, align_size); | ||
| 193 | } | ||
| 194 | |||
| 195 | /* assumes rcu_read_lock() held at entry */ | ||
| 196 | struct dev_pagemap *find_dev_pagemap(resource_size_t phys) | ||
| 197 | { | ||
| 198 | struct page_map *page_map; | ||
| 199 | |||
| 200 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 201 | |||
| 202 | page_map = radix_tree_lookup(&pgmap_radix, phys >> PA_SECTION_SHIFT); | ||
| 203 | return page_map ? &page_map->pgmap : NULL; | ||
| 168 | } | 204 | } |
| 169 | 205 | ||
| 170 | void *devm_memremap_pages(struct device *dev, struct resource *res) | 206 | void *devm_memremap_pages(struct device *dev, struct resource *res) |
| 171 | { | 207 | { |
| 172 | int is_ram = region_intersects(res->start, resource_size(res), | 208 | int is_ram = region_intersects(res->start, resource_size(res), |
| 173 | "System RAM"); | 209 | "System RAM"); |
| 210 | resource_size_t key, align_start, align_size; | ||
| 174 | struct page_map *page_map; | 211 | struct page_map *page_map; |
| 175 | int error, nid; | 212 | int error, nid; |
| 176 | 213 | ||
| @@ -190,18 +227,50 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) | |||
| 190 | 227 | ||
| 191 | memcpy(&page_map->res, res, sizeof(*res)); | 228 | memcpy(&page_map->res, res, sizeof(*res)); |
| 192 | 229 | ||
| 230 | page_map->pgmap.dev = dev; | ||
| 231 | mutex_lock(&pgmap_lock); | ||
| 232 | error = 0; | ||
| 233 | for (key = res->start; key <= res->end; key += SECTION_SIZE) { | ||
| 234 | struct dev_pagemap *dup; | ||
| 235 | |||
| 236 | rcu_read_lock(); | ||
| 237 | dup = find_dev_pagemap(key); | ||
| 238 | rcu_read_unlock(); | ||
| 239 | if (dup) { | ||
| 240 | dev_err(dev, "%s: %pr collides with mapping for %s\n", | ||
| 241 | __func__, res, dev_name(dup->dev)); | ||
| 242 | error = -EBUSY; | ||
| 243 | break; | ||
| 244 | } | ||
| 245 | error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT, | ||
| 246 | page_map); | ||
| 247 | if (error) { | ||
| 248 | dev_err(dev, "%s: failed: %d\n", __func__, error); | ||
| 249 | break; | ||
| 250 | } | ||
| 251 | } | ||
| 252 | mutex_unlock(&pgmap_lock); | ||
| 253 | if (error) | ||
| 254 | goto err_radix; | ||
| 255 | |||
| 193 | nid = dev_to_node(dev); | 256 | nid = dev_to_node(dev); |
| 194 | if (nid < 0) | 257 | if (nid < 0) |
| 195 | nid = numa_mem_id(); | 258 | nid = numa_mem_id(); |
| 196 | 259 | ||
| 197 | error = arch_add_memory(nid, res->start, resource_size(res), true); | 260 | align_start = res->start & ~(SECTION_SIZE - 1); |
| 198 | if (error) { | 261 | align_size = ALIGN(resource_size(res), SECTION_SIZE); |
| 199 | devres_free(page_map); | 262 | error = arch_add_memory(nid, align_start, align_size, true); |
| 200 | return ERR_PTR(error); | 263 | if (error) |
| 201 | } | 264 | goto err_add_memory; |
| 202 | 265 | ||
| 203 | devres_add(dev, page_map); | 266 | devres_add(dev, page_map); |
| 204 | return __va(res->start); | 267 | return __va(res->start); |
| 268 | |||
| 269 | err_add_memory: | ||
| 270 | err_radix: | ||
| 271 | pgmap_radix_release(res); | ||
| 272 | devres_free(page_map); | ||
| 273 | return ERR_PTR(error); | ||
| 205 | } | 274 | } |
| 206 | EXPORT_SYMBOL(devm_memremap_pages); | 275 | EXPORT_SYMBOL(devm_memremap_pages); |
| 207 | #endif /* CONFIG_ZONE_DEVICE */ | 276 | #endif /* CONFIG_ZONE_DEVICE */ |
