path: root/kernel/memremap.c
author    Dan Williams <dan.j.williams@intel.com>    2017-09-06 19:24:13 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-09-06 20:27:29 -0400
commit    ab1b597ee0e4208a1db227bb7b2c9512c8234b48 (patch)
tree      57d44937a91ae2e91e4c8bf741e6e4d326933064 /kernel/memremap.c
parent    c568da282bbc8f09c4b49201177fa259fe184c47 (diff)
mm, devm_memremap_pages: use multi-order radix for ZONE_DEVICE lookups
devm_memremap_pages() records mapped ranges in pgmap_radix with an entry per
section's worth of memory (128MB). The key for each of those entries is a
section number. This leads to false positives when devm_memremap_pages() is
passed a section-unaligned range, as lookups of addresses in the misaligned
head or tail of those sections incorrectly find the mapping instead of
returning NULL. We can close this hole by using the pfn as the key for
entries in the tree. The number of entries required to describe a remapped
range is reduced by leveraging multi-order entries.

In practice this approach usually yields just one entry in the tree if the
size and starting address share the same power-of-2 alignment. Previously we
always needed nr_entries = mapping_size / 128MB.

Link: https://lists.01.org/pipermail/linux-nvdimm/2016-August/006666.html
Link: http://lkml.kernel.org/r/150215410565.39310.13767886055248249438.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
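To make the false positive concrete, here is a minimal userspace sketch of the old section-keyed scheme. The start address, size, and section shift are illustrative (PA_SECTION_SHIFT is 27, i.e. 128MB sections, on x86_64): a section-unaligned range claims whole sections, so a physical address below the range still resolves to an inserted key.

#include <stdio.h>

/* 128MB sections: PA_SECTION_SHIFT is 27 on x86_64. */
#define PA_SECTION_SHIFT	27

int main(void)
{
	/* Hypothetical range starting 64MB into a section. */
	unsigned long long start = (1ULL << 32) + (64ULL << 20);
	unsigned long long size = 128ULL << 20;
	unsigned long long first = start >> PA_SECTION_SHIFT;
	unsigned long long last = (start + size - 1) >> PA_SECTION_SHIFT;
	unsigned long long key;

	/* The old scheme inserted one entry per section number touched. */
	for (key = first; key <= last; key++)
		printf("inserted section key %llu\n", key);

	/* A physical address 32MB below the range, outside the mapping,
	 * still hashes to an inserted section key: a false positive. */
	unsigned long long bogus = start - (32ULL << 20);
	key = bogus >> PA_SECTION_SHIFT;
	printf("lookup %#llx -> key %llu: %s\n", bogus, key,
			key >= first && key <= last ?
			"false positive" : "correct miss");
	return 0;
}

Keying by pfn with exact multi-order spans, as this patch does, makes that lookup miss as it should.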
Diffstat (limited to 'kernel/memremap.c')
-rw-r--r--  kernel/memremap.c  52
1 file changed, 38 insertions(+), 14 deletions(-)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 9afdc434fb49..066e73c2fcc9 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -194,18 +194,41 @@ struct page_map {
 	struct vmem_altmap altmap;
 };
 
-static void pgmap_radix_release(struct resource *res)
+static unsigned long order_at(struct resource *res, unsigned long pgoff)
 {
-	resource_size_t key, align_start, align_size, align_end;
+	unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
+	unsigned long nr_pages, mask;
 
-	align_start = res->start & ~(SECTION_SIZE - 1);
-	align_size = ALIGN(resource_size(res), SECTION_SIZE);
-	align_end = align_start + align_size - 1;
+	nr_pages = PHYS_PFN(resource_size(res));
+	if (nr_pages == pgoff)
+		return ULONG_MAX;
+
+	/*
+	 * What is the largest aligned power-of-2 range available from
+	 * this resource pgoff to the end of the resource range,
+	 * considering the alignment of the current pgoff?
+	 */
+	mask = phys_pgoff | rounddown_pow_of_two(nr_pages - pgoff);
+	if (!mask)
+		return ULONG_MAX;
+
+	return find_first_bit(&mask, BITS_PER_LONG);
+}
+
+#define foreach_order_pgoff(res, order, pgoff) \
+	for (pgoff = 0, order = order_at((res), pgoff); order < ULONG_MAX; \
+			pgoff += 1UL << order, order = order_at((res), pgoff))
+
+static void pgmap_radix_release(struct resource *res)
+{
+	unsigned long pgoff, order;
 
 	mutex_lock(&pgmap_lock);
-	for (key = res->start; key <= res->end; key += SECTION_SIZE)
-		radix_tree_delete(&pgmap_radix, key >> PA_SECTION_SHIFT);
+	foreach_order_pgoff(res, order, pgoff)
+		radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff);
 	mutex_unlock(&pgmap_lock);
+
+	synchronize_rcu();
 }
 
 static unsigned long pfn_first(struct page_map *page_map)
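For illustration, the chunking that order_at() and foreach_order_pgoff() produce can be reproduced outside the kernel. The sketch below uses hypothetical start/size values, with rounddown_pow_of_two() and find_first_bit() replaced by local equivalents, and counts the multi-order entries a range needs versus the old one-per-128MB-section scheme.

#include <stdio.h>
#include <limits.h>

#define PAGE_SHIFT	12
#define SECTION_SIZE	(128ULL << 20)	/* 128MB per memory section */

/* Userspace stand-in: round n down to the nearest power of two. */
static unsigned long rounddown_pow_of_two(unsigned long n)
{
	while (n & (n - 1))
		n &= n - 1;	/* clear the lowest set bit */
	return n;
}

/* Mirror of the kernel's order_at(): largest aligned power-of-2
 * chunk that fits at pgoff, limited by both the alignment of the
 * current position and the pages remaining in the resource. */
static unsigned long order_at(unsigned long long start,
		unsigned long nr_pages, unsigned long pgoff)
{
	unsigned long phys_pgoff = (start >> PAGE_SHIFT) + pgoff;
	unsigned long mask;

	if (pgoff == nr_pages)
		return ULONG_MAX;
	mask = phys_pgoff | rounddown_pow_of_two(nr_pages - pgoff);
	if (!mask)
		return ULONG_MAX;
	return __builtin_ctzl(mask);	/* find_first_bit() on one word */
}

int main(void)
{
	/* Hypothetical 256MB, 4GB-aligned device range. */
	unsigned long long start = 1ULL << 32;
	unsigned long long size = 256ULL << 20;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	unsigned long pgoff, order, entries = 0;

	for (pgoff = 0; (order = order_at(start, nr_pages, pgoff)) != ULONG_MAX;
			pgoff += 1UL << order) {
		printf("entry: pgoff %lu, order %lu\n", pgoff, order);
		entries++;
	}
	printf("%lu multi-order entries vs %llu section entries\n",
			entries, size / SECTION_SIZE);
	return 0;
}

With the start pfn and the size both aligned to 2^16 pages, the whole 256MB range collapses into a single order-16 entry, where the old scheme needed two section keys.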
@@ -268,7 +291,7 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
-	page_map = radix_tree_lookup(&pgmap_radix, phys >> PA_SECTION_SHIFT);
+	page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
 	return page_map ? &page_map->pgmap : NULL;
 }
 
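With pfn keys, the lookup above works for any address inside the mapped range because a multi-order entry answers lookups for every index in the 2^order span it covers. The following toy model (not the kernel's radix-tree implementation) captures just that covering behavior, with hypothetical pfn and order values.

#include <stdio.h>

/* Toy model of one multi-order radix entry: an item stored at a
 * power-of-2 aligned index answers lookups for every index in the
 * 2^order span it covers. */
struct entry {
	unsigned long index;	/* first pfn covered, 2^order aligned */
	unsigned long order;	/* log2 of the number of pfns covered */
	void *item;
};

static void *lookup(const struct entry *e, unsigned long pfn)
{
	/* Clearing the low 'order' bits maps any covered pfn back to
	 * the slot where the item was stored. */
	if ((pfn & ~((1UL << e->order) - 1)) == e->index)
		return e->item;
	return NULL;
}

int main(void)
{
	static char pgmap[] = "pgmap";
	/* Hypothetical 2^16-page (256MB) mapping at pfn 0x100000. */
	struct entry e = { .index = 0x100000, .order = 16, .item = pgmap };

	printf("pfn 0x100000: %s\n", lookup(&e, 0x100000) ? "hit" : "miss");
	printf("pfn 0x10ffff: %s\n", lookup(&e, 0x10ffff) ? "hit" : "miss");
	printf("pfn 0x0fffff: %s\n", lookup(&e, 0x0fffff) ? "hit" : "miss");
	return 0;
}

In the kernel this covering behavior comes from the radix tree's multi-order support itself, so find_dev_pagemap() can key on any pfn in the range without section-aligned padding.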
@@ -293,12 +316,12 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
 void *devm_memremap_pages(struct device *dev, struct resource *res,
 		struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
-	resource_size_t key, align_start, align_size, align_end;
+	resource_size_t align_start, align_size, align_end;
+	unsigned long pfn, pgoff, order;
 	pgprot_t pgprot = PAGE_KERNEL;
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
 	int error, nid, is_ram;
-	unsigned long pfn;
 
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -337,11 +360,12 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	mutex_lock(&pgmap_lock);
 	error = 0;
 	align_end = align_start + align_size - 1;
-	for (key = align_start; key <= align_end; key += SECTION_SIZE) {
+
+	foreach_order_pgoff(res, order, pgoff) {
 		struct dev_pagemap *dup;
 
 		rcu_read_lock();
-		dup = find_dev_pagemap(key);
+		dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff));
 		rcu_read_unlock();
 		if (dup) {
 			dev_err(dev, "%s: %pr collides with mapping for %s\n",
@@ -349,8 +373,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 			error = -EBUSY;
 			break;
 		}
-		error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT,
-				page_map);
+		error = __radix_tree_insert(&pgmap_radix,
+				PHYS_PFN(res->start) + pgoff, order, page_map);
 		if (error) {
 			dev_err(dev, "%s: failed: %d\n", __func__, error);
 			break;