author     Linus Torvalds <torvalds@linux-foundation.org>  2018-02-06 13:41:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-02-06 13:41:33 -0500
commit     3ff1b28caaff1d66d2be7e6eb7c56f78e9046fbb (patch)
tree       32d75a6db7f4985d37a9cfb7f1a1270963cfa404 /kernel
parent     105cf3c8c6264dce4bcdab877feb8037bc4109b1 (diff)
parent     ee95f4059a833839bf52972191b2d4c3d3cec552 (diff)

Merge tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Ross Zwisler:

 - Require struct page by default for filesystem DAX to remove a number
   of surprising failure cases. This includes failures with direct I/O,
   gdb and fork(2).

 - Add support for the new Platform Capabilities Structure added to the
   NFIT in ACPI 6.2a. This new table tells us whether the platform
   supports flushing of CPU and memory controller caches on unexpected
   power loss events.

 - Revamp vmem_altmap and dev_pagemap handling to clean up code and
   better support future PCI P2P uses.

 - Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has
   become out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL
   spec, and instead rely on the generic ND_CMD_CALL approach used by
   the two other IOCTL families, NVDIMM_FAMILY_{HPE,MSFT}.

 - Enhance nfit_test so we can test some of the new things added in
   version 1.6 of the DSM specification. This includes testing firmware
   download and simulating the Last Shutdown State (LSS) status.

* tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (37 commits)
  libnvdimm, namespace: remove redundant initialization of 'nd_mapping'
  acpi, nfit: fix register dimm error handling
  libnvdimm, namespace: make min namespace size 4K
  tools/testing/nvdimm: force nfit_test to depend on instrumented modules
  libnvdimm/nfit_test: adding support for unit testing enable LSS status
  libnvdimm/nfit_test: add firmware download emulation
  nfit-test: Add platform cap support from ACPI 6.2a to test
  libnvdimm: expose platform persistence attribute for nd_region
  acpi: nfit: add persistent memory control flag for nd_region
  acpi: nfit: Add support for detect platform CPU cache flush on power loss
  device-dax: Fix trailing semicolon
  libnvdimm, btt: fix uninitialized err_lock
  dax: require 'struct page' by default for filesystem dax
  ext2: auto disable dax instead of failing mount
  ext4: auto disable dax instead of failing mount
  mm, dax: introduce pfn_t_special()
  mm: Fix devm_memremap_pages() collision handling
  mm: Fix memory size alignment in devm_memremap_pages_release()
  memremap: merge find_dev_pagemap into get_dev_pagemap
  memremap: change devm_memremap_pages interface to use struct dev_pagemap
  ...
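One of the commits pulled in here ("memremap: change devm_memremap_pages interface to use struct dev_pagemap") replaces the old four-argument devm_memremap_pages(dev, res, ref, altmap) call with a single dev_pagemap descriptor the caller fills in; the kernel-doc in the diff below spells out the contract. The fragment here is only a minimal, hypothetical caller-side sketch of that contract — the function name my_remap_example and its surrounding variables are invented for illustration and do not appear in this pull.

/*
 * Hypothetical sketch of the post-merge calling convention: the caller
 * initializes the res, ref and type members of a struct dev_pagemap and
 * passes only that descriptor; altmap/altmap_valid are optional.
 */
#include <linux/device.h>
#include <linux/err.h>
#include <linux/memremap.h>
#include <linux/slab.h>

static void *my_remap_example(struct device *dev, struct resource *res,
			      struct percpu_ref *ref)
{
	struct dev_pagemap *pgmap;

	pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return ERR_PTR(-ENOMEM);

	pgmap->res = *res;		/* range to back with struct pages */
	pgmap->ref = ref;		/* must already be live (see kernel-doc) */
	pgmap->type = MEMORY_DEVICE_HOST;
	/* pgmap->altmap left zeroed, altmap_valid false: no altmap used */

	return devm_memremap_pages(dev, pgmap);
}

In-tree callers converted by this series typically embed the struct dev_pagemap in their own driver state rather than allocating it separately; the devm_kzalloc above just keeps the sketch self-contained.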
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/memremap.c | 174
1 file changed, 71 insertions(+), 103 deletions(-)
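A second commit in the list above ("memremap: merge find_dev_pagemap into get_dev_pagemap") folds the RCU-only lookup into get_dev_pagemap(), which now takes an optional pgmap the caller already holds a reference on and reuses it when it covers the pfn. Before reading the diff, here is a hedged sketch of that lookup pattern; walk_device_pfns and its parameters are invented for illustration.

/*
 * Hypothetical walker: pass the previously returned pgmap back in so a
 * scan over a contiguous pfn range only falls back to the radix-tree
 * lookup when it crosses into a different device mapping.
 */
#include <linux/memremap.h>
#include <linux/mm.h>

static void walk_device_pfns(unsigned long start_pfn, unsigned long nr_pfns)
{
	struct dev_pagemap *pgmap = NULL;
	unsigned long pfn;

	for (pfn = start_pfn; pfn < start_pfn + nr_pfns; pfn++) {
		/* reuses the cached pgmap or takes a new live reference */
		pgmap = get_dev_pagemap(pfn, pgmap);
		if (!pgmap)
			break;
		/* ... operate on pfn_to_page(pfn) ... */
	}
	if (pgmap)
		put_dev_pagemap(pgmap);	/* drop the single reference held */
}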
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 403ab9cdb949..4849be5f9b3c 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
 #define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-struct page_map {
-	struct resource res;
-	struct percpu_ref *ref;
-	struct dev_pagemap pgmap;
-	struct vmem_altmap altmap;
-};
-
 static unsigned long order_at(struct resource *res, unsigned long pgoff)
 {
 	unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
@@ -248,34 +241,36 @@ int device_private_entry_fault(struct vm_area_struct *vma,
 EXPORT_SYMBOL(device_private_entry_fault);
 #endif /* CONFIG_DEVICE_PRIVATE */
 
-static void pgmap_radix_release(struct resource *res)
+static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff)
 {
 	unsigned long pgoff, order;
 
 	mutex_lock(&pgmap_lock);
-	foreach_order_pgoff(res, order, pgoff)
+	foreach_order_pgoff(res, order, pgoff) {
+		if (pgoff >= end_pgoff)
+			break;
 		radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff);
+	}
 	mutex_unlock(&pgmap_lock);
 
 	synchronize_rcu();
 }
 
-static unsigned long pfn_first(struct page_map *page_map)
+static unsigned long pfn_first(struct dev_pagemap *pgmap)
 {
-	struct dev_pagemap *pgmap = &page_map->pgmap;
-	const struct resource *res = &page_map->res;
-	struct vmem_altmap *altmap = pgmap->altmap;
+	const struct resource *res = &pgmap->res;
+	struct vmem_altmap *altmap = &pgmap->altmap;
 	unsigned long pfn;
 
 	pfn = res->start >> PAGE_SHIFT;
-	if (altmap)
+	if (pgmap->altmap_valid)
 		pfn += vmem_altmap_offset(altmap);
 	return pfn;
 }
 
-static unsigned long pfn_end(struct page_map *page_map)
+static unsigned long pfn_end(struct dev_pagemap *pgmap)
 {
-	const struct resource *res = &page_map->res;
+	const struct resource *res = &pgmap->res;
 
 	return (res->start + resource_size(res)) >> PAGE_SHIFT;
 }
@@ -283,15 +278,15 @@ static unsigned long pfn_end(struct page_map *page_map)
 #define for_each_device_pfn(pfn, map) \
 	for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
 
-static void devm_memremap_pages_release(struct device *dev, void *data)
+static void devm_memremap_pages_release(void *data)
 {
-	struct page_map *page_map = data;
-	struct resource *res = &page_map->res;
+	struct dev_pagemap *pgmap = data;
+	struct device *dev = pgmap->dev;
+	struct resource *res = &pgmap->res;
 	resource_size_t align_start, align_size;
-	struct dev_pagemap *pgmap = &page_map->pgmap;
 	unsigned long pfn;
 
-	for_each_device_pfn(pfn, page_map)
+	for_each_device_pfn(pfn, pgmap)
 		put_page(pfn_to_page(pfn));
 
 	if (percpu_ref_tryget_live(pgmap->ref)) {
@@ -301,56 +296,51 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 
 	/* pages are dead and unused, undo the arch mapping */
 	align_start = res->start & ~(SECTION_SIZE - 1);
-	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+		- align_start;
 
 	mem_hotplug_begin();
-	arch_remove_memory(align_start, align_size);
+	arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
+			&pgmap->altmap : NULL);
 	mem_hotplug_done();
 
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
-	pgmap_radix_release(res);
-	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
+	pgmap_radix_release(res, -1);
+	dev_WARN_ONCE(dev, pgmap->altmap.alloc,
 			"%s: failed to free all reserved pages\n", __func__);
-}
-
-/* assumes rcu_read_lock() held at entry */
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
-{
-	struct page_map *page_map;
-
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
-	return page_map ? &page_map->pgmap : NULL;
 }
 
 /**
  * devm_memremap_pages - remap and provide memmap backing for the given resource
  * @dev: hosting device for @res
- * @res: "host memory" address range
- * @ref: a live per-cpu reference count
- * @altmap: optional descriptor for allocating the memmap from @res
+ * @pgmap: pointer to a struct dev_pgmap
  *
  * Notes:
- * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
- *    (or devm release event). The expected order of events is that @ref has
+ * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case altmap_valid
+ *    must be set to true
+ *
+ * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
+ *    time (or devm release event). The expected order of events is that ref has
  *    been through percpu_ref_kill() before devm_memremap_pages_release(). The
  *    wait for the completion of all references being dropped and
  *    percpu_ref_exit() must occur after devm_memremap_pages_release().
  *
- * 2/ @res is expected to be a host memory range that could feasibly be
+ * 4/ res is expected to be a host memory range that could feasibly be
  *    treated as a "System RAM" range, i.e. not a device mmio range, but
  *    this is not enforced.
  */
-void *devm_memremap_pages(struct device *dev, struct resource *res,
-		struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
 	resource_size_t align_start, align_size, align_end;
+	struct vmem_altmap *altmap = pgmap->altmap_valid ?
+			&pgmap->altmap : NULL;
 	unsigned long pfn, pgoff, order;
 	pgprot_t pgprot = PAGE_KERNEL;
-	struct dev_pagemap *pgmap;
-	struct page_map *page_map;
 	int error, nid, is_ram, i = 0;
+	struct resource *res = &pgmap->res;
 
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -367,47 +357,18 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	if (is_ram == REGION_INTERSECTS)
 		return __va(res->start);
 
-	if (!ref)
+	if (!pgmap->ref)
 		return ERR_PTR(-EINVAL);
 
-	page_map = devres_alloc_node(devm_memremap_pages_release,
-			sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
-	if (!page_map)
-		return ERR_PTR(-ENOMEM);
-	pgmap = &page_map->pgmap;
-
-	memcpy(&page_map->res, res, sizeof(*res));
-
 	pgmap->dev = dev;
-	if (altmap) {
-		memcpy(&page_map->altmap, altmap, sizeof(*altmap));
-		pgmap->altmap = &page_map->altmap;
-	}
-	pgmap->ref = ref;
-	pgmap->res = &page_map->res;
-	pgmap->type = MEMORY_DEVICE_HOST;
-	pgmap->page_fault = NULL;
-	pgmap->page_free = NULL;
-	pgmap->data = NULL;
 
 	mutex_lock(&pgmap_lock);
 	error = 0;
 	align_end = align_start + align_size - 1;
 
 	foreach_order_pgoff(res, order, pgoff) {
-		struct dev_pagemap *dup;
-
-		rcu_read_lock();
-		dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff));
-		rcu_read_unlock();
-		if (dup) {
-			dev_err(dev, "%s: %pr collides with mapping for %s\n",
-					__func__, res, dev_name(dup->dev));
-			error = -EBUSY;
-			break;
-		}
 		error = __radix_tree_insert(&pgmap_radix,
-				PHYS_PFN(res->start) + pgoff, order, page_map);
+				PHYS_PFN(res->start) + pgoff, order, pgmap);
 		if (error) {
 			dev_err(dev, "%s: failed: %d\n", __func__, error);
 			break;
@@ -427,16 +388,16 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 		goto err_pfn_remap;
 
 	mem_hotplug_begin();
-	error = arch_add_memory(nid, align_start, align_size, false);
+	error = arch_add_memory(nid, align_start, align_size, altmap, false);
 	if (!error)
 		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
 					align_start >> PAGE_SHIFT,
-					align_size >> PAGE_SHIFT);
+					align_size >> PAGE_SHIFT, altmap);
 	mem_hotplug_done();
 	if (error)
 		goto err_add_memory;
 
-	for_each_device_pfn(pfn, page_map) {
+	for_each_device_pfn(pfn, pgmap) {
 		struct page *page = pfn_to_page(pfn);
 
 		/*
@@ -447,19 +408,21 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 		 */
 		list_del(&page->lru);
 		page->pgmap = pgmap;
-		percpu_ref_get(ref);
+		percpu_ref_get(pgmap->ref);
 		if (!(++i % 1024))
 			cond_resched();
 	}
-	devres_add(dev, page_map);
+
+	devm_add_action(dev, devm_memremap_pages_release, pgmap);
+
 	return __va(res->start);
 
  err_add_memory:
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
  err_pfn_remap:
  err_radix:
-	pgmap_radix_release(res);
-	devres_free(page_map);
+	pgmap_radix_release(res, pgoff);
+	devres_free(pgmap);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(devm_memremap_pages);
@@ -475,34 +438,39 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
 	altmap->alloc -= nr_pfns;
 }
 
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to lookup page_map
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * If @pgmap is non-NULL and covers @pfn it will be returned as-is. If @pgmap
+ * is non-NULL but does not cover @pfn the reference to it will be released.
+ */
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+		struct dev_pagemap *pgmap)
 {
-	/*
-	 * 'memmap_start' is the virtual address for the first "struct
-	 * page" in this range of the vmemmap array. In the case of
-	 * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple
-	 * pointer arithmetic, so we can perform this to_vmem_altmap()
-	 * conversion without concern for the initialization state of
-	 * the struct page fields.
-	 */
-	struct page *page = (struct page *) memmap_start;
-	struct dev_pagemap *pgmap;
+	resource_size_t phys = PFN_PHYS(pfn);
 
 	/*
-	 * Unconditionally retrieve a dev_pagemap associated with the
-	 * given physical address, this is only for use in the
-	 * arch_{add|remove}_memory() for setting up and tearing down
-	 * the memmap.
+	 * In the cached case we're already holding a live reference.
 	 */
+	if (pgmap) {
+		if (phys >= pgmap->res.start && phys <= pgmap->res.end)
+			return pgmap;
+		put_dev_pagemap(pgmap);
+	}
+
+	/* fall back to slow path lookup */
 	rcu_read_lock();
-	pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
+	pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
+	if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+		pgmap = NULL;
 	rcu_read_unlock();
 
-	return pgmap ? pgmap->altmap : NULL;
+	return pgmap;
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
-
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 void put_zone_device_private_or_public_page(struct page *page)
 {