author    Linus Torvalds <torvalds@linux-foundation.org>  2018-04-10 13:25:57 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-04-10 13:25:57 -0400
commit    9f3a0941fb5efaa4d27911e251dc595034d58baa (patch)
tree      7212d9872b41b73a0b3c4f8c991039b639add212
parent    fbe173e3ffbd897b5a859020d714c0eaf4af2a1a (diff)
parent    e13e75b86ef2f88e3a47d672dd4c52a293efb95b (diff)
Merge tag 'libnvdimm-for-4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
 "This cycle was not something I ever want to repeat as there were
  several late changes that have only now just settled.

  Half of the branch up to commit d2c997c0f145 ("fs, dax: use
  page->mapping to warn...") has been in -next for several releases.
  The of_pmem driver and the address range scrub rework were late
  arrivals, and the dax work was scaled back at the last moment.

  The of_pmem driver missed a previous merge window due to an
  oversight. A sense of obligation to rectify that miss is why it is
  included for 4.17. It has acks from PowerPC folks. Stephen reported a
  build failure that only occurs when merging it with your latest tree;
  for now I have fixed that up by disabling modular builds of of_pmem.
  A test merge with your tree has received a build success report from
  the 0day robot over 156 configs.

  An initial version of the ARS rework was submitted before the merge
  window. It is self-contained to libnvdimm, a net code reduction, and
  passes all unit tests.

  The filesystem-dax changes are based on the wait_var_event()
  functionality from tip/sched/core. However, late review feedback
  showed that those changes regressed truncate performance to a large
  degree. The branch was rewound to drop the truncate behavior change
  and now only includes preparation patches and cleanups (with full
  acks and reviews). The finalization of this dax-dma-vs-truncate work
  will need to wait for 4.18.

  Summary:

   - A rework of the filesystem-dax implementation provides for
     detection of unmap operations (truncate / hole punch) colliding
     with in-progress device-DMA. A fix for these collisions remains a
     work-in-progress pending resolution of truncate latency and
     starvation regressions.

   - The of_pmem driver expands the users of libnvdimm outside of x86
     and ACPI to describe an implementation of persistent memory on
     PowerPC with Open Firmware / Device tree.

   - Address Range Scrub (ARS) handling is completely rewritten to
     account for the fact that ARS may run for 100s of seconds and
     there is no platform-defined way to cancel it. ARS will now no
     longer block namespace initialization.

   - The NVDIMM Namespace Label implementation is updated to handle
     label areas as small as 1K, down from 128K.

   - Miscellaneous cleanups and updates to unit test infrastructure"

* tag 'libnvdimm-for-4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (39 commits)
  libnvdimm, of_pmem: workaround OF_NUMA=n build error
  nfit, address-range-scrub: add module option to skip initial ars
  nfit, address-range-scrub: rework and simplify ARS state machine
  nfit, address-range-scrub: determine one platform max_ars value
  powerpc/powernv: Create platform devs for nvdimm buses
  doc/devicetree: Persistent memory region bindings
  libnvdimm: Add device-tree based driver
  libnvdimm: Add of_node to region and bus descriptors
  libnvdimm, region: quiet region probe
  libnvdimm, namespace: use a safe lookup for dimm device name
  libnvdimm, dimm: fix dpa reservation vs uninitialized label area
  libnvdimm, testing: update the default smart ctrl_temperature
  libnvdimm, testing: Add emulation for smart injection commands
  nfit, address-range-scrub: introduce nfit_spa->ars_state
  libnvdimm: add an api to cast a 'struct nd_region' to its 'struct device'
  nfit, address-range-scrub: fix scrub in-progress reporting
  dax, dm: allow device-mapper to operate without dax support
  dax: introduce CONFIG_DAX_DRIVER
  fs, dax: use page->mapping to warn if truncate collides with a busy page
  ext2, dax: introduce ext2_dax_aops
  ...
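The ARS rework summarized above moves long-running scrubs off a blocking wait loop and onto a self-rearming delayed work item (see the acpi_nfit_scrub() changes in drivers/acpi/nfit/core.c below). A minimal sketch of that scheduling pattern, with hypothetical names standing in for the real nfit state machine:

/*
 * Minimal sketch of the self-rearming delayed-work pattern the ARS
 * rework adopts; scrub_busy() and the timeout policy are hypothetical
 * stand-ins, not the merged nfit code.
 */
#include <linux/module.h>
#include <linux/workqueue.h>

static struct delayed_work scrub_dwork;
static unsigned int scrub_tmo = 1;

static bool scrub_busy(void)
{
	return false;	/* poll device/firmware scrub status here */
}

static void scrub_fn(struct work_struct *work)
{
	if (scrub_busy()) {
		/* back off exponentially, capped at 30 minutes */
		scrub_tmo = min(30U * 60U, scrub_tmo * 2);
		schedule_delayed_work(&scrub_dwork, scrub_tmo * HZ);
	}
}

static int __init scrub_init(void)
{
	INIT_DELAYED_WORK(&scrub_dwork, scrub_fn);
	schedule_delayed_work(&scrub_dwork, 0);
	return 0;
}
module_init(scrub_init);

static void __exit scrub_exit(void)
{
	cancel_delayed_work_sync(&scrub_dwork);
}
module_exit(scrub_exit);
MODULE_LICENSE("GPL");

The exponential backoff capped at 30 minutes mirrors the retry policy visible in __acpi_nfit_scrub() in the diff below.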
-rw-r--r--  Documentation/devicetree/bindings/pmem/pmem-region.txt | 65
-rw-r--r--  MAINTAINERS | 8
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 3
-rw-r--r--  drivers/acpi/nfit/core.c | 679
-rw-r--r--  drivers/acpi/nfit/mce.c | 5
-rw-r--r--  drivers/acpi/nfit/nfit.h | 22
-rw-r--r--  drivers/dax/Kconfig | 5
-rw-r--r--  drivers/dax/device.c | 38
-rw-r--r--  drivers/dax/pmem.c | 18
-rw-r--r--  drivers/dax/super.c | 15
-rw-r--r--  drivers/md/Kconfig | 2
-rw-r--r--  drivers/md/dm-linear.c | 6
-rw-r--r--  drivers/md/dm-log-writes.c | 95
-rw-r--r--  drivers/md/dm-stripe.c | 6
-rw-r--r--  drivers/md/dm.c | 10
-rw-r--r--  drivers/nvdimm/Kconfig | 13
-rw-r--r--  drivers/nvdimm/Makefile | 1
-rw-r--r--  drivers/nvdimm/btt_devs.c | 21
-rw-r--r--  drivers/nvdimm/bus.c | 14
-rw-r--r--  drivers/nvdimm/claim.c | 2
-rw-r--r--  drivers/nvdimm/core.c | 6
-rw-r--r--  drivers/nvdimm/dax_devs.c | 5
-rw-r--r--  drivers/nvdimm/dimm.c | 8
-rw-r--r--  drivers/nvdimm/dimm_devs.c | 7
-rw-r--r--  drivers/nvdimm/label.c | 85
-rw-r--r--  drivers/nvdimm/label.h | 2
-rw-r--r--  drivers/nvdimm/namespace_devs.c | 42
-rw-r--r--  drivers/nvdimm/nd.h | 1
-rw-r--r--  drivers/nvdimm/of_pmem.c | 119
-rw-r--r--  drivers/nvdimm/pfn_devs.c | 25
-rw-r--r--  drivers/nvdimm/pmem.c | 14
-rw-r--r--  drivers/nvdimm/region.c | 4
-rw-r--r--  drivers/nvdimm/region_devs.c | 9
-rw-r--r--  drivers/s390/block/Kconfig | 2
-rw-r--r--  fs/block_dev.c | 5
-rw-r--r--  fs/dax.c | 146
-rw-r--r--  fs/ext2/ext2.h | 1
-rw-r--r--  fs/ext2/inode.c | 46
-rw-r--r--  fs/ext2/namei.c | 18
-rw-r--r--  fs/ext4/inode.c | 42
-rw-r--r--  fs/libfs.c | 39
-rw-r--r--  fs/xfs/xfs_aops.c | 34
-rw-r--r--  fs/xfs/xfs_aops.h | 1
-rw-r--r--  fs/xfs/xfs_iops.c | 5
-rw-r--r--  include/linux/dax.h | 42
-rw-r--r--  include/linux/fs.h | 4
-rw-r--r--  include/linux/libnvdimm.h | 4
-rw-r--r--  include/linux/nd.h | 6
-rw-r--r--  tools/testing/nvdimm/test/nfit.c | 239
-rw-r--r--  tools/testing/nvdimm/test/nfit_test.h | 16
50 files changed, 1217 insertions, 788 deletions
diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt
new file mode 100644
index 000000000000..5cfa4f016a00
--- /dev/null
+++ b/Documentation/devicetree/bindings/pmem/pmem-region.txt
@@ -0,0 +1,65 @@
+Device-tree bindings for persistent memory regions
+-----------------------------------------------------
+
+Persistent memory refers to a class of memory devices that are:
+
+	a) Usable as main system memory (i.e. cacheable), and
+	b) Retain their contents across power failure.
+
+Given b) it is best to think of persistent memory as a kind of memory mapped
+storage device. To ensure data integrity the operating system needs to manage
+persistent regions separately to the normal memory pool. To aid with that this
+binding provides a standardised interface for discovering where persistent
+memory regions exist inside the physical address space.
+
+Bindings for the region nodes:
+-----------------------------
+
+Required properties:
+	- compatible = "pmem-region"
+
+	- reg = <base, size>;
+		The reg property should specify an address range that is
+		translatable to a system physical address range. This address
+		range should be mappable as normal system memory would be
+		(i.e. cacheable).
+
+		If the reg property contains multiple address ranges
+		each address range will be treated as though it was specified
+		in a separate device node. Having multiple address ranges in a
+		node implies no special relationship between the two ranges.
+
+Optional properties:
+	- Any relevant NUMA associativity properties for the target platform.
+
+	- volatile; This property indicates that this region is actually
+	  backed by non-persistent memory. This lets the OS know that it
+	  may skip the cache flushes required to ensure data is made
+	  persistent after a write.
+
+	  If this property is absent then the OS must assume that the region
+	  is backed by non-volatile memory.
+
+Examples:
+--------------------
+
+	/*
+	 * This node specifies one 4KB region spanning from
+	 * 0x5000 to 0x5fff that is backed by non-volatile memory.
+	 */
+	pmem@5000 {
+		compatible = "pmem-region";
+		reg = <0x00005000 0x00001000>;
+	};
+
+	/*
+	 * This node specifies two 4KB regions that are backed by
+	 * volatile (normal) memory.
+	 */
+	pmem@6000 {
+		compatible = "pmem-region";
+		reg = < 0x00006000 0x00001000
+			0x00008000 0x00001000 >;
+		volatile;
+	};
+
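On the kernel side, a node like the examples above is bound by a platform driver matching the "pmem-region" compatible string; the real driver added by this merge is drivers/nvdimm/of_pmem.c, which is not shown in this diff. A hypothetical skeleton of that binding:

/*
 * Hypothetical skeleton of a driver binding to "pmem-region" nodes;
 * the actual implementation lives in drivers/nvdimm/of_pmem.c.
 */
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int pmem_region_probe(struct platform_device *pdev)
{
	/* walk pdev->dev.of_node "reg" ranges and register pmem regions */
	return 0;
}

static const struct of_device_id pmem_region_match[] = {
	{ .compatible = "pmem-region" },
	{ },
};
MODULE_DEVICE_TABLE(of, pmem_region_match);

static struct platform_driver pmem_region_driver = {
	.probe = pmem_region_probe,
	.driver = {
		.name = "pmem-region-example",
		.of_match_table = pmem_region_match,
	},
};
module_platform_driver(pmem_region_driver);
MODULE_LICENSE("GPL");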
diff --git a/MAINTAINERS b/MAINTAINERS
index dd7ce9171ac0..00855ffc8de9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8048,6 +8048,14 @@ Q: https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/pmem*
 
+LIBNVDIMM: DEVICETREE BINDINGS
+M:	Oliver O'Halloran <oohall@gmail.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:	Supported
+F:	drivers/nvdimm/of_pmem.c
+F:	Documentation/devicetree/bindings/pmem/pmem-region.txt
+
 LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
 M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-nvdimm@lists.01.org
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 516e23de5a3d..48fbb41af5d1 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -824,6 +824,9 @@ static int __init opal_init(void)
 	/* Create i2c platform devices */
 	opal_pdev_init("ibm,opal-i2c");
 
+	/* Handle non-volatile memory devices */
+	opal_pdev_init("pmem-region");
+
 	/* Setup a heartbeat thread if requested by OPAL */
 	opal_init_heartbeat();
 
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 22a112b4f4d8..e2235ed3e4be 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -36,16 +36,6 @@ static bool force_enable_dimms;
 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
 
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-		"Number of times we overflow ARS results before abort");
-
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
@@ -60,6 +50,10 @@ module_param(default_dsm_family, int, S_IRUGO);
 MODULE_PARM_DESC(default_dsm_family,
 		"Try this DSM type first when identifying NVDIMM family");
 
+static bool no_init_ars;
+module_param(no_init_ars, bool, 0644);
+MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");
+
 LIST_HEAD(acpi_descs);
 DEFINE_MUTEX(acpi_desc_lock);
 
@@ -197,7 +191,7 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd
 		 * In the _LSI, _LSR, _LSW case the locked status is
 		 * communicated via the read/write commands
 		 */
-		if (nfit_mem->has_lsi)
+		if (nfit_mem->has_lsr)
 			break;
 
 		if (status >> 16 & ND_CONFIG_LOCKED)
@@ -477,14 +471,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		in_buf.buffer.length = call_pkg->nd_size_in;
 	}
 
-	dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
-			__func__, dimm_name, cmd, func, in_buf.buffer.length);
+	dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n",
+			dimm_name, cmd, func, in_buf.buffer.length);
 	print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
 	/* call the BIOS, prefer the named methods over _DSM if available */
-	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi)
+	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr)
 		out_obj = acpi_label_info(handle);
 	else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) {
 		struct nd_cmd_get_config_data_hdr *p = buf;
@@ -507,8 +501,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	}
 
 	if (!out_obj) {
-		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
-				cmd_name);
+		dev_dbg(dev, "%s _DSM failed cmd: %s\n", dimm_name, cmd_name);
 		return -EINVAL;
 	}
 
@@ -529,13 +522,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	}
 
 	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
-		dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
-				__func__, dimm_name, cmd_name, out_obj->type);
+		dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
+				dimm_name, cmd_name, out_obj->type);
 		rc = -EINVAL;
 		goto out;
 	}
 
-	dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name,
+	dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
 			cmd_name, out_obj->buffer.length);
 	print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
 			out_obj->buffer.pointer,
@@ -547,14 +540,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 				out_obj->buffer.length - offset);
 
 		if (offset + out_size > out_obj->buffer.length) {
-			dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s output object underflow cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			break;
 		}
 
 		if (in_buf.buffer.length + offset + out_size > buf_len) {
-			dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s output overrun cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			rc = -ENXIO;
 			goto out;
 		}
@@ -656,7 +649,7 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_spa->list);
 	memcpy(nfit_spa->spa, spa, sizeof(*spa));
 	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
-	dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+	dev_dbg(dev, "spa index: %d type: %s\n",
 			spa->range_index,
 			spa_type_name(nfit_spa_type(spa)));
 	return true;
@@ -685,8 +678,8 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_memdev->list);
 	memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
 	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
-			__func__, memdev->device_handle, memdev->range_index,
+	dev_dbg(dev, "memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
+			memdev->device_handle, memdev->range_index,
 			memdev->region_index, memdev->flags);
 	return true;
 }
@@ -754,7 +747,7 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_dcr->list);
 	memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
 	list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-	dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+	dev_dbg(dev, "dcr index: %d windows: %d\n",
 			dcr->region_index, dcr->windows);
 	return true;
 }
@@ -781,7 +774,7 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_bdw->list);
 	memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
 	list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
-	dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+	dev_dbg(dev, "bdw dcr: %d windows: %d\n",
 			bdw->region_index, bdw->windows);
 	return true;
 }
@@ -820,7 +813,7 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_idt->list);
 	memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
 	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
-	dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+	dev_dbg(dev, "idt index: %d num_lines: %d\n",
 			idt->interleave_index, idt->line_count);
 	return true;
 }
@@ -860,7 +853,7 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_flush->list);
 	memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
 	list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
-	dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+	dev_dbg(dev, "nfit_flush handle: %d hint_count: %d\n",
 			flush->device_handle, flush->hint_count);
 	return true;
 }
@@ -873,7 +866,7 @@ static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc,
 
 	mask = (1 << (pcap->highest_capability + 1)) - 1;
 	acpi_desc->platform_cap = pcap->capabilities & mask;
-	dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap);
+	dev_dbg(dev, "cap: %#x\n", acpi_desc->platform_cap);
 	return true;
 }
 
@@ -920,7 +913,7 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc,
 			return err;
 		break;
 	case ACPI_NFIT_TYPE_SMBIOS:
-		dev_dbg(dev, "%s: smbios\n", __func__);
+		dev_dbg(dev, "smbios\n");
 		break;
 	case ACPI_NFIT_TYPE_CAPABILITIES:
 		if (!add_platform_cap(acpi_desc, table))
@@ -1277,8 +1270,11 @@ static ssize_t scrub_show(struct device *dev,
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
+		mutex_lock(&acpi_desc->init_mutex);
 		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-				(work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+				work_busy(&acpi_desc->dwork.work)
+				&& !acpi_desc->cancel ? "+\n" : "\n");
+		mutex_unlock(&acpi_desc->init_mutex);
 	}
 	device_unlock(dev);
 	return rc;
@@ -1648,7 +1644,7 @@ void __acpi_nvdimm_notify(struct device *dev, u32 event)
 	struct nfit_mem *nfit_mem;
 	struct acpi_nfit_desc *acpi_desc;
 
-	dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
+	dev_dbg(dev->parent, "%s: event: %d\n", dev_name(dev),
 			event);
 
 	if (event != NFIT_NOTIFY_DIMM_HEALTH) {
@@ -1681,12 +1677,23 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
 	device_unlock(dev->parent);
 }
 
+static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
+{
+	acpi_handle handle;
+	acpi_status status;
+
+	status = acpi_get_handle(adev->handle, method, &handle);
+
+	if (ACPI_SUCCESS(status))
+		return true;
+	return false;
+}
+
 static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_mem *nfit_mem, u32 device_handle)
 {
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
-	union acpi_object *obj;
 	unsigned long dsm_mask;
 	const guid_t *guid;
 	int i;
@@ -1759,25 +1766,15 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 				1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
-	obj = acpi_label_info(adev_dimm->handle);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsi = 1;
-		dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev));
-	}
-
-	obj = acpi_label_read(adev_dimm->handle, 0, 0);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsr = 1;
+	if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
+			&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
 		dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
+		nfit_mem->has_lsr = true;
 	}
 
-	obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsw = 1;
+	if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
 		dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
+		nfit_mem->has_lsw = true;
 	}
 
 	return 0;
@@ -1866,10 +1863,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
 		}
 
-		if (nfit_mem->has_lsi)
+		if (nfit_mem->has_lsr) {
 			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
-		if (nfit_mem->has_lsr)
 			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
+		}
 		if (nfit_mem->has_lsw)
 			set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
 
@@ -2365,7 +2362,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	nvdimm = nd_blk_region_to_dimm(ndbr);
 	nfit_mem = nvdimm_provider_data(nvdimm);
 	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
-		dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+		dev_dbg(dev, "missing%s%s%s\n",
 				nfit_mem ? "" : " nfit_mem",
 				(nfit_mem && nfit_mem->dcr) ? "" : " dcr",
 				(nfit_mem && nfit_mem->bdw) ? "" : " bdw");
@@ -2384,7 +2381,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
 			nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
 	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+		dev_dbg(dev, "%s failed to map bdw\n",
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
 	}
@@ -2395,8 +2392,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
 			nfit_mem->memdev_bdw->interleave_ways);
 	if (rc) {
-		dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed to init bdw interleave\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
@@ -2407,7 +2404,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
 			nfit_mem->spa_dcr->length);
 	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+		dev_dbg(dev, "%s failed to map dcr\n",
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
 	}
@@ -2418,15 +2415,15 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
 			nfit_mem->memdev_dcr->interleave_ways);
 	if (rc) {
-		dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed to init dcr interleave\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
 	rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
 	if (rc < 0) {
-		dev_dbg(dev, "%s: %s failed get DIMM flags\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed get DIMM flags\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
@@ -2476,7 +2473,8 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
 	memset(&ars_start, 0, sizeof(ars_start));
 	ars_start.address = spa->address;
 	ars_start.length = spa->length;
-	ars_start.flags = acpi_desc->ars_start_flags;
+	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
 	if (nfit_spa_type(spa) == NFIT_SPA_PM)
 		ars_start.type = ND_ARS_PERSISTENT;
 	else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
@@ -2518,16 +2516,62 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
 	int rc, cmd_rc;
 
 	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
-			acpi_desc->ars_status_size, &cmd_rc);
+			acpi_desc->max_ars, &cmd_rc);
 	if (rc < 0)
 		return rc;
 	return cmd_rc;
 }
 
-static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc,
-		struct nd_cmd_ars_status *ars_status)
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+	struct acpi_nfit_system_address *spa = nfit_spa->spa;
+	struct nd_region *nd_region = nfit_spa->nd_region;
+	struct device *dev;
+
+	if ((ars_status->address >= spa->address && ars_status->address
+				< spa->address + spa->length)
+			|| (ars_status->address < spa->address)) {
+		/*
+		 * Assume that if a scrub starts at an offset from the
+		 * start of nfit_spa that we are in the continuation
+		 * case.
+		 *
+		 * Otherwise, if the scrub covers the spa range, mark
+		 * any pending request complete.
+		 */
+		if (ars_status->address + ars_status->length
+				>= spa->address + spa->length)
+			/* complete */;
+		else
+			return;
+	} else
+		return;
+
+	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+		return;
+
+	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+		return;
+
+	if (nd_region) {
+		dev = nd_region_dev(nd_region);
+		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+	} else
+		dev = acpi_desc->dev;
+
+	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+			test_bit(ARS_SHORT, &nfit_spa->ars_state)
+			? "short" : "long");
+	clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+	set_bit(ARS_DONE, &nfit_spa->ars_state);
+}
+
+static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
 	int rc;
 	u32 i;
 
@@ -2606,7 +2650,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	struct nd_blk_region_desc *ndbr_desc;
 	struct nfit_mem *nfit_mem;
-	int blk_valid = 0, rc;
+	int rc;
 
 	if (!nvdimm) {
 		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
@@ -2626,15 +2670,14 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 		if (!nfit_mem || !nfit_mem->bdw) {
 			dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
 					spa->range_index, nvdimm_name(nvdimm));
-		} else {
-			mapping->size = nfit_mem->bdw->capacity;
-			mapping->start = nfit_mem->bdw->start_address;
-			ndr_desc->num_lanes = nfit_mem->bdw->windows;
-			blk_valid = 1;
+			break;
 		}
 
+		mapping->size = nfit_mem->bdw->capacity;
+		mapping->start = nfit_mem->bdw->start_address;
+		ndr_desc->num_lanes = nfit_mem->bdw->windows;
 		ndr_desc->mapping = mapping;
-		ndr_desc->num_mappings = blk_valid;
+		ndr_desc->num_mappings = 1;
 		ndbr_desc = to_blk_region_desc(ndr_desc);
 		ndbr_desc->enable = acpi_nfit_blk_region_enable;
 		ndbr_desc->do_io = acpi_desc->blk_do_io;
@@ -2682,8 +2725,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 		return 0;
 
 	if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
-		dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
-				__func__);
+		dev_dbg(acpi_desc->dev, "detected invalid spa index\n");
 		return 0;
 	}
 
@@ -2769,301 +2811,243 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 	return rc;
 }
 
-static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
-		u32 max_ars)
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc)
 {
 	struct device *dev = acpi_desc->dev;
 	struct nd_cmd_ars_status *ars_status;
 
-	if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
-		memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+	if (acpi_desc->ars_status) {
+		memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 		return 0;
 	}
 
-	if (acpi_desc->ars_status)
-		devm_kfree(dev, acpi_desc->ars_status);
-	acpi_desc->ars_status = NULL;
-	ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+	ars_status = devm_kzalloc(dev, acpi_desc->max_ars, GFP_KERNEL);
 	if (!ars_status)
 		return -ENOMEM;
 	acpi_desc->ars_status = ars_status;
-	acpi_desc->ars_status_size = max_ars;
 	return 0;
 }
 
-static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 {
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	int rc;
 
-	if (!nfit_spa->max_ars) {
-		struct nd_cmd_ars_cap ars_cap;
-
-		memset(&ars_cap, 0, sizeof(ars_cap));
-		rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
-		if (rc < 0)
-			return rc;
-		nfit_spa->max_ars = ars_cap.max_ars_out;
-		nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
-		/* check that the supported scrub types match the spa type */
-		if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
-				((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
-			return -ENOTTY;
-		else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
-				((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
-			return -ENOTTY;
-	}
-
-	if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+	if (ars_status_alloc(acpi_desc))
 		return -ENOMEM;
 
 	rc = ars_get_status(acpi_desc);
+
 	if (rc < 0 && rc != -ENOSPC)
 		return rc;
 
-	if (ars_status_process_records(acpi_desc, acpi_desc->ars_status))
+	if (ars_status_process_records(acpi_desc))
 		return -ENOMEM;
 
 	return 0;
 }
 
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
+static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+		int *query_rc)
 {
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
-	unsigned int overflow_retry = scrub_overflow_abort;
-	u64 init_ars_start = 0, init_ars_len = 0;
-	struct device *dev = acpi_desc->dev;
-	unsigned int tmo = scrub_timeout;
-	int rc;
+	int rc = *query_rc;
 
-	if (!nfit_spa->ars_required || !nfit_spa->nd_region)
-		return;
+	if (no_init_ars)
+		return acpi_nfit_register_region(acpi_desc, nfit_spa);
 
-	rc = ars_start(acpi_desc, nfit_spa);
-	/*
-	 * If we timed out the initial scan we'll still be busy here,
-	 * and will wait another timeout before giving up permanently.
-	 */
-	if (rc < 0 && rc != -EBUSY)
-		return;
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
+	set_bit(ARS_SHORT, &nfit_spa->ars_state);
 
-	do {
-		u64 ars_start, ars_len;
-
-		if (acpi_desc->cancel)
-			break;
-		rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-		if (rc == -ENOTTY)
-			break;
-		if (rc == -EBUSY && !tmo) {
-			dev_warn(dev, "range %d ars timeout, aborting\n",
-					spa->range_index);
-			break;
-		}
-
+	switch (rc) {
+	case 0:
+	case -EAGAIN:
+		rc = ars_start(acpi_desc, nfit_spa);
 		if (rc == -EBUSY) {
-			/*
-			 * Note, entries may be appended to the list
-			 * while the lock is dropped, but the workqueue
-			 * being active prevents entries being deleted /
-			 * freed.
-			 */
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			mutex_lock(&acpi_desc->init_mutex);
-			continue;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			if (!init_ars_len) {
-				init_ars_len = acpi_desc->ars_status->length;
-				init_ars_start = acpi_desc->ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-		}
-
-		if (rc < 0) {
-			dev_warn(dev, "range %d ars continuation failed\n",
-					spa->range_index);
+			*query_rc = rc;
 			break;
-		}
-
-		if (init_ars_len) {
-			ars_start = init_ars_start;
-			ars_len = init_ars_len;
+		} else if (rc == 0) {
+			rc = acpi_nfit_query_poison(acpi_desc);
 		} else {
-			ars_start = acpi_desc->ars_status->address;
-			ars_len = acpi_desc->ars_status->length;
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
+			break;
 		}
-		dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-				spa->range_index, ars_start, ars_len);
-		/* notify the region about new poison entries */
-		nvdimm_region_notify(nfit_spa->nd_region,
-				NVDIMM_REVALIDATE_POISON);
+		if (rc == -EAGAIN)
+			clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+		else if (rc == 0)
+			ars_complete(acpi_desc, nfit_spa);
 		break;
-	} while (1);
+	case -EBUSY:
+	case -ENOSPC:
+		break;
+	default:
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		break;
+	}
+
+	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+		set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+	return acpi_nfit_register_region(acpi_desc, nfit_spa);
 }
 
-static void acpi_nfit_scrub(struct work_struct *work)
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
 {
-	struct device *dev;
-	u64 init_scrub_length = 0;
 	struct nfit_spa *nfit_spa;
-	u64 init_scrub_address = 0;
-	bool init_ars_done = false;
-	struct acpi_nfit_desc *acpi_desc;
-	unsigned int tmo = scrub_timeout;
-	unsigned int overflow_retry = scrub_overflow_abort;
-
-	acpi_desc = container_of(work, typeof(*acpi_desc), work);
-	dev = acpi_desc->dev;
-
-	/*
-	 * We scrub in 2 phases. The first phase waits for any platform
-	 * firmware initiated scrubs to complete and then we go search for the
-	 * affected spa regions to mark them scanned. In the second phase we
-	 * initiate a directed scrub for every range that was not scrubbed in
-	 * phase 1. If we're called for a 'rescan', we harmlessly pass through
-	 * the first phase, but really only care about running phase 2, where
-	 * regions can be notified of new poison.
-	 */
 
-	/* process platform firmware initiated scrubs */
- retry:
-	mutex_lock(&acpi_desc->init_mutex);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct nd_cmd_ars_status *ars_status;
-		struct acpi_nfit_system_address *spa;
-		u64 ars_start, ars_len;
-		int rc;
-
-		if (acpi_desc->cancel)
-			break;
-
-		if (nfit_spa->nd_region)
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
+		ars_complete(acpi_desc, nfit_spa);
+	}
+}
 
-		if (init_ars_done) {
-			/*
-			 * No need to re-query, we're now just
-			 * reconciling all the ranges covered by the
-			 * initial scrub
-			 */
-			rc = 0;
-		} else
-			rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-
-		if (rc == -ENOTTY) {
-			/* no ars capability, just register spa and move on */
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
-			continue;
-		}
-
-		if (rc == -EBUSY && !tmo) {
-			/* fallthrough to directed scrub in phase 2 */
-			dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-			break;
-		} else if (rc == -EBUSY) {
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			goto retry;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			ars_status = acpi_desc->ars_status;
-			/*
-			 * Record the original scrub range, so that we
-			 * can recall all the ranges impacted by the
-			 * initial scrub.
-			 */
-			if (!init_scrub_length) {
-				init_scrub_length = ars_status->length;
-				init_scrub_address = ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-			if (rc == 0) {
-				mutex_unlock(&acpi_desc->init_mutex);
-				goto retry;
-			}
-		}
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+		int query_rc)
+{
+	unsigned int tmo = acpi_desc->scrub_tmo;
+	struct device *dev = acpi_desc->dev;
+	struct nfit_spa *nfit_spa;
 
-		if (rc < 0) {
-			/*
-			 * Initial scrub failed, we'll give it one more
-			 * try below...
-			 */
-			break;
-		}
+	if (acpi_desc->cancel)
+		return 0;
 
-		/* We got some final results, record completed ranges */
-		ars_status = acpi_desc->ars_status;
-		if (init_scrub_length) {
-			ars_start = init_scrub_address;
-			ars_len = ars_start + init_scrub_length;
-		} else {
-			ars_start = ars_status->address;
-			ars_len = ars_status->length;
-		}
-		spa = nfit_spa->spa;
+	if (query_rc == -EBUSY) {
+		dev_dbg(dev, "ARS: ARS busy\n");
+		return min(30U * 60U, tmo * 2);
+	}
+	if (query_rc == -ENOSPC) {
+		dev_dbg(dev, "ARS: ARS continue\n");
+		ars_continue(acpi_desc);
+		return 1;
+	}
+	if (query_rc && query_rc != -EAGAIN) {
+		unsigned long long addr, end;
 
-		if (!init_ars_done) {
-			init_ars_done = true;
-			dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-					ars_start, ars_len);
-		}
-		if (ars_start <= spa->address && ars_start + ars_len
-				>= spa->address + spa->length)
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		addr = acpi_desc->ars_status->address;
+		end = addr + acpi_desc->ars_status->length;
+		dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+				query_rc);
 	}
 
-	/*
-	 * For all the ranges not covered by an initial scrub we still
-	 * want to see if there are errors, but it's ok to discover them
-	 * asynchronously.
-	 */
+	ars_complete_all(acpi_desc);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		/*
-		 * Flag all the ranges that still need scrubbing, but
-		 * register them now to make data available.
-		 */
-		if (!nfit_spa->nd_region) {
-			nfit_spa->ars_required = 1;
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+			continue;
+		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+			int rc = ars_start(acpi_desc, nfit_spa);
+
+			clear_bit(ARS_DONE, &nfit_spa->ars_state);
+			dev = nd_region_dev(nfit_spa->nd_region);
+			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			if (rc == 0 || rc == -EBUSY)
+				return 1;
+			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
 		}
 	}
-	acpi_desc->init_complete = 1;
+	return 0;
+}
 
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-	acpi_desc->scrub_count++;
-	acpi_desc->ars_start_flags = 0;
-	if (acpi_desc->scrub_count_state)
-		sysfs_notify_dirent(acpi_desc->scrub_count_state);
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+	struct acpi_nfit_desc *acpi_desc;
+	unsigned int tmo;
+	int query_rc;
+
+	acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
+	mutex_lock(&acpi_desc->init_mutex);
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+	if (tmo) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+		acpi_desc->scrub_tmo = tmo;
+	} else {
+		acpi_desc->scrub_count++;
+		if (acpi_desc->scrub_count_state)
+			sysfs_notify_dirent(acpi_desc->scrub_count_state);
+	}
+	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 	mutex_unlock(&acpi_desc->init_mutex);
 }
 
+static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	int type = nfit_spa_type(nfit_spa->spa);
+	struct nd_cmd_ars_cap ars_cap;
+	int rc;
+
+	memset(&ars_cap, 0, sizeof(ars_cap));
+	rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+	if (rc < 0)
+		return;
+	/* check that the supported scrub types match the spa type */
+	if (type == NFIT_SPA_VOLATILE && ((ars_cap.status >> 16)
+				& ND_ARS_VOLATILE) == 0)
+		return;
+	if (type == NFIT_SPA_PM && ((ars_cap.status >> 16)
+				& ND_ARS_PERSISTENT) == 0)
+		return;
+
+	nfit_spa->max_ars = ars_cap.max_ars_out;
+	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
+}
+
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
-	int rc;
+	int rc, query_rc;
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			acpi_nfit_init_ars(acpi_desc, nfit_spa);
+			break;
+		}
+	}
+
+	/*
+	 * Reap any results that might be pending before starting new
+	 * short requests.
+	 */
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	if (query_rc == 0)
+		ars_complete_all(acpi_desc);
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
-			/* BLK regions don't need to wait for ars results */
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			/* register regions and kick off initial ARS run */
+			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+			if (rc)
+				return rc;
+			break;
+		case NFIT_SPA_BDW:
+			/* nothing to register */
+			break;
+		case NFIT_SPA_DCR:
+		case NFIT_SPA_VDISK:
+		case NFIT_SPA_VCD:
+		case NFIT_SPA_PDISK:
+		case NFIT_SPA_PCD:
+			/* register known regions that don't support ARS */
 			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
 			if (rc)
 				return rc;
+			break;
+		default:
+			/* don't register unknown regions */
+			break;
 		}
 
-	acpi_desc->ars_start_flags = 0;
-	if (!acpi_desc->cancel)
-		queue_work(nfit_wq, &acpi_desc->work);
+	queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
 	return 0;
 }
 
@@ -3173,8 +3157,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 		data = add_table(acpi_desc, &prev, data, end);
 
 		if (IS_ERR(data)) {
-			dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
-					PTR_ERR(data));
+			dev_dbg(dev, "nfit table parsing error: %ld\n", PTR_ERR(data));
 			rc = PTR_ERR(data);
 			goto out_unlock;
 		}
@@ -3199,49 +3182,20 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_init);
 
-struct acpi_nfit_flush_work {
-	struct work_struct work;
-	struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-	struct acpi_nfit_flush_work *flush;
-
-	flush = container_of(work, typeof(*flush), work);
-	complete(&flush->cmp);
-}
-
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
 	struct device *dev = acpi_desc->dev;
-	struct acpi_nfit_flush_work flush;
-	int rc;
 
-	/* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
 	device_lock(dev);
 	device_unlock(dev);
 
-	/* bounce the init_mutex to make init_complete valid */
+	/* Bounce the init_mutex to complete initial registration */
 	mutex_lock(&acpi_desc->init_mutex);
-	if (acpi_desc->cancel || acpi_desc->init_complete) {
-		mutex_unlock(&acpi_desc->init_mutex);
-		return 0;
-	}
-
-	/*
-	 * Scrub work could take 10s of seconds, userspace may give up so we
-	 * need to be interruptible while waiting.
-	 */
-	INIT_WORK_ONSTACK(&flush.work, flush_probe);
-	init_completion(&flush.cmp);
-	queue_work(nfit_wq, &flush.work);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	rc = wait_for_completion_interruptible(&flush.cmp);
-	cancel_work_sync(&flush.work);
-	return rc;
+	return 0;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
@@ -3260,20 +3214,18 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	 * just needs guarantees that any ars it initiates are not
 	 * interrupted by any intervening start requests from userspace.
 	 */
-	if (work_busy(&acpi_desc->work))
+	if (work_busy(&acpi_desc->dwork.work))
 		return -EBUSY;
 
 	return 0;
 }
 
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
 	struct device *dev = acpi_desc->dev;
+	int scheduled = 0, busy = 0;
 	struct nfit_spa *nfit_spa;
 
-	if (work_busy(&acpi_desc->work))
-		return -EBUSY;
-
 	mutex_lock(&acpi_desc->init_mutex);
 	if (acpi_desc->cancel) {
 		mutex_unlock(&acpi_desc->init_mutex);
@@ -3281,19 +3233,32 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 	}
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct acpi_nfit_system_address *spa = nfit_spa->spa;
+		int type = nfit_spa_type(nfit_spa->spa);
 
-		if (nfit_spa_type(spa) != NFIT_SPA_PM)
+		if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+			continue;
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
 
-		nfit_spa->ars_required = 1;
+		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+			busy++;
+		else {
+			if (test_bit(ARS_SHORT, &flags))
+				set_bit(ARS_SHORT, &nfit_spa->ars_state);
+			scheduled++;
+		}
+	}
+	if (scheduled) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+		dev_dbg(dev, "ars_scan triggered\n");
 	}
-	acpi_desc->ars_start_flags = flags;
-	queue_work(nfit_wq, &acpi_desc->work);
-	dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	return 0;
+	if (scheduled)
+		return 0;
+	if (busy)
+		return -EBUSY;
+	return -ENOTTY;
 }
 
3299void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) 3264void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
@@ -3320,7 +3285,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
3320 INIT_LIST_HEAD(&acpi_desc->dimms); 3285 INIT_LIST_HEAD(&acpi_desc->dimms);
3321 INIT_LIST_HEAD(&acpi_desc->list); 3286 INIT_LIST_HEAD(&acpi_desc->list);
3322 mutex_init(&acpi_desc->init_mutex); 3287 mutex_init(&acpi_desc->init_mutex);
3323 INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); 3288 acpi_desc->scrub_tmo = 1;
3289 INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
3324} 3290}
3325EXPORT_SYMBOL_GPL(acpi_nfit_desc_init); 3291EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
3326 3292
@@ -3344,6 +3310,7 @@ void acpi_nfit_shutdown(void *data)
3344 3310
3345 mutex_lock(&acpi_desc->init_mutex); 3311 mutex_lock(&acpi_desc->init_mutex);
3346 acpi_desc->cancel = 1; 3312 acpi_desc->cancel = 1;
3313 cancel_delayed_work_sync(&acpi_desc->dwork);
3347 mutex_unlock(&acpi_desc->init_mutex); 3314 mutex_unlock(&acpi_desc->init_mutex);
3348 3315
3349 /* 3316 /*
@@ -3397,8 +3364,8 @@ static int acpi_nfit_add(struct acpi_device *adev)
3397 rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer, 3364 rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
3398 obj->buffer.length); 3365 obj->buffer.length);
3399 else 3366 else
3400 dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n", 3367 dev_dbg(dev, "invalid type %d, ignoring _FIT\n",
3401 __func__, (int) obj->type); 3368 (int) obj->type);
3402 kfree(buf.pointer); 3369 kfree(buf.pointer);
3403 } else 3370 } else
3404 /* skip over the lead-in header table */ 3371 /* skip over the lead-in header table */
@@ -3427,7 +3394,7 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
3427 3394
3428 if (!dev->driver) { 3395 if (!dev->driver) {
3429 /* dev->driver may be null if we're being removed */ 3396 /* dev->driver may be null if we're being removed */
3430 dev_dbg(dev, "%s: no driver found for dev\n", __func__); 3397 dev_dbg(dev, "no driver found for dev\n");
3431 return; 3398 return;
3432 } 3399 }
3433 3400
@@ -3465,15 +3432,15 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
3465static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle) 3432static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
3466{ 3433{
3467 struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); 3434 struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
3468 u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ? 3435 unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
3469 0 : ND_ARS_RETURN_PREV_DATA; 3436 0 : 1 << ARS_SHORT;
3470 3437
3471 acpi_nfit_ars_rescan(acpi_desc, flags); 3438 acpi_nfit_ars_rescan(acpi_desc, flags);
3472} 3439}
3473 3440
3474void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) 3441void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
3475{ 3442{
3476 dev_dbg(dev, "%s: event: 0x%x\n", __func__, event); 3443 dev_dbg(dev, "event: 0x%x\n", event);
3477 3444
3478 switch (event) { 3445 switch (event) {
3479 case NFIT_NOTIFY_UPDATE: 3446 case NFIT_NOTIFY_UPDATE:
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index b92921439657..e9626bf6ca29 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -51,9 +51,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 		if ((spa->address + spa->length - 1) < mce->addr)
 			continue;
 		found_match = 1;
-		dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
-				__func__, spa->range_index, spa->address,
-				spa->length);
+		dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
+				spa->range_index, spa->address, spa->length);
 		/*
 		 * We can break at the first match because we're going
 		 * to rescan all the SPA ranges. There shouldn't be any
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 50d36e166d70..7d15856a739f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -117,10 +117,17 @@ enum nfit_dimm_notifiers {
 	NFIT_NOTIFY_DIMM_HEALTH = 0x81,
 };
 
+enum nfit_ars_state {
+	ARS_REQ,
+	ARS_DONE,
+	ARS_SHORT,
+	ARS_FAILED,
+};
+
 struct nfit_spa {
 	struct list_head list;
 	struct nd_region *nd_region;
-	unsigned int ars_required:1;
+	unsigned long ars_state;
 	u32 clear_err_unit;
 	u32 max_ars;
 	struct acpi_nfit_system_address spa[0];
@@ -171,9 +178,8 @@ struct nfit_mem {
 	struct resource *flush_wpq;
 	unsigned long dsm_mask;
 	int family;
-	u32 has_lsi:1;
-	u32 has_lsr:1;
-	u32 has_lsw:1;
+	bool has_lsr;
+	bool has_lsw;
 };
 
 struct acpi_nfit_desc {
@@ -191,18 +197,18 @@ struct acpi_nfit_desc {
 	struct device *dev;
 	u8 ars_start_flags;
 	struct nd_cmd_ars_status *ars_status;
-	size_t ars_status_size;
-	struct work_struct work;
+	struct delayed_work dwork;
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
+	unsigned int max_ars;
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
-	unsigned int init_complete:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
 	unsigned long bus_nfit_cmd_force_en;
 	unsigned int platform_cap;
+	unsigned int scrub_tmo;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 };
@@ -244,7 +250,7 @@ struct nfit_blk {
 
 extern struct list_head acpi_descs;
 extern struct mutex acpi_desc_lock;
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags);
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
 
 #ifdef CONFIG_X86_MCE
 void nfit_mce_register(void);
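The four ars_state bits replace the old single ars_required flag, so a request, a completed pass, a short-scrub preference, and a hard failure can each be tracked per SPA range with atomic bitops. A minimal sketch of that lifecycle, assuming only the enum values above used as bit numbers; the example_* helpers are illustrative, not a copy of the driver:

	/* hypothetical helpers; requires <linux/bitops.h> for the bitops */
	static bool example_request_ars(struct nfit_spa *nfit_spa, bool short_ars)
	{
		/* never re-arm a range whose scrub previously failed */
		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
			return false;
		if (short_ars)
			set_bit(ARS_SHORT, &nfit_spa->ars_state);
		/* ARS_REQ already set means a scrub is pending or in flight */
		return !test_and_set_bit(ARS_REQ, &nfit_spa->ars_state);
	}

	static void example_complete_ars(struct nfit_spa *nfit_spa)
	{
		/* record that a pass finished, then allow new requests */
		set_bit(ARS_DONE, &nfit_spa->ars_state);
		clear_bit(ARS_REQ, &nfit_spa->ars_state);
	}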
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index b79aa8f7a497..e0700bf4893a 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -1,3 +1,7 @@
+config DAX_DRIVER
+	select DAX
+	bool
+
 menuconfig DAX
 	tristate "DAX: direct access to differentiated memory"
 	select SRCU
@@ -16,7 +20,6 @@ config DEV_DAX
 	  baseline memory pool. Mappings of a /dev/daxX.Y device impose
 	  restrictions that make the mapping behavior deterministic.
 
-
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
 	depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 0b61f48f21a6..be8606457f27 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -257,8 +257,8 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PAGE_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-				__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+				dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -267,8 +267,7 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				vmf->pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -299,14 +298,14 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PMD_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-				__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+				dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
 	/* dax pmd mappings require pfn_t_devmap() */
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-		dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+		dev_dbg(dev, "region lacks devmap flags\n");
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -323,8 +322,7 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pmd_addr);
 	phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -351,14 +349,14 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PUD_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-				__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+				dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
 	/* dax pud mappings require pfn_t_devmap() */
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-		dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+		dev_dbg(dev, "region lacks devmap flags\n");
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -375,8 +373,7 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pud_addr);
 	phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -399,9 +396,8 @@ static int dev_dax_huge_fault(struct vm_fault *vmf,
 	struct file *filp = vmf->vma->vm_file;
 	struct dev_dax *dev_dax = filp->private_data;
 
-	dev_dbg(&dev_dax->dev, "%s: %s: %s (%#lx - %#lx) size = %d\n", __func__,
-			current->comm, (vmf->flags & FAULT_FLAG_WRITE)
-			? "write" : "read",
+	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
+			(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
 			vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
 
 	id = dax_read_lock();
@@ -460,7 +456,7 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct dev_dax *dev_dax = filp->private_data;
 	int rc, id;
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 
 	/*
 	 * We lock to check dax_dev liveness and will re-check at
@@ -518,7 +514,7 @@ static int dax_open(struct inode *inode, struct file *filp)
 	struct inode *__dax_inode = dax_inode(dax_dev);
 	struct dev_dax *dev_dax = dax_get_private(dax_dev);
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 	inode->i_mapping = __dax_inode->i_mapping;
 	inode->i_mapping->host = __dax_inode;
 	filp->f_mapping = inode->i_mapping;
@@ -533,7 +529,7 @@ static int dax_release(struct inode *inode, struct file *filp)
 {
 	struct dev_dax *dev_dax = filp->private_data;
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 	return 0;
 }
 
@@ -575,7 +571,7 @@ static void unregister_dev_dax(void *dev)
 	struct inode *inode = dax_inode(dax_dev);
 	struct cdev *cdev = inode->i_cdev;
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 
 	kill_dev_dax(dev_dax);
 	cdev_device_del(cdev, dev);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 31b6ecce4c64..fd49b24fd6af 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -34,7 +34,7 @@ static void dax_pmem_percpu_release(struct percpu_ref *ref)
 {
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	complete(&dax_pmem->cmp);
 }
 
@@ -43,7 +43,7 @@ static void dax_pmem_percpu_exit(void *data)
 	struct percpu_ref *ref = data;
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	wait_for_completion(&dax_pmem->cmp);
 	percpu_ref_exit(ref);
 }
@@ -53,7 +53,7 @@ static void dax_pmem_percpu_kill(void *data)
 	struct percpu_ref *ref = data;
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	percpu_ref_kill(ref);
 }
 
@@ -150,17 +150,7 @@ static struct nd_device_driver dax_pmem_driver = {
 	.type = ND_DRIVER_DAX_PMEM,
 };
 
-static int __init dax_pmem_init(void)
-{
-	return nd_driver_register(&dax_pmem_driver);
-}
-module_init(dax_pmem_init);
-
-static void __exit dax_pmem_exit(void)
-{
-	driver_unregister(&dax_pmem_driver.drv);
-}
-module_exit(dax_pmem_exit);
+module_nd_driver(dax_pmem_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index ecdc292aa4e4..2b2332b605e4 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -124,10 +124,19 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 		return len < 0 ? len : -EIO;
 	}
 
-	if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
-			|| pfn_t_devmap(pfn))
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
+		/*
+		 * An arch that has enabled the pmem api should also
+		 * have its drivers support pfn_t_devmap()
+		 *
+		 * This is a developer warning and should not trigger in
+		 * production. dax_flush() will crash since it depends
+		 * on being able to do (page_address(pfn_to_page())).
+		 */
+		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+	} else if (pfn_t_devmap(pfn)) {
 		/* pass */;
-	else {
+	} else {
 		pr_debug("VFS (%s): error: dax support not enabled\n",
 				sb->s_id);
 		return -EOPNOTSUPP;
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2c8ac3688815..edff083f7c4e 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -201,7 +201,7 @@ config BLK_DEV_DM_BUILTIN
 config BLK_DEV_DM
 	tristate "Device mapper support"
 	select BLK_DEV_DM_BUILTIN
-	select DAX
+	depends on DAX || DAX=n
 	---help---
 	  Device-mapper is a low level volume manager.  It works by allowing
 	  people to specify mappings for ranges of logical sectors. Various
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 99297212eeec..775c06d953b7 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -154,6 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 	return fn(ti, lc->dev, lc->start, ti->len, data);
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -184,6 +185,11 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define linear_dax_direct_access NULL
+#define linear_dax_copy_from_iter NULL
+#endif
+
 static struct target_type linear_target = {
 	.name = "linear",
 	.version = {1, 4, 0},
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 9de072b7782a..c90c7c08a77f 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -611,51 +611,6 @@ static int log_mark(struct log_writes_c *lc, char *data)
 	return 0;
 }
 
-static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
-		struct iov_iter *i)
-{
-	struct pending_block *block;
-
-	if (!bytes)
-		return 0;
-
-	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
-	if (!block) {
-		DMERR("Error allocating dax pending block");
-		return -ENOMEM;
-	}
-
-	block->data = kzalloc(bytes, GFP_KERNEL);
-	if (!block->data) {
-		DMERR("Error allocating dax data space");
-		kfree(block);
-		return -ENOMEM;
-	}
-
-	/* write data provided via the iterator */
-	if (!copy_from_iter(block->data, bytes, i)) {
-		DMERR("Error copying dax data");
-		kfree(block->data);
-		kfree(block);
-		return -EIO;
-	}
-
-	/* rewind the iterator so that the block driver can use it */
-	iov_iter_revert(i, bytes);
-
-	block->datalen = bytes;
-	block->sector = bio_to_dev_sectors(lc, sector);
-	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
-
-	atomic_inc(&lc->pending_blocks);
-	spin_lock_irq(&lc->blocks_lock);
-	list_add_tail(&block->list, &lc->unflushed_blocks);
-	spin_unlock_irq(&lc->blocks_lock);
-	wake_up_process(lc->log_kthread);
-
-	return 0;
-}
-
 static void log_writes_dtr(struct dm_target *ti)
 {
 	struct log_writes_c *lc = ti->private;
@@ -925,6 +880,52 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
 	limits->io_min = limits->physical_block_size;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
+static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
+		struct iov_iter *i)
+{
+	struct pending_block *block;
+
+	if (!bytes)
+		return 0;
+
+	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
+	if (!block) {
+		DMERR("Error allocating dax pending block");
+		return -ENOMEM;
+	}
+
+	block->data = kzalloc(bytes, GFP_KERNEL);
+	if (!block->data) {
+		DMERR("Error allocating dax data space");
+		kfree(block);
+		return -ENOMEM;
+	}
+
+	/* write data provided via the iterator */
+	if (!copy_from_iter(block->data, bytes, i)) {
+		DMERR("Error copying dax data");
+		kfree(block->data);
+		kfree(block);
+		return -EIO;
+	}
+
+	/* rewind the iterator so that the block driver can use it */
+	iov_iter_revert(i, bytes);
+
+	block->datalen = bytes;
+	block->sector = bio_to_dev_sectors(lc, sector);
+	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
+
+	atomic_inc(&lc->pending_blocks);
+	spin_lock_irq(&lc->blocks_lock);
+	list_add_tail(&block->list, &lc->unflushed_blocks);
+	spin_unlock_irq(&lc->blocks_lock);
+	wake_up_process(lc->log_kthread);
+
+	return 0;
+}
+
 static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -961,6 +962,10 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
 dax_copy:
 	return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
 }
+#else
+#define log_writes_dax_direct_access NULL
+#define log_writes_dax_copy_from_iter NULL
+#endif
 
 static struct target_type log_writes_target = {
 	.name = "log-writes",
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index bb907cb3e60d..fe7fb9b1aec3 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -313,6 +313,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -353,6 +354,11 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define stripe_dax_direct_access NULL
+#define stripe_dax_copy_from_iter NULL
+#endif
+
 /*
  * Stripe status:
  *
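dm-linear, dm-log-writes, and dm-stripe all use the same compile-out idiom: when CONFIG_DAX_DRIVER is disabled, the DAX methods resolve to NULL, so the target_type initializer after each #endif compiles unchanged and dm core simply sees a target without DAX support. A condensed sketch of the pattern, with example_* as placeholder names rather than anything in the tree:

	#if IS_ENABLED(CONFIG_DAX_DRIVER)
	/* built only when a DAX device can exist */
	static long example_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
			long nr_pages, void **kaddr, pfn_t *pfn)
	{
		return -EIO;	/* real body elided in this sketch */
	}
	#else
	/* compiled out: the ops slot becomes a plain NULL pointer */
	#define example_dax_direct_access NULL
	#endif

	static struct target_type example_target = {
		.name		= "example",
		.direct_access	= example_dax_direct_access,
	};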
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5a81c47be4e4..4ea404dbcf0b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1826,7 +1826,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 static struct mapped_device *alloc_dev(int minor)
 {
 	int r, numa_node_id = dm_get_numa_node();
-	struct dax_device *dax_dev;
+	struct dax_device *dax_dev = NULL;
 	struct mapped_device *md;
 	void *old_md;
 
@@ -1892,9 +1892,11 @@ static struct mapped_device *alloc_dev(int minor)
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
 
-	dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
-	if (!dax_dev)
-		goto bad;
+	if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
+		dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+		if (!dax_dev)
+			goto bad;
+	}
 	md->dax_dev = dax_dev;
 
 	add_disk_no_queue_reg(md->disk);
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index a65f2e1d9f53..85997184e047 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -20,7 +20,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
-	select DAX
+	select DAX_DRIVER
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help
@@ -102,4 +102,15 @@ config NVDIMM_DAX
 
 	  Select Y if unsure
 
+config OF_PMEM
+	# FIXME: make tristate once OF_NUMA dependency removed
+	bool "Device-tree support for persistent memory regions"
+	depends on OF
+	default LIBNVDIMM
+	help
+	  Allows regions of persistent memory to be described in the
+	  device-tree.
+
+	  Select Y if unsure.
+
 endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 70d5f3ad9909..e8847045dac0 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
+obj-$(CONFIG_OF_PMEM) += of_pmem.o
 
 nd_pmem-y := pmem.o
 
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index d58925295aa7..795ad4ff35ca 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -26,7 +26,7 @@ static void nd_btt_release(struct device *dev)
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns);
 	ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
 	kfree(nd_btt->uuid);
@@ -74,8 +74,8 @@ static ssize_t sector_size_store(struct device *dev,
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
 			btt_lbasize_supported);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -101,8 +101,8 @@ static ssize_t uuid_store(struct device *dev,
 
 	device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	device_unlock(dev);
 
 	return rc ? rc : len;
@@ -131,8 +131,8 @@ static ssize_t namespace_store(struct device *dev,
 	device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -206,8 +206,8 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
 	dev->groups = nd_btt_attribute_groups;
 	device_initialize(&nd_btt->dev);
 	if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
-		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-				__func__, dev_name(ndns->claim));
+		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+				dev_name(ndns->claim));
 		put_device(dev);
 		return NULL;
 	}
@@ -346,8 +346,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
 		return -ENOMEM;
 	btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL);
 	rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb);
-	dev_dbg(dev, "%s: btt: %s\n", __func__,
-			rc == 0 ? dev_name(btt_dev) : "<none>");
+	dev_dbg(dev, "btt: %s\n", rc == 0 ? dev_name(btt_dev) : "<none>");
 	if (rc < 0) {
 		struct nd_btt *nd_btt = to_nd_btt(btt_dev);
 
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 78eabc3a1ab1..a64023690cad 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -358,6 +358,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 	nvdimm_bus->dev.release = nvdimm_bus_release;
 	nvdimm_bus->dev.groups = nd_desc->attr_groups;
 	nvdimm_bus->dev.bus = &nvdimm_bus_type;
+	nvdimm_bus->dev.of_node = nd_desc->of_node;
 	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
 	rc = device_register(&nvdimm_bus->dev);
 	if (rc) {
@@ -984,8 +985,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	if (cmd == ND_CMD_CALL) {
 		func = pkg.nd_command;
-		dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n",
-				__func__, dimm_name, pkg.nd_command,
+		dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n",
+				dimm_name, pkg.nd_command,
 				in_len, out_len, buf_len);
 	}
 
@@ -996,8 +997,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		u32 copy;
 
 		if (out_size == UINT_MAX) {
-			dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			return -EFAULT;
 		}
 		if (out_len < sizeof(out_env))
@@ -1012,9 +1013,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	buf_len = (u64) out_len + (u64) in_len;
 	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
-		dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__,
-				dimm_name, cmd_name, buf_len,
-				ND_IOCTL_MAX_BUFLEN);
+		dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
+				cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
 		return -EINVAL;
 	}
 
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index b2fc29b8279b..30852270484f 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -148,7 +148,7 @@ ssize_t nd_namespace_store(struct device *dev,
 	char *name;
 
 	if (dev->driver) {
-		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		dev_dbg(dev, "namespace already active\n");
 		return -EBUSY;
 	}
 
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 1dc527660637..acce050856a8 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -134,7 +134,7 @@ static void nvdimm_map_release(struct kref *kref)
 	nvdimm_map = container_of(kref, struct nvdimm_map, kref);
 	nvdimm_bus = nvdimm_map->nvdimm_bus;
 
-	dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+	dev_dbg(&nvdimm_bus->dev, "%pa\n", &nvdimm_map->offset);
 	list_del(&nvdimm_map->list);
 	if (nvdimm_map->flags)
 		memunmap(nvdimm_map->mem);
@@ -230,8 +230,8 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
 
 	for (i = 0; i < 16; i++) {
 		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
-			dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
-					__func__, i, str - buf, str[0],
+			dev_dbg(dev, "pos: %d buf[%zd]: %c buf[%zd]: %c\n",
+					i, str - buf, str[0],
 					str + 1 - buf, str[1]);
 			return -EINVAL;
 		}
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index 1bf2bd318371..0453f49dc708 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -24,7 +24,7 @@ static void nd_dax_release(struct device *dev)
 	struct nd_dax *nd_dax = to_nd_dax(dev);
 	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(dev, &nd_pfn->ndns);
 	ida_simple_remove(&nd_region->dax_ida, nd_pfn->id);
 	kfree(nd_pfn->uuid);
@@ -129,8 +129,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
 	pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
 	nd_pfn->pfn_sb = pfn_sb;
 	rc = nd_pfn_validate(nd_pfn, DAX_SIG);
-	dev_dbg(dev, "%s: dax: %s\n", __func__,
-			rc == 0 ? dev_name(dax_dev) : "<none>");
+	dev_dbg(dev, "dax: %s\n", rc == 0 ? dev_name(dax_dev) : "<none>");
 	if (rc < 0) {
 		nd_detach_ndns(dax_dev, &nd_pfn->ndns);
 		put_device(dax_dev);
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index f8913b8124b6..233907889f96 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -67,9 +67,11 @@ static int nvdimm_probe(struct device *dev)
 	ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
 	nd_label_copy(ndd, to_next_namespace_index(ndd),
 			to_current_namespace_index(ndd));
-	rc = nd_label_reserve_dpa(ndd);
-	if (ndd->ns_current >= 0)
-		nvdimm_set_aliasing(dev);
+	if (ndd->ns_current >= 0) {
+		rc = nd_label_reserve_dpa(ndd);
+		if (rc == 0)
+			nvdimm_set_aliasing(dev);
+	}
 	nvdimm_clear_locked(dev);
 	nvdimm_bus_unlock(dev);
 
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 097794d9f786..e00d45522b80 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -131,7 +131,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
 		}
 		memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
 	}
-	dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
+	dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
 	kfree(cmd);
 
 	return rc;
@@ -266,8 +266,7 @@ void nvdimm_drvdata_release(struct kref *kref)
 	struct device *dev = ndd->dev;
 	struct resource *res, *_r;
 
-	dev_dbg(dev, "%s\n", __func__);
-
+	dev_dbg(dev, "trace\n");
 	nvdimm_bus_lock(dev);
 	for_each_dpa_resource_safe(ndd, res, _r)
 		nvdimm_free_dpa(ndd, res);
@@ -660,7 +659,7 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
 	nd_synchronize();
 
 	device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
-	dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
+	dev_dbg(&nvdimm_bus->dev, "count: %d\n", count);
 	if (count != dimm_count)
 		return -ENXIO;
 	return 0;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index de66c02f6140..1d28cd656536 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -45,9 +45,27 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
 	return ndd->nslabel_size;
 }
 
+static size_t __sizeof_namespace_index(u32 nslot)
+{
+	return ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
+			NSINDEX_ALIGN);
+}
+
+static int __nvdimm_num_label_slots(struct nvdimm_drvdata *ndd,
+		size_t index_size)
+{
+	return (ndd->nsarea.config_size - index_size * 2) /
+			sizeof_namespace_label(ndd);
+}
+
 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
 {
-	return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
+	u32 tmp_nslot, n;
+
+	tmp_nslot = ndd->nsarea.config_size / sizeof_namespace_label(ndd);
+	n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN;
+
+	return __nvdimm_num_label_slots(ndd, NSINDEX_ALIGN * n);
 }
 
 size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
@@ -55,18 +73,14 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
 	u32 nslot, space, size;
 
 	/*
-	 * The minimum index space is 512 bytes, with that amount of
-	 * index we can describe ~1400 labels which is less than a byte
-	 * of overhead per label.  Round up to a byte of overhead per
-	 * label and determine the size of the index region.  Yes, this
-	 * starts to waste space at larger config_sizes, but it's
-	 * unlikely we'll ever see anything but 128K.
+	 * Per UEFI 2.7, the minimum size of the Label Storage Area is large
+	 * enough to hold 2 index blocks and 2 labels.  The minimum index
+	 * block size is 256 bytes, and the minimum label size is 256 bytes.
 	 */
 	nslot = nvdimm_num_label_slots(ndd);
 	space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
-	size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
-			NSINDEX_ALIGN) * 2;
-	if (size <= space)
+	size = __sizeof_namespace_index(nslot) * 2;
+	if (size <= space && nslot >= 2)
 		return size / 2;
 
 	dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n",
@@ -121,8 +135,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 
 		memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
 		if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
-			dev_dbg(dev, "%s: nsindex%d signature invalid\n",
-					__func__, i);
+			dev_dbg(dev, "nsindex%d signature invalid\n", i);
 			continue;
 		}
 
@@ -135,8 +148,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 			labelsize = 128;
 
 		if (labelsize != sizeof_namespace_label(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n",
-					__func__, i, nsindex[i]->labelsize);
+			dev_dbg(dev, "nsindex%d labelsize %d invalid\n",
+					i, nsindex[i]->labelsize);
 			continue;
 		}
 
@@ -145,30 +158,28 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
 		nsindex[i]->checksum = __cpu_to_le64(sum_save);
 		if (sum != sum_save) {
-			dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
-					__func__, i);
+			dev_dbg(dev, "nsindex%d checksum invalid\n", i);
 			continue;
 		}
 
 		seq = __le32_to_cpu(nsindex[i]->seq);
 		if ((seq & NSINDEX_SEQ_MASK) == 0) {
-			dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
-					__func__, i, seq);
+			dev_dbg(dev, "nsindex%d sequence: %#x invalid\n", i, seq);
 			continue;
 		}
 
 		/* sanity check the index against expected values */
 		if (__le64_to_cpu(nsindex[i]->myoff)
 				!= i * sizeof_namespace_index(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
-					__func__, i, (unsigned long long)
+			dev_dbg(dev, "nsindex%d myoff: %#llx invalid\n",
+					i, (unsigned long long)
 					__le64_to_cpu(nsindex[i]->myoff));
 			continue;
 		}
 		if (__le64_to_cpu(nsindex[i]->otheroff)
 				!= (!i) * sizeof_namespace_index(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
-					__func__, i, (unsigned long long)
+			dev_dbg(dev, "nsindex%d otheroff: %#llx invalid\n",
+					i, (unsigned long long)
 					__le64_to_cpu(nsindex[i]->otheroff));
 			continue;
 		}
@@ -176,8 +187,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		size = __le64_to_cpu(nsindex[i]->mysize);
 		if (size > sizeof_namespace_index(ndd)
 				|| size < sizeof(struct nd_namespace_index)) {
-			dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
-					__func__, i, size);
+			dev_dbg(dev, "nsindex%d mysize: %#llx invalid\n", i, size);
 			continue;
 		}
 
@@ -185,9 +195,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		if (nslot * sizeof_namespace_label(ndd)
 				+ 2 * sizeof_namespace_index(ndd)
 				> ndd->nsarea.config_size) {
-			dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
-					__func__, i, nslot,
-					ndd->nsarea.config_size);
+			dev_dbg(dev, "nsindex%d nslot: %u invalid, config_size: %#x\n",
+					i, nslot, ndd->nsarea.config_size);
 			continue;
 		}
 		valid[i] = true;
@@ -356,8 +365,8 @@ static bool slot_valid(struct nvdimm_drvdata *ndd,
 		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
 		nd_label->checksum = __cpu_to_le64(sum_save);
 		if (sum != sum_save) {
-			dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n",
-					__func__, slot, sum);
+			dev_dbg(ndd->dev, "fail checksum. slot: %d expect: %#llx\n",
+					slot, sum);
 			return false;
 		}
 	}
@@ -422,8 +431,8 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
 			u64 dpa = __le64_to_cpu(nd_label->dpa);
 
 			dev_dbg(ndd->dev,
-				"%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
-					__func__, slot, label_slot, dpa, size);
+				"slot%d invalid slot: %d dpa: %llx size: %llx\n",
+					slot, label_slot, dpa, size);
 			continue;
 		}
 		count++;
@@ -650,7 +659,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	slot = nd_label_alloc_slot(ndd);
 	if (slot == UINT_MAX)
 		return -ENXIO;
-	dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+	dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
 	nd_label = to_label(ndd, slot);
 	memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -678,7 +687,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
 		nd_label->checksum = __cpu_to_le64(sum);
 	}
-	nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
+	nd_dbg_dpa(nd_region, ndd, res, "\n");
 
 	/* update label */
 	offset = nd_label_offset(ndd, nd_label);
@@ -700,7 +709,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 			break;
 	}
 	if (victim) {
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		slot = to_slot(ndd, victim->label);
 		nd_label_free_slot(ndd, slot);
 		victim->label = NULL;
@@ -868,7 +877,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 		slot = nd_label_alloc_slot(ndd);
 		if (slot == UINT_MAX)
 			goto abort;
-		dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
 		nd_label = to_label(ndd, slot);
 		memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -928,7 +937,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 
 	/* free up now unused slots in the new index */
 	for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) {
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		nd_label_free_slot(ndd, slot);
 	}
 
@@ -1092,7 +1101,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 			active--;
 			slot = to_slot(ndd, nd_label);
 			nd_label_free_slot(ndd, slot);
-			dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+			dev_dbg(ndd->dev, "free: %d\n", slot);
 			list_move_tail(&label_ent->list, &list);
 			label_ent->label = NULL;
 		}
@@ -1100,7 +1109,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 
 	if (active == 0) {
 		nd_mapping_free_labels(nd_mapping);
-		dev_dbg(ndd->dev, "%s: no more active labels\n", __func__);
+		dev_dbg(ndd->dev, "no more active labels\n");
 	}
 	mutex_unlock(&nd_mapping->lock);
 
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 1ebf4d3d01ba..18bbe183b3a9 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -33,7 +33,7 @@ enum {
 	BTTINFO_UUID_LEN = 16,
 	BTTINFO_FLAG_ERROR = 0x1,	/* error state (read-only) */
 	BTTINFO_MAJOR_VERSION = 1,
-	ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
+	ND_LABEL_MIN_SIZE = 256 * 4, /* see sizeof_namespace_index() */
 	ND_LABEL_ID_SIZE = 50,
 	ND_NSINDEX_INIT = 0x1,
 };
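The new 1K minimum follows directly from the label.c helpers above. A worked example, assuming 256-byte labels, NSINDEX_ALIGN == 256, and a struct nd_namespace_index header of roughly 72 bytes (the struct definition is not shown in this diff, so the 72 is an assumption):

	/* worked example of the nvdimm_num_label_slots() arithmetic */
	u32 config_size = 1024;		/* ND_LABEL_MIN_SIZE = 256 * 4 */
	u32 label_size = 256;		/* sizeof_namespace_label() */
	u32 tmp_nslot = config_size / label_size;			/* 4 */
	u32 index = ALIGN(72 + DIV_ROUND_UP(tmp_nslot, 8), 256);	/* 256 */
	u32 nslot = (config_size - 2 * index) / label_size;		/* 2 */
	/* two 256-byte index blocks plus two 256-byte labels fill 1K exactly */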
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 658ada497be0..28afdd668905 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -421,7 +421,7 @@ static ssize_t alt_name_store(struct device *dev,
 	rc = __alt_name_store(dev, buf, len);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1007,7 +1007,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 	if (uuid_not_set(uuid, dev, __func__))
 		return -ENXIO;
 	if (nd_region->ndr_mappings == 0) {
-		dev_dbg(dev, "%s: not associated with dimm(s)\n", __func__);
+		dev_dbg(dev, "not associated with dimm(s)\n");
 		return -ENXIO;
 	}
 
@@ -1105,8 +1105,7 @@ static ssize_t size_store(struct device *dev,
 		*uuid = NULL;
 	}
 
-	dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0
-			? "fail" : "success", rc);
+	dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
 
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
@@ -1270,8 +1269,8 @@ static ssize_t uuid_store(struct device *dev,
 		rc = nd_namespace_label_update(nd_region, dev);
 	else
 		kfree(uuid);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1355,9 +1354,8 @@ static ssize_t sector_size_store(struct device *dev,
 	rc = nd_size_select_store(dev, buf, lbasize, supported);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
-			rc, rc < 0 ? "tried" : "wrote", buf,
-			buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
+			buf, buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1519,7 +1517,7 @@ static ssize_t holder_class_store(struct device *dev,
 	rc = __holder_class_store(dev, buf);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1717,8 +1715,7 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 		if (uuid_not_set(nsblk->uuid, &ndns->dev, __func__))
 			return ERR_PTR(-ENODEV);
 		if (!nsblk->lbasize) {
-			dev_dbg(&ndns->dev, "%s: sector size not set\n",
-					__func__);
+			dev_dbg(&ndns->dev, "sector size not set\n");
 			return ERR_PTR(-ENODEV);
 		}
 		if (!nd_namespace_blk_validate(nsblk))
@@ -1798,9 +1795,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 		}
 
 		if (found_uuid) {
-			dev_dbg(ndd->dev,
-					"%s duplicate entry for uuid\n",
-					__func__);
+			dev_dbg(ndd->dev, "duplicate entry for uuid\n");
 			return false;
 		}
 		found_uuid = true;
@@ -1926,7 +1921,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	}
 
 	if (i < nd_region->ndr_mappings) {
-		struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+		struct nvdimm *nvdimm = nd_region->mapping[i].nvdimm;
 
 		/*
 		 * Give up if we don't find an instance of a uuid at each
@@ -1934,7 +1929,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 		 * find a dimm with two instances of the same uuid.
 		 */
 		dev_err(&nd_region->dev, "%s missing label for %pUb\n",
-				dev_name(ndd->dev), nd_label->uuid);
+				nvdimm_name(nvdimm), nd_label->uuid);
 		rc = -EINVAL;
 		goto err;
 	}
@@ -1994,14 +1989,13 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	namespace_pmem_release(dev);
 	switch (rc) {
 	case -EINVAL:
-		dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
+		dev_dbg(&nd_region->dev, "invalid label(s)\n");
 		break;
 	case -ENODEV:
-		dev_dbg(&nd_region->dev, "%s: label not found\n", __func__);
+		dev_dbg(&nd_region->dev, "label not found\n");
 		break;
 	default:
-		dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n",
-				__func__, rc);
+		dev_dbg(&nd_region->dev, "unexpected err: %d\n", rc);
 		break;
 	}
 	return ERR_PTR(rc);
@@ -2334,8 +2328,8 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
 	}
 
-	dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
-			__func__, count, is_nd_blk(&nd_region->dev)
+	dev_dbg(&nd_region->dev, "discovered %d %s namespace%s\n",
+			count, is_nd_blk(&nd_region->dev)
 			? "blk" : "pmem", count == 1 ? "" : "s");
 
 	if (count == 0) {
@@ -2467,7 +2461,7 @@ static int init_active_labels(struct nd_region *nd_region)
 		get_ndd(ndd);
 
 		count = nd_label_active_count(ndd);
-		dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
+		dev_dbg(ndd->dev, "count: %d\n", count);
 		if (!count)
 			continue;
 		for (j = 0; j < count; j++) {
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 184e070d50a2..32e0364b48b9 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -340,7 +340,6 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region)
 }
 #endif
 
-struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
 u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
new file mode 100644
index 000000000000..85013bad35de
--- /dev/null
+++ b/drivers/nvdimm/of_pmem.c
@@ -0,0 +1,119 @@
1// SPDX-License-Identifier: GPL-2.0+
2
3#define pr_fmt(fmt) "of_pmem: " fmt
4
5#include <linux/of_platform.h>
6#include <linux/of_address.h>
7#include <linux/libnvdimm.h>
8#include <linux/module.h>
9#include <linux/ioport.h>
10#include <linux/slab.h>
11
12static const struct attribute_group *region_attr_groups[] = {
13 &nd_region_attribute_group,
14 &nd_device_attribute_group,
15 NULL,
16};
17
18static const struct attribute_group *bus_attr_groups[] = {
19 &nvdimm_bus_attribute_group,
20 NULL,
21};
22
23struct of_pmem_private {
24 struct nvdimm_bus_descriptor bus_desc;
25 struct nvdimm_bus *bus;
26};
27
28static int of_pmem_region_probe(struct platform_device *pdev)
29{
30 struct of_pmem_private *priv;
31 struct device_node *np;
32 struct nvdimm_bus *bus;
33 bool is_volatile;
34 int i;
35
36 np = dev_of_node(&pdev->dev);
37 if (!np)
38 return -ENXIO;
39
40 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
41 if (!priv)
42 return -ENOMEM;
43
44 priv->bus_desc.attr_groups = bus_attr_groups;
45 priv->bus_desc.provider_name = "of_pmem";
46 priv->bus_desc.module = THIS_MODULE;
47 priv->bus_desc.of_node = np;
48
49 priv->bus = bus = nvdimm_bus_register(&pdev->dev, &priv->bus_desc);
50 if (!bus) {
51 kfree(priv);
52 return -ENODEV;
53 }
54 platform_set_drvdata(pdev, priv);
55
56 is_volatile = !!of_find_property(np, "volatile", NULL);
57 dev_dbg(&pdev->dev, "Registering %s regions from %pOF\n",
58 is_volatile ? "volatile" : "non-volatile", np);
59
60 for (i = 0; i < pdev->num_resources; i++) {
61 struct nd_region_desc ndr_desc;
62 struct nd_region *region;
63
64 /*
65	 * NB: libnvdimm copies the data from ndr_desc into its own
66 * structures so passing a stack pointer is fine.
67 */
68 memset(&ndr_desc, 0, sizeof(ndr_desc));
69 ndr_desc.attr_groups = region_attr_groups;
70 ndr_desc.numa_node = of_node_to_nid(np);
71 ndr_desc.res = &pdev->resource[i];
72 ndr_desc.of_node = np;
73 set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
74
75 if (is_volatile)
76 region = nvdimm_volatile_region_create(bus, &ndr_desc);
77 else
78 region = nvdimm_pmem_region_create(bus, &ndr_desc);
79
80 if (!region)
81 dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
82 ndr_desc.res, np);
83 else
84 dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
85 ndr_desc.res, np);
86 }
87
88 return 0;
89}
90
91static int of_pmem_region_remove(struct platform_device *pdev)
92{
93 struct of_pmem_private *priv = platform_get_drvdata(pdev);
94
95 nvdimm_bus_unregister(priv->bus);
96 kfree(priv);
97
98 return 0;
99}
100
101static const struct of_device_id of_pmem_region_match[] = {
102 { .compatible = "pmem-region" },
103 { },
104};
105
106static struct platform_driver of_pmem_region_driver = {
107 .probe = of_pmem_region_probe,
108 .remove = of_pmem_region_remove,
109 .driver = {
110 .name = "of_pmem",
111 .owner = THIS_MODULE,
112 .of_match_table = of_pmem_region_match,
113 },
114};
115
116module_platform_driver(of_pmem_region_driver);
117MODULE_DEVICE_TABLE(of, of_pmem_region_match);
118MODULE_LICENSE("GPL");
119MODULE_AUTHOR("IBM Corporation");
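
A note on the numa_node assignment in the probe loop above: of_node_to_nid() only consults firmware when CONFIG_OF_NUMA is enabled. On other configurations it is a static inline stub, roughly the following (a sketch of the generic fallback in <linux/of.h>, not part of this patch), so every of_pmem region on such configs is simply registered with NUMA_NO_NODE:

	static inline int of_node_to_nid(struct device_node *device)
	{
		return NUMA_NO_NODE;
	}
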
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 2f4d18752c97..30b08791597d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -27,7 +27,7 @@ static void nd_pfn_release(struct device *dev)
27 struct nd_region *nd_region = to_nd_region(dev->parent); 27 struct nd_region *nd_region = to_nd_region(dev->parent);
28 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 28 struct nd_pfn *nd_pfn = to_nd_pfn(dev);
29 29
30 dev_dbg(dev, "%s\n", __func__); 30 dev_dbg(dev, "trace\n");
31 nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); 31 nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
32 ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); 32 ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
33 kfree(nd_pfn->uuid); 33 kfree(nd_pfn->uuid);
@@ -94,8 +94,8 @@ static ssize_t mode_store(struct device *dev,
94 else 94 else
95 rc = -EINVAL; 95 rc = -EINVAL;
96 } 96 }
97 dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, 97 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
98 rc, buf, buf[len - 1] == '\n' ? "" : "\n"); 98 buf[len - 1] == '\n' ? "" : "\n");
99 nvdimm_bus_unlock(dev); 99 nvdimm_bus_unlock(dev);
100 device_unlock(dev); 100 device_unlock(dev);
101 101
@@ -144,8 +144,8 @@ static ssize_t align_store(struct device *dev,
144 nvdimm_bus_lock(dev); 144 nvdimm_bus_lock(dev);
145 rc = nd_size_select_store(dev, buf, &nd_pfn->align, 145 rc = nd_size_select_store(dev, buf, &nd_pfn->align,
146 nd_pfn_supported_alignments()); 146 nd_pfn_supported_alignments());
147 dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, 147 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
148 rc, buf, buf[len - 1] == '\n' ? "" : "\n"); 148 buf[len - 1] == '\n' ? "" : "\n");
149 nvdimm_bus_unlock(dev); 149 nvdimm_bus_unlock(dev);
150 device_unlock(dev); 150 device_unlock(dev);
151 151
@@ -171,8 +171,8 @@ static ssize_t uuid_store(struct device *dev,
171 171
172 device_lock(dev); 172 device_lock(dev);
173 rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); 173 rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
174 dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, 174 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
175 rc, buf, buf[len - 1] == '\n' ? "" : "\n"); 175 buf[len - 1] == '\n' ? "" : "\n");
176 device_unlock(dev); 176 device_unlock(dev);
177 177
178 return rc ? rc : len; 178 return rc ? rc : len;
@@ -201,8 +201,8 @@ static ssize_t namespace_store(struct device *dev,
201 device_lock(dev); 201 device_lock(dev);
202 nvdimm_bus_lock(dev); 202 nvdimm_bus_lock(dev);
203 rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); 203 rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
204 dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, 204 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
205 rc, buf, buf[len - 1] == '\n' ? "" : "\n"); 205 buf[len - 1] == '\n' ? "" : "\n");
206 nvdimm_bus_unlock(dev); 206 nvdimm_bus_unlock(dev);
207 device_unlock(dev); 207 device_unlock(dev);
208 208
@@ -314,8 +314,8 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
314 dev = &nd_pfn->dev; 314 dev = &nd_pfn->dev;
315 device_initialize(&nd_pfn->dev); 315 device_initialize(&nd_pfn->dev);
316 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { 316 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
317 dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", 317 dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
318 __func__, dev_name(ndns->claim)); 318 dev_name(ndns->claim));
319 put_device(dev); 319 put_device(dev);
320 return NULL; 320 return NULL;
321 } 321 }
@@ -510,8 +510,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
510 nd_pfn = to_nd_pfn(pfn_dev); 510 nd_pfn = to_nd_pfn(pfn_dev);
511 nd_pfn->pfn_sb = pfn_sb; 511 nd_pfn->pfn_sb = pfn_sb;
512 rc = nd_pfn_validate(nd_pfn, PFN_SIG); 512 rc = nd_pfn_validate(nd_pfn, PFN_SIG);
513 dev_dbg(dev, "%s: pfn: %s\n", __func__, 513 dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>");
514 rc == 0 ? dev_name(pfn_dev) : "<none>");
515 if (rc < 0) { 514 if (rc < 0) {
516 nd_detach_ndns(pfn_dev, &nd_pfn->ndns); 515 nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
517 put_device(pfn_dev); 516 put_device(pfn_dev);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 5a96d30c294a..9d714926ecf5 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -66,7 +66,7 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
66 rc = BLK_STS_IOERR; 66 rc = BLK_STS_IOERR;
67 if (cleared > 0 && cleared / 512) { 67 if (cleared > 0 && cleared / 512) {
68 cleared /= 512; 68 cleared /= 512;
69 dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__, 69 dev_dbg(dev, "%#llx clear %ld sector%s\n",
70 (unsigned long long) sector, cleared, 70 (unsigned long long) sector, cleared,
71 cleared > 1 ? "s" : ""); 71 cleared > 1 ? "s" : "");
72 badblocks_clear(&pmem->bb, sector, cleared); 72 badblocks_clear(&pmem->bb, sector, cleared);
@@ -547,17 +547,7 @@ static struct nd_device_driver nd_pmem_driver = {
547 .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM, 547 .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
548}; 548};
549 549
550static int __init pmem_init(void) 550module_nd_driver(nd_pmem_driver);
551{
552 return nd_driver_register(&nd_pmem_driver);
553}
554module_init(pmem_init);
555
556static void pmem_exit(void)
557{
558 driver_unregister(&nd_pmem_driver.drv);
559}
560module_exit(pmem_exit);
561 551
562MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); 552MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
563MODULE_LICENSE("GPL v2"); 553MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 034f0a07d627..b9ca0033cc99 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -27,10 +27,10 @@ static int nd_region_probe(struct device *dev)
27 if (nd_region->num_lanes > num_online_cpus() 27 if (nd_region->num_lanes > num_online_cpus()
28 && nd_region->num_lanes < num_possible_cpus() 28 && nd_region->num_lanes < num_possible_cpus()
29 && !test_and_set_bit(0, &once)) { 29 && !test_and_set_bit(0, &once)) {
30 dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n", 30 dev_dbg(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
31 num_online_cpus(), nd_region->num_lanes, 31 num_online_cpus(), nd_region->num_lanes,
32 num_possible_cpus()); 32 num_possible_cpus());
33 dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n", 33 dev_dbg(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
34 nd_region->num_lanes); 34 nd_region->num_lanes);
35 } 35 }
36 36
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 1593e1806b16..a612be6f019d 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -182,6 +182,14 @@ struct nd_region *to_nd_region(struct device *dev)
182} 182}
183EXPORT_SYMBOL_GPL(to_nd_region); 183EXPORT_SYMBOL_GPL(to_nd_region);
184 184
185struct device *nd_region_dev(struct nd_region *nd_region)
186{
187 if (!nd_region)
188 return NULL;
189 return &nd_region->dev;
190}
191EXPORT_SYMBOL_GPL(nd_region_dev);
192
185struct nd_blk_region *to_nd_blk_region(struct device *dev) 193struct nd_blk_region *to_nd_blk_region(struct device *dev)
186{ 194{
187 struct nd_region *nd_region = to_nd_region(dev); 195 struct nd_region *nd_region = to_nd_region(dev);
@@ -1014,6 +1022,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
1014 dev->parent = &nvdimm_bus->dev; 1022 dev->parent = &nvdimm_bus->dev;
1015 dev->type = dev_type; 1023 dev->type = dev_type;
1016 dev->groups = ndr_desc->attr_groups; 1024 dev->groups = ndr_desc->attr_groups;
1025 dev->of_node = ndr_desc->of_node;
1017 nd_region->ndr_size = resource_size(ndr_desc->res); 1026 nd_region->ndr_size = resource_size(ndr_desc->res);
1018 nd_region->ndr_start = ndr_desc->res->start; 1027 nd_region->ndr_start = ndr_desc->res->start;
1019 nd_device_register(dev); 1028 nd_device_register(dev);
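
The new nd_region_dev() accessor lets code outside drivers/nvdimm reach a region's struct device without knowing the layout of struct nd_region, which stays private to the subsystem. A minimal usage sketch (the caller here is hypothetical):

	/* Hypothetical consumer: log against the region's device. The
	 * helper tolerates a NULL region, returning NULL in that case. */
	struct device *dev = nd_region_dev(nd_region);

	if (dev)
		dev_dbg(dev, "region %s ready\n", dev_name(dev));
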
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 1444333210c7..9ac7574e3cfb 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -15,8 +15,8 @@ config BLK_DEV_XPRAM
15 15
16config DCSSBLK 16config DCSSBLK
17 def_tristate m 17 def_tristate m
18 select DAX
19 select FS_DAX_LIMITED 18 select FS_DAX_LIMITED
19 select DAX_DRIVER
20 prompt "DCSSBLK support" 20 prompt "DCSSBLK support"
21 depends on S390 && BLOCK 21 depends on S390 && BLOCK
22 help 22 help
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7a506c55a993..7ec920e27065 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1948,11 +1948,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
1948static int blkdev_writepages(struct address_space *mapping, 1948static int blkdev_writepages(struct address_space *mapping,
1949 struct writeback_control *wbc) 1949 struct writeback_control *wbc)
1950{ 1950{
1951 if (dax_mapping(mapping)) {
1952 struct block_device *bdev = I_BDEV(mapping->host);
1953
1954 return dax_writeback_mapping_range(mapping, bdev, wbc);
1955 }
1956 return generic_writepages(mapping, wbc); 1951 return generic_writepages(mapping, wbc);
1957} 1952}
1958 1953
diff --git a/fs/dax.c b/fs/dax.c
index 0276df90e86c..a77394fe586e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -73,16 +73,15 @@ fs_initcall(init_dax_wait_table);
73#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) 73#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
74#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) 74#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
75 75
76static unsigned long dax_radix_sector(void *entry) 76static unsigned long dax_radix_pfn(void *entry)
77{ 77{
78 return (unsigned long)entry >> RADIX_DAX_SHIFT; 78 return (unsigned long)entry >> RADIX_DAX_SHIFT;
79} 79}
80 80
81static void *dax_radix_locked_entry(sector_t sector, unsigned long flags) 81static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
82{ 82{
83 return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | 83 return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
84 ((unsigned long)sector << RADIX_DAX_SHIFT) | 84 (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
85 RADIX_DAX_ENTRY_LOCK);
86} 85}
87 86
88static unsigned int dax_radix_order(void *entry) 87static unsigned int dax_radix_order(void *entry)
@@ -299,6 +298,63 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
299 dax_wake_mapping_entry_waiter(mapping, index, entry, false); 298 dax_wake_mapping_entry_waiter(mapping, index, entry, false);
300} 299}
301 300
301static unsigned long dax_entry_size(void *entry)
302{
303 if (dax_is_zero_entry(entry))
304 return 0;
305 else if (dax_is_empty_entry(entry))
306 return 0;
307 else if (dax_is_pmd_entry(entry))
308 return PMD_SIZE;
309 else
310 return PAGE_SIZE;
311}
312
313static unsigned long dax_radix_end_pfn(void *entry)
314{
315 return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
316}
317
318/*
319 * Iterate through all mapped pfns represented by an entry, i.e. skip
320 * 'empty' and 'zero' entries.
321 */
322#define for_each_mapped_pfn(entry, pfn) \
323 for (pfn = dax_radix_pfn(entry); \
324 pfn < dax_radix_end_pfn(entry); pfn++)
325
326static void dax_associate_entry(void *entry, struct address_space *mapping)
327{
328 unsigned long pfn;
329
330 if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
331 return;
332
333 for_each_mapped_pfn(entry, pfn) {
334 struct page *page = pfn_to_page(pfn);
335
336 WARN_ON_ONCE(page->mapping);
337 page->mapping = mapping;
338 }
339}
340
341static void dax_disassociate_entry(void *entry, struct address_space *mapping,
342 bool trunc)
343{
344 unsigned long pfn;
345
346 if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
347 return;
348
349 for_each_mapped_pfn(entry, pfn) {
350 struct page *page = pfn_to_page(pfn);
351
352 WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
353 WARN_ON_ONCE(page->mapping && page->mapping != mapping);
354 page->mapping = NULL;
355 }
356}
357
302/* 358/*
303 * Find radix tree entry at given index. If it points to an exceptional entry, 359 * Find radix tree entry at given index. If it points to an exceptional entry,
304 * return it with the radix tree entry locked. If the radix tree doesn't 360 * return it with the radix tree entry locked. If the radix tree doesn't
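
The move from sector-based to pfn-based radix entries is what makes the association helpers above possible: an entry now identifies struct pages directly. A small illustration of the encode/decode round trip and the page counts involved, a sketch using only names defined in this file (512 assumes x86-64 4K pages):

	/* Sketch: pack a pfn plus type flags, recover the pfn, and walk
	 * the pages the entry maps. A PMD entry covers
	 * PMD_SIZE / PAGE_SIZE == 512 pages; a PTE entry covers one;
	 * zero/empty entries cover none, so the loop body never runs. */
	static void dax_entry_roundtrip_demo(unsigned long pfn)
	{
		void *entry = dax_radix_locked_entry(pfn, RADIX_DAX_PMD);
		unsigned long p;

		WARN_ON(dax_radix_pfn(entry) != pfn); /* decode inverts encode */
		for_each_mapped_pfn(entry, p)
			; /* visits pfn .. pfn + 511 for a PMD entry */
	}
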
@@ -405,6 +461,7 @@ restart:
405 } 461 }
406 462
407 if (pmd_downgrade) { 463 if (pmd_downgrade) {
464 dax_disassociate_entry(entry, mapping, false);
408 radix_tree_delete(&mapping->page_tree, index); 465 radix_tree_delete(&mapping->page_tree, index);
409 mapping->nrexceptional--; 466 mapping->nrexceptional--;
410 dax_wake_mapping_entry_waiter(mapping, index, entry, 467 dax_wake_mapping_entry_waiter(mapping, index, entry,
@@ -454,6 +511,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
454 (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || 511 (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
455 radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) 512 radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
456 goto out; 513 goto out;
514 dax_disassociate_entry(entry, mapping, trunc);
457 radix_tree_delete(page_tree, index); 515 radix_tree_delete(page_tree, index);
458 mapping->nrexceptional--; 516 mapping->nrexceptional--;
459 ret = 1; 517 ret = 1;
@@ -526,12 +584,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
526 */ 584 */
527static void *dax_insert_mapping_entry(struct address_space *mapping, 585static void *dax_insert_mapping_entry(struct address_space *mapping,
528 struct vm_fault *vmf, 586 struct vm_fault *vmf,
529 void *entry, sector_t sector, 587 void *entry, pfn_t pfn_t,
530 unsigned long flags, bool dirty) 588 unsigned long flags, bool dirty)
531{ 589{
532 struct radix_tree_root *page_tree = &mapping->page_tree; 590 struct radix_tree_root *page_tree = &mapping->page_tree;
533 void *new_entry; 591 unsigned long pfn = pfn_t_to_pfn(pfn_t);
534 pgoff_t index = vmf->pgoff; 592 pgoff_t index = vmf->pgoff;
593 void *new_entry;
535 594
536 if (dirty) 595 if (dirty)
537 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 596 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -546,7 +605,11 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
546 } 605 }
547 606
548 spin_lock_irq(&mapping->tree_lock); 607 spin_lock_irq(&mapping->tree_lock);
549 new_entry = dax_radix_locked_entry(sector, flags); 608 new_entry = dax_radix_locked_entry(pfn, flags);
609 if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
610 dax_disassociate_entry(entry, mapping, false);
611 dax_associate_entry(new_entry, mapping);
612 }
550 613
551 if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { 614 if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
552 /* 615 /*
@@ -657,17 +720,14 @@ unlock_pte:
657 i_mmap_unlock_read(mapping); 720 i_mmap_unlock_read(mapping);
658} 721}
659 722
660static int dax_writeback_one(struct block_device *bdev, 723static int dax_writeback_one(struct dax_device *dax_dev,
661 struct dax_device *dax_dev, struct address_space *mapping, 724 struct address_space *mapping, pgoff_t index, void *entry)
662 pgoff_t index, void *entry)
663{ 725{
664 struct radix_tree_root *page_tree = &mapping->page_tree; 726 struct radix_tree_root *page_tree = &mapping->page_tree;
665 void *entry2, **slot, *kaddr; 727 void *entry2, **slot;
666 long ret = 0, id; 728 unsigned long pfn;
667 sector_t sector; 729 long ret = 0;
668 pgoff_t pgoff;
669 size_t size; 730 size_t size;
670 pfn_t pfn;
671 731
672 /* 732 /*
673 * A page got tagged dirty in DAX mapping? Something is seriously 733 * A page got tagged dirty in DAX mapping? Something is seriously
@@ -683,10 +743,10 @@ static int dax_writeback_one(struct block_device *bdev,
683 goto put_unlocked; 743 goto put_unlocked;
684 /* 744 /*
685 * Entry got reallocated elsewhere? No need to writeback. We have to 745 * Entry got reallocated elsewhere? No need to writeback. We have to
686 * compare sectors as we must not bail out due to difference in lockbit 746 * compare pfns as we must not bail out due to difference in lockbit
687 * or entry type. 747 * or entry type.
688 */ 748 */
689 if (dax_radix_sector(entry2) != dax_radix_sector(entry)) 749 if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
690 goto put_unlocked; 750 goto put_unlocked;
691 if (WARN_ON_ONCE(dax_is_empty_entry(entry) || 751 if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
692 dax_is_zero_entry(entry))) { 752 dax_is_zero_entry(entry))) {
@@ -712,33 +772,15 @@ static int dax_writeback_one(struct block_device *bdev,
712 /* 772 /*
713 * Even if dax_writeback_mapping_range() was given a wbc->range_start 773 * Even if dax_writeback_mapping_range() was given a wbc->range_start
714 * in the middle of a PMD, the 'index' we are given will be aligned to 774 * in the middle of a PMD, the 'index' we are given will be aligned to
715 * the start index of the PMD, as will the sector we pull from 775 * the start index of the PMD, as will the pfn we pull from 'entry'.
716 * 'entry'. This allows us to flush for PMD_SIZE and not have to 776 * This allows us to flush for PMD_SIZE and not have to worry about
717 * worry about partial PMD writebacks. 777 * partial PMD writebacks.
718 */ 778 */
719 sector = dax_radix_sector(entry); 779 pfn = dax_radix_pfn(entry);
720 size = PAGE_SIZE << dax_radix_order(entry); 780 size = PAGE_SIZE << dax_radix_order(entry);
721 781
722 id = dax_read_lock(); 782 dax_mapping_entry_mkclean(mapping, index, pfn);
723 ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); 783 dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
724 if (ret)
725 goto dax_unlock;
726
727 /*
728 * dax_direct_access() may sleep, so cannot hold tree_lock over
729 * its invocation.
730 */
731 ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn);
732 if (ret < 0)
733 goto dax_unlock;
734
735 if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) {
736 ret = -EIO;
737 goto dax_unlock;
738 }
739
740 dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
741 dax_flush(dax_dev, kaddr, size);
742 /* 784 /*
743 * After we have flushed the cache, we can clear the dirty tag. There 785 * After we have flushed the cache, we can clear the dirty tag. There
744 * cannot be new dirty data in the pfn after the flush has completed as 786 * cannot be new dirty data in the pfn after the flush has completed as
@@ -749,8 +791,6 @@ static int dax_writeback_one(struct block_device *bdev,
749 radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); 791 radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
750 spin_unlock_irq(&mapping->tree_lock); 792 spin_unlock_irq(&mapping->tree_lock);
751 trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); 793 trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
752 dax_unlock:
753 dax_read_unlock(id);
754 put_locked_mapping_entry(mapping, index); 794 put_locked_mapping_entry(mapping, index);
755 return ret; 795 return ret;
756 796
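
With the pfn recorded in the radix entry, the kaddr needed for the cache flush is just the linear-map address of the page, which is why the bdev_dax_pgoff()/dax_direct_access() lookup and its dax_read_lock() bracket could be deleted above. The surviving flush path, annotated (this assumes ZONE_DEVICE pmem pages live in the kernel direct map, which devm_memremap_pages() arranges):

	dax_mapping_entry_mkclean(mapping, index, pfn);	/* write-protect mappings */
	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size); /* flush CPU cache */
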
@@ -808,8 +848,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
808 break; 848 break;
809 } 849 }
810 850
811 ret = dax_writeback_one(bdev, dax_dev, mapping, 851 ret = dax_writeback_one(dax_dev, mapping, indices[i],
812 indices[i], pvec.pages[i]); 852 pvec.pages[i]);
813 if (ret < 0) { 853 if (ret < 0) {
814 mapping_set_error(mapping, ret); 854 mapping_set_error(mapping, ret);
815 goto out; 855 goto out;
@@ -877,6 +917,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
877 int ret = VM_FAULT_NOPAGE; 917 int ret = VM_FAULT_NOPAGE;
878 struct page *zero_page; 918 struct page *zero_page;
879 void *entry2; 919 void *entry2;
920 pfn_t pfn;
880 921
881 zero_page = ZERO_PAGE(0); 922 zero_page = ZERO_PAGE(0);
882 if (unlikely(!zero_page)) { 923 if (unlikely(!zero_page)) {
@@ -884,14 +925,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
884 goto out; 925 goto out;
885 } 926 }
886 927
887 entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0, 928 pfn = page_to_pfn_t(zero_page);
929 entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
888 RADIX_DAX_ZERO_PAGE, false); 930 RADIX_DAX_ZERO_PAGE, false);
889 if (IS_ERR(entry2)) { 931 if (IS_ERR(entry2)) {
890 ret = VM_FAULT_SIGBUS; 932 ret = VM_FAULT_SIGBUS;
891 goto out; 933 goto out;
892 } 934 }
893 935
894 vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page)); 936 vm_insert_mixed(vmf->vma, vaddr, pfn);
895out: 937out:
896 trace_dax_load_hole(inode, vmf, ret); 938 trace_dax_load_hole(inode, vmf, ret);
897 return ret; 939 return ret;
@@ -1200,8 +1242,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1200 if (error < 0) 1242 if (error < 0)
1201 goto error_finish_iomap; 1243 goto error_finish_iomap;
1202 1244
1203 entry = dax_insert_mapping_entry(mapping, vmf, entry, 1245 entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
1204 dax_iomap_sector(&iomap, pos),
1205 0, write && !sync); 1246 0, write && !sync);
1206 if (IS_ERR(entry)) { 1247 if (IS_ERR(entry)) {
1207 error = PTR_ERR(entry); 1248 error = PTR_ERR(entry);
@@ -1280,13 +1321,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
1280 void *ret = NULL; 1321 void *ret = NULL;
1281 spinlock_t *ptl; 1322 spinlock_t *ptl;
1282 pmd_t pmd_entry; 1323 pmd_t pmd_entry;
1324 pfn_t pfn;
1283 1325
1284 zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm); 1326 zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
1285 1327
1286 if (unlikely(!zero_page)) 1328 if (unlikely(!zero_page))
1287 goto fallback; 1329 goto fallback;
1288 1330
1289 ret = dax_insert_mapping_entry(mapping, vmf, entry, 0, 1331 pfn = page_to_pfn_t(zero_page);
1332 ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
1290 RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); 1333 RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
1291 if (IS_ERR(ret)) 1334 if (IS_ERR(ret))
1292 goto fallback; 1335 goto fallback;
@@ -1409,8 +1452,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1409 if (error < 0) 1452 if (error < 0)
1410 goto finish_iomap; 1453 goto finish_iomap;
1411 1454
1412 entry = dax_insert_mapping_entry(mapping, vmf, entry, 1455 entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
1413 dax_iomap_sector(&iomap, pos),
1414 RADIX_DAX_PMD, write && !sync); 1456 RADIX_DAX_PMD, write && !sync);
1415 if (IS_ERR(entry)) 1457 if (IS_ERR(entry))
1416 goto finish_iomap; 1458 goto finish_iomap;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 032295e1d386..cc40802ddfa8 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct inode_operations ext2_file_inode_operations;
814extern const struct file_operations ext2_file_operations; 814extern const struct file_operations ext2_file_operations;
815 815
816/* inode.c */ 816/* inode.c */
817extern void ext2_set_file_ops(struct inode *inode);
817extern const struct address_space_operations ext2_aops; 818extern const struct address_space_operations ext2_aops;
818extern const struct address_space_operations ext2_nobh_aops; 819extern const struct address_space_operations ext2_nobh_aops;
819extern const struct iomap_ops ext2_iomap_ops; 820extern const struct iomap_ops ext2_iomap_ops;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 9b2ac55ac34f..1e01fabef130 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -940,9 +940,6 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
940 loff_t offset = iocb->ki_pos; 940 loff_t offset = iocb->ki_pos;
941 ssize_t ret; 941 ssize_t ret;
942 942
943 if (WARN_ON_ONCE(IS_DAX(inode)))
944 return -EIO;
945
946 ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block); 943 ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
947 if (ret < 0 && iov_iter_rw(iter) == WRITE) 944 if (ret < 0 && iov_iter_rw(iter) == WRITE)
948 ext2_write_failed(mapping, offset + count); 945 ext2_write_failed(mapping, offset + count);
@@ -952,17 +949,16 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
952static int 949static int
953ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) 950ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
954{ 951{
955#ifdef CONFIG_FS_DAX
956 if (dax_mapping(mapping)) {
957 return dax_writeback_mapping_range(mapping,
958 mapping->host->i_sb->s_bdev,
959 wbc);
960 }
961#endif
962
963 return mpage_writepages(mapping, wbc, ext2_get_block); 952 return mpage_writepages(mapping, wbc, ext2_get_block);
964} 953}
965 954
955static int
956ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc)
957{
958 return dax_writeback_mapping_range(mapping,
959 mapping->host->i_sb->s_bdev, wbc);
960}
961
966const struct address_space_operations ext2_aops = { 962const struct address_space_operations ext2_aops = {
967 .readpage = ext2_readpage, 963 .readpage = ext2_readpage,
968 .readpages = ext2_readpages, 964 .readpages = ext2_readpages,
@@ -990,6 +986,13 @@ const struct address_space_operations ext2_nobh_aops = {
990 .error_remove_page = generic_error_remove_page, 986 .error_remove_page = generic_error_remove_page,
991}; 987};
992 988
989static const struct address_space_operations ext2_dax_aops = {
990 .writepages = ext2_dax_writepages,
991 .direct_IO = noop_direct_IO,
992 .set_page_dirty = noop_set_page_dirty,
993 .invalidatepage = noop_invalidatepage,
994};
995
993/* 996/*
994 * Probably it should be a library function... search for first non-zero word 997 * Probably it should be a library function... search for first non-zero word
995 * or memcmp with zero_page, whatever is better for particular architecture. 998 * or memcmp with zero_page, whatever is better for particular architecture.
@@ -1388,6 +1391,18 @@ void ext2_set_inode_flags(struct inode *inode)
1388 inode->i_flags |= S_DAX; 1391 inode->i_flags |= S_DAX;
1389} 1392}
1390 1393
1394void ext2_set_file_ops(struct inode *inode)
1395{
1396 inode->i_op = &ext2_file_inode_operations;
1397 inode->i_fop = &ext2_file_operations;
1398 if (IS_DAX(inode))
1399 inode->i_mapping->a_ops = &ext2_dax_aops;
1400 else if (test_opt(inode->i_sb, NOBH))
1401 inode->i_mapping->a_ops = &ext2_nobh_aops;
1402 else
1403 inode->i_mapping->a_ops = &ext2_aops;
1404}
1405
1391struct inode *ext2_iget (struct super_block *sb, unsigned long ino) 1406struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1392{ 1407{
1393 struct ext2_inode_info *ei; 1408 struct ext2_inode_info *ei;
@@ -1480,14 +1495,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1480 ei->i_data[n] = raw_inode->i_block[n]; 1495 ei->i_data[n] = raw_inode->i_block[n];
1481 1496
1482 if (S_ISREG(inode->i_mode)) { 1497 if (S_ISREG(inode->i_mode)) {
1483 inode->i_op = &ext2_file_inode_operations; 1498 ext2_set_file_ops(inode);
1484 if (test_opt(inode->i_sb, NOBH)) {
1485 inode->i_mapping->a_ops = &ext2_nobh_aops;
1486 inode->i_fop = &ext2_file_operations;
1487 } else {
1488 inode->i_mapping->a_ops = &ext2_aops;
1489 inode->i_fop = &ext2_file_operations;
1490 }
1491 } else if (S_ISDIR(inode->i_mode)) { 1499 } else if (S_ISDIR(inode->i_mode)) {
1492 inode->i_op = &ext2_dir_inode_operations; 1500 inode->i_op = &ext2_dir_inode_operations;
1493 inode->i_fop = &ext2_dir_operations; 1501 inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e078075dc66f..55f7caadb093 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -107,14 +107,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
107 if (IS_ERR(inode)) 107 if (IS_ERR(inode))
108 return PTR_ERR(inode); 108 return PTR_ERR(inode);
109 109
110 inode->i_op = &ext2_file_inode_operations; 110 ext2_set_file_ops(inode);
111 if (test_opt(inode->i_sb, NOBH)) {
112 inode->i_mapping->a_ops = &ext2_nobh_aops;
113 inode->i_fop = &ext2_file_operations;
114 } else {
115 inode->i_mapping->a_ops = &ext2_aops;
116 inode->i_fop = &ext2_file_operations;
117 }
118 mark_inode_dirty(inode); 111 mark_inode_dirty(inode);
119 return ext2_add_nondir(dentry, inode); 112 return ext2_add_nondir(dentry, inode);
120} 113}
@@ -125,14 +118,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
125 if (IS_ERR(inode)) 118 if (IS_ERR(inode))
126 return PTR_ERR(inode); 119 return PTR_ERR(inode);
127 120
128 inode->i_op = &ext2_file_inode_operations; 121 ext2_set_file_ops(inode);
129 if (test_opt(inode->i_sb, NOBH)) {
130 inode->i_mapping->a_ops = &ext2_nobh_aops;
131 inode->i_fop = &ext2_file_operations;
132 } else {
133 inode->i_mapping->a_ops = &ext2_aops;
134 inode->i_fop = &ext2_file_operations;
135 }
136 mark_inode_dirty(inode); 122 mark_inode_dirty(inode);
137 d_tmpfile(dentry, inode); 123 d_tmpfile(dentry, inode);
138 unlock_new_inode(inode); 124 unlock_new_inode(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 129205028300..1e50c5efae67 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2716,12 +2716,6 @@ static int ext4_writepages(struct address_space *mapping,
2716 percpu_down_read(&sbi->s_journal_flag_rwsem); 2716 percpu_down_read(&sbi->s_journal_flag_rwsem);
2717 trace_ext4_writepages(inode, wbc); 2717 trace_ext4_writepages(inode, wbc);
2718 2718
2719 if (dax_mapping(mapping)) {
2720 ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
2721 wbc);
2722 goto out_writepages;
2723 }
2724
2725 /* 2719 /*
2726 * No pages to write? This is mainly a kludge to avoid starting 2720 * No pages to write? This is mainly a kludge to avoid starting
2727 * a transaction for special inodes like journal inode on last iput() 2721 * a transaction for special inodes like journal inode on last iput()
@@ -2942,6 +2936,27 @@ out_writepages:
2942 return ret; 2936 return ret;
2943} 2937}
2944 2938
2939static int ext4_dax_writepages(struct address_space *mapping,
2940 struct writeback_control *wbc)
2941{
2942 int ret;
2943 long nr_to_write = wbc->nr_to_write;
2944 struct inode *inode = mapping->host;
2945 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2946
2947 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
2948 return -EIO;
2949
2950 percpu_down_read(&sbi->s_journal_flag_rwsem);
2951 trace_ext4_writepages(inode, wbc);
2952
2953 ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc);
2954 trace_ext4_writepages_result(inode, wbc, ret,
2955 nr_to_write - wbc->nr_to_write);
2956 percpu_up_read(&sbi->s_journal_flag_rwsem);
2957 return ret;
2958}
2959
2945static int ext4_nonda_switch(struct super_block *sb) 2960static int ext4_nonda_switch(struct super_block *sb)
2946{ 2961{
2947 s64 free_clusters, dirty_clusters; 2962 s64 free_clusters, dirty_clusters;
@@ -3845,10 +3860,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
3845 if (ext4_has_inline_data(inode)) 3860 if (ext4_has_inline_data(inode))
3846 return 0; 3861 return 0;
3847 3862
3848 /* DAX uses iomap path now */
3849 if (WARN_ON_ONCE(IS_DAX(inode)))
3850 return 0;
3851
3852 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 3863 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
3853 if (iov_iter_rw(iter) == READ) 3864 if (iov_iter_rw(iter) == READ)
3854 ret = ext4_direct_IO_read(iocb, iter); 3865 ret = ext4_direct_IO_read(iocb, iter);
@@ -3934,6 +3945,13 @@ static const struct address_space_operations ext4_da_aops = {
3934 .error_remove_page = generic_error_remove_page, 3945 .error_remove_page = generic_error_remove_page,
3935}; 3946};
3936 3947
3948static const struct address_space_operations ext4_dax_aops = {
3949 .writepages = ext4_dax_writepages,
3950 .direct_IO = noop_direct_IO,
3951 .set_page_dirty = noop_set_page_dirty,
3952 .invalidatepage = noop_invalidatepage,
3953};
3954
3937void ext4_set_aops(struct inode *inode) 3955void ext4_set_aops(struct inode *inode)
3938{ 3956{
3939 switch (ext4_inode_journal_mode(inode)) { 3957 switch (ext4_inode_journal_mode(inode)) {
@@ -3946,7 +3964,9 @@ void ext4_set_aops(struct inode *inode)
3946 default: 3964 default:
3947 BUG(); 3965 BUG();
3948 } 3966 }
3949 if (test_opt(inode->i_sb, DELALLOC)) 3967 if (IS_DAX(inode))
3968 inode->i_mapping->a_ops = &ext4_dax_aops;
3969 else if (test_opt(inode->i_sb, DELALLOC))
3950 inode->i_mapping->a_ops = &ext4_da_aops; 3970 inode->i_mapping->a_ops = &ext4_da_aops;
3951 else 3971 else
3952 inode->i_mapping->a_ops = &ext4_aops; 3972 inode->i_mapping->a_ops = &ext4_aops;
diff --git a/fs/libfs.c b/fs/libfs.c
index 7ff3cb904acd..0fb590d79f30 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1060,6 +1060,45 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1060} 1060}
1061EXPORT_SYMBOL(noop_fsync); 1061EXPORT_SYMBOL(noop_fsync);
1062 1062
1063int noop_set_page_dirty(struct page *page)
1064{
1065 /*
1066 * Unlike __set_page_dirty_no_writeback that handles dirty page
1067 * tracking in the page object, dax does all dirty tracking in
1068 * the inode address_space in response to mkwrite faults. In the
1069 * dax case we only need to worry about potentially dirty CPU
1070 * caches, not dirty page cache pages to write back.
1071 *
1072 * This callback is defined to prevent fallback to
1073 * __set_page_dirty_buffers() in set_page_dirty().
1074 */
1075 return 0;
1076}
1077EXPORT_SYMBOL_GPL(noop_set_page_dirty);
1078
1079void noop_invalidatepage(struct page *page, unsigned int offset,
1080 unsigned int length)
1081{
1082 /*
1083 * There is no page cache to invalidate in the dax case, however
1084 * we need this callback defined to prevent falling back to
1085 * block_invalidatepage() in do_invalidatepage().
1086 */
1087}
1088EXPORT_SYMBOL_GPL(noop_invalidatepage);
1089
1090ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1091{
1092 /*
1093	 * iomap-based filesystems support direct I/O without the need for
1094 * this callback. However, it still needs to be set in
1095 * inode->a_ops so that open/fcntl know that direct I/O is
1096 * generally supported.
1097 */
1098 return -EINVAL;
1099}
1100EXPORT_SYMBOL_GPL(noop_direct_IO);
1101
1063/* Because kfree isn't assignment-compatible with void(void*) ;-/ */ 1102/* Because kfree isn't assignment-compatible with void(void*) ;-/ */
1064void kfree_link(void *p) 1103void kfree_link(void *p)
1065{ 1104{
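
The reason noop_direct_IO() must exist even though it only returns -EINVAL is the VFS open-time gate: O_DIRECT is refused unless an a_ops->direct_IO method is present. Approximately the check in do_dentry_open() (quoted from memory, so treat it as a sketch):

	if (f->f_flags & O_DIRECT) {
		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
			return -EINVAL;
	}
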
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 31f1f10eecd1..436a1de3fcdf 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1195,16 +1195,22 @@ xfs_vm_writepages(
1195 int ret; 1195 int ret;
1196 1196
1197 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 1197 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
1198 if (dax_mapping(mapping))
1199 return dax_writeback_mapping_range(mapping,
1200 xfs_find_bdev_for_inode(mapping->host), wbc);
1201
1202 ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc); 1198 ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
1203 if (wpc.ioend) 1199 if (wpc.ioend)
1204 ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1200 ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1205 return ret; 1201 return ret;
1206} 1202}
1207 1203
1204STATIC int
1205xfs_dax_writepages(
1206 struct address_space *mapping,
1207 struct writeback_control *wbc)
1208{
1209 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
1210 return dax_writeback_mapping_range(mapping,
1211 xfs_find_bdev_for_inode(mapping->host), wbc);
1212}
1213
1208/* 1214/*
1209 * Called to move a page into cleanable state - and from there 1215 * Called to move a page into cleanable state - and from there
1210 * to be released. The page should already be clean. We always 1216 * to be released. The page should already be clean. We always
@@ -1367,17 +1373,6 @@ out_unlock:
1367 return error; 1373 return error;
1368} 1374}
1369 1375
1370STATIC ssize_t
1371xfs_vm_direct_IO(
1372 struct kiocb *iocb,
1373 struct iov_iter *iter)
1374{
1375 /*
1376 * We just need the method present so that open/fcntl allow direct I/O.
1377 */
1378 return -EINVAL;
1379}
1380
1381STATIC sector_t 1376STATIC sector_t
1382xfs_vm_bmap( 1377xfs_vm_bmap(
1383 struct address_space *mapping, 1378 struct address_space *mapping,
@@ -1500,8 +1495,15 @@ const struct address_space_operations xfs_address_space_operations = {
1500 .releasepage = xfs_vm_releasepage, 1495 .releasepage = xfs_vm_releasepage,
1501 .invalidatepage = xfs_vm_invalidatepage, 1496 .invalidatepage = xfs_vm_invalidatepage,
1502 .bmap = xfs_vm_bmap, 1497 .bmap = xfs_vm_bmap,
1503 .direct_IO = xfs_vm_direct_IO, 1498 .direct_IO = noop_direct_IO,
1504 .migratepage = buffer_migrate_page, 1499 .migratepage = buffer_migrate_page,
1505 .is_partially_uptodate = block_is_partially_uptodate, 1500 .is_partially_uptodate = block_is_partially_uptodate,
1506 .error_remove_page = generic_error_remove_page, 1501 .error_remove_page = generic_error_remove_page,
1507}; 1502};
1503
1504const struct address_space_operations xfs_dax_aops = {
1505 .writepages = xfs_dax_writepages,
1506 .direct_IO = noop_direct_IO,
1507 .set_page_dirty = noop_set_page_dirty,
1508 .invalidatepage = noop_invalidatepage,
1509};
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 88c85ea63da0..69346d460dfa 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -54,6 +54,7 @@ struct xfs_ioend {
54}; 54};
55 55
56extern const struct address_space_operations xfs_address_space_operations; 56extern const struct address_space_operations xfs_address_space_operations;
57extern const struct address_space_operations xfs_dax_aops;
57 58
58int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); 59int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
59 60
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index e0307fbff911..154725b1b813 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1285,7 +1285,10 @@ xfs_setup_iops(
1285 case S_IFREG: 1285 case S_IFREG:
1286 inode->i_op = &xfs_inode_operations; 1286 inode->i_op = &xfs_inode_operations;
1287 inode->i_fop = &xfs_file_operations; 1287 inode->i_fop = &xfs_file_operations;
1288 inode->i_mapping->a_ops = &xfs_address_space_operations; 1288 if (IS_DAX(inode))
1289 inode->i_mapping->a_ops = &xfs_dax_aops;
1290 else
1291 inode->i_mapping->a_ops = &xfs_address_space_operations;
1289 break; 1292 break;
1290 case S_IFDIR: 1293 case S_IFDIR:
1291 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) 1294 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0185ecdae135..f9eb22ad341e 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -26,18 +26,42 @@ extern struct attribute_group dax_attribute_group;
26 26
27#if IS_ENABLED(CONFIG_DAX) 27#if IS_ENABLED(CONFIG_DAX)
28struct dax_device *dax_get_by_host(const char *host); 28struct dax_device *dax_get_by_host(const char *host);
29struct dax_device *alloc_dax(void *private, const char *host,
30 const struct dax_operations *ops);
29void put_dax(struct dax_device *dax_dev); 31void put_dax(struct dax_device *dax_dev);
32void kill_dax(struct dax_device *dax_dev);
33void dax_write_cache(struct dax_device *dax_dev, bool wc);
34bool dax_write_cache_enabled(struct dax_device *dax_dev);
30#else 35#else
31static inline struct dax_device *dax_get_by_host(const char *host) 36static inline struct dax_device *dax_get_by_host(const char *host)
32{ 37{
33 return NULL; 38 return NULL;
34} 39}
35 40static inline struct dax_device *alloc_dax(void *private, const char *host,
41 const struct dax_operations *ops)
42{
43 /*
44 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
45 * NULL is an error or expected.
46 */
47 return NULL;
48}
36static inline void put_dax(struct dax_device *dax_dev) 49static inline void put_dax(struct dax_device *dax_dev)
37{ 50{
38} 51}
52static inline void kill_dax(struct dax_device *dax_dev)
53{
54}
55static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
56{
57}
58static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
59{
60 return false;
61}
39#endif 62#endif
40 63
64struct writeback_control;
41int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); 65int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
42#if IS_ENABLED(CONFIG_FS_DAX) 66#if IS_ENABLED(CONFIG_FS_DAX)
43int __bdev_dax_supported(struct super_block *sb, int blocksize); 67int __bdev_dax_supported(struct super_block *sb, int blocksize);
@@ -57,6 +81,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
57} 81}
58 82
59struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); 83struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
84int dax_writeback_mapping_range(struct address_space *mapping,
85 struct block_device *bdev, struct writeback_control *wbc);
60#else 86#else
61static inline int bdev_dax_supported(struct super_block *sb, int blocksize) 87static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
62{ 88{
@@ -76,22 +102,23 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
76{ 102{
77 return NULL; 103 return NULL;
78} 104}
105
106static inline int dax_writeback_mapping_range(struct address_space *mapping,
107 struct block_device *bdev, struct writeback_control *wbc)
108{
109 return -EOPNOTSUPP;
110}
79#endif 111#endif
80 112
81int dax_read_lock(void); 113int dax_read_lock(void);
82void dax_read_unlock(int id); 114void dax_read_unlock(int id);
83struct dax_device *alloc_dax(void *private, const char *host,
84 const struct dax_operations *ops);
85bool dax_alive(struct dax_device *dax_dev); 115bool dax_alive(struct dax_device *dax_dev);
86void kill_dax(struct dax_device *dax_dev);
87void *dax_get_private(struct dax_device *dax_dev); 116void *dax_get_private(struct dax_device *dax_dev);
88long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, 117long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
89 void **kaddr, pfn_t *pfn); 118 void **kaddr, pfn_t *pfn);
90size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 119size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
91 size_t bytes, struct iov_iter *i); 120 size_t bytes, struct iov_iter *i);
92void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); 121void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
93void dax_write_cache(struct dax_device *dax_dev, bool wc);
94bool dax_write_cache_enabled(struct dax_device *dax_dev);
95 122
96ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, 123ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
97 const struct iomap_ops *ops); 124 const struct iomap_ops *ops);
@@ -121,7 +148,4 @@ static inline bool dax_mapping(struct address_space *mapping)
121 return mapping->host && IS_DAX(mapping->host); 148 return mapping->host && IS_DAX(mapping->host);
122} 149}
123 150
124struct writeback_control;
125int dax_writeback_mapping_range(struct address_space *mapping,
126 struct block_device *bdev, struct writeback_control *wbc);
127#endif 151#endif
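
The relocated alloc_dax() stub encodes a calling convention worth spelling out: with CONFIG_DAX=n a NULL return is expected rather than an error. A hedged sketch of a caller that can run without DAX (driver and ops names here are hypothetical):

	/* Hypothetical driver setup: only treat NULL as fatal when DAX
	 * support was actually compiled in. */
	dax_dev = alloc_dax(priv, disk->disk_name, &my_dax_ops);
	if (!dax_dev && IS_ENABLED(CONFIG_DAX))
		return -ENOMEM;		/* genuine allocation failure */
	/* NULL with CONFIG_DAX=n: continue without direct-access support */
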
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ee7f592e239..2aa02cad94d4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3127,6 +3127,10 @@ extern int simple_rmdir(struct inode *, struct dentry *);
3127extern int simple_rename(struct inode *, struct dentry *, 3127extern int simple_rename(struct inode *, struct dentry *,
3128 struct inode *, struct dentry *, unsigned int); 3128 struct inode *, struct dentry *, unsigned int);
3129extern int noop_fsync(struct file *, loff_t, loff_t, int); 3129extern int noop_fsync(struct file *, loff_t, loff_t, int);
3130extern int noop_set_page_dirty(struct page *page);
3131extern void noop_invalidatepage(struct page *page, unsigned int offset,
3132 unsigned int length);
3133extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
3130extern int simple_empty(struct dentry *); 3134extern int simple_empty(struct dentry *);
3131extern int simple_readpage(struct file *file, struct page *page); 3135extern int simple_readpage(struct file *file, struct page *page);
3132extern int simple_write_begin(struct file *file, struct address_space *mapping, 3136extern int simple_write_begin(struct file *file, struct address_space *mapping,
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index ff855ed965fb..097072c5a852 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -76,12 +76,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
76 struct nvdimm *nvdimm, unsigned int cmd, void *buf, 76 struct nvdimm *nvdimm, unsigned int cmd, void *buf,
77 unsigned int buf_len, int *cmd_rc); 77 unsigned int buf_len, int *cmd_rc);
78 78
79struct device_node;
79struct nvdimm_bus_descriptor { 80struct nvdimm_bus_descriptor {
80 const struct attribute_group **attr_groups; 81 const struct attribute_group **attr_groups;
81 unsigned long bus_dsm_mask; 82 unsigned long bus_dsm_mask;
82 unsigned long cmd_mask; 83 unsigned long cmd_mask;
83 struct module *module; 84 struct module *module;
84 char *provider_name; 85 char *provider_name;
86 struct device_node *of_node;
85 ndctl_fn ndctl; 87 ndctl_fn ndctl;
86 int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); 88 int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
87 int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, 89 int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc,
@@ -123,6 +125,7 @@ struct nd_region_desc {
123 int num_lanes; 125 int num_lanes;
124 int numa_node; 126 int numa_node;
125 unsigned long flags; 127 unsigned long flags;
128 struct device_node *of_node;
126}; 129};
127 130
128struct device; 131struct device;
@@ -164,6 +167,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
164struct nvdimm_bus *to_nvdimm_bus(struct device *dev); 167struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
165struct nvdimm *to_nvdimm(struct device *dev); 168struct nvdimm *to_nvdimm(struct device *dev);
166struct nd_region *to_nd_region(struct device *dev); 169struct nd_region *to_nd_region(struct device *dev);
170struct device *nd_region_dev(struct nd_region *nd_region);
167struct nd_blk_region *to_nd_blk_region(struct device *dev); 171struct nd_blk_region *to_nd_blk_region(struct device *dev);
168struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); 172struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
169struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); 173struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 5dc6b695437d..43c181a6add5 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -180,6 +180,12 @@ struct nd_region;
180void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event); 180void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event);
181int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, 181int __must_check __nd_driver_register(struct nd_device_driver *nd_drv,
182 struct module *module, const char *mod_name); 182 struct module *module, const char *mod_name);
183static inline void nd_driver_unregister(struct nd_device_driver *drv)
184{
185 driver_unregister(&drv->drv);
186}
183#define nd_driver_register(driver) \ 187#define nd_driver_register(driver) \
184 __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) 188 __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
189#define module_nd_driver(driver) \
190 module_driver(driver, nd_driver_register, nd_driver_unregister)
185#endif /* __LINUX_ND_H__ */ 191#endif /* __LINUX_ND_H__ */
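
For readers tracking the pmem.c simplification earlier in this diff: module_nd_driver() leans on the generic module_driver() helper from <linux/device.h>, so module_nd_driver(nd_pmem_driver) expands to roughly the boilerplate it deleted:

	static int __init nd_pmem_driver_init(void)
	{
		return nd_driver_register(&nd_pmem_driver);
	}
	module_init(nd_pmem_driver_init);

	static void __exit nd_pmem_driver_exit(void)
	{
		nd_driver_unregister(&nd_pmem_driver);
	}
	module_exit(nd_pmem_driver_exit);
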
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 620fa78b3b1b..cb166be4918d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -104,7 +104,8 @@ enum {
104 NUM_HINTS = 8, 104 NUM_HINTS = 8,
105 NUM_BDW = NUM_DCR, 105 NUM_BDW = NUM_DCR,
106 NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, 106 NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
107 NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, 107 NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
108 + 4 /* spa1 iset */ + 1 /* spa11 iset */,
108 DIMM_SIZE = SZ_32M, 109 DIMM_SIZE = SZ_32M,
109 LABEL_SIZE = SZ_128K, 110 LABEL_SIZE = SZ_128K,
110 SPA_VCD_SIZE = SZ_4M, 111 SPA_VCD_SIZE = SZ_4M,
@@ -153,6 +154,7 @@ struct nfit_test {
153 void *nfit_buf; 154 void *nfit_buf;
154 dma_addr_t nfit_dma; 155 dma_addr_t nfit_dma;
155 size_t nfit_size; 156 size_t nfit_size;
157 size_t nfit_filled;
156 int dcr_idx; 158 int dcr_idx;
157 int num_dcr; 159 int num_dcr;
158 int num_pm; 160 int num_pm;
@@ -709,7 +711,9 @@ static void smart_notify(struct device *bus_dev,
709 >= thresh->media_temperature) 711 >= thresh->media_temperature)
710 || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) 712 || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
711 && smart->ctrl_temperature 713 && smart->ctrl_temperature
712 >= thresh->ctrl_temperature)) { 714 >= thresh->ctrl_temperature)
715 || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
716 || (smart->shutdown_state != 0)) {
713 device_lock(bus_dev); 717 device_lock(bus_dev);
714 __acpi_nvdimm_notify(dimm_dev, 0x81); 718 __acpi_nvdimm_notify(dimm_dev, 0x81);
715 device_unlock(bus_dev); 719 device_unlock(bus_dev);
@@ -735,6 +739,32 @@ static int nfit_test_cmd_smart_set_threshold(
735 return 0; 739 return 0;
736} 740}
737 741
742static int nfit_test_cmd_smart_inject(
743 struct nd_intel_smart_inject *inj,
744 unsigned int buf_len,
745 struct nd_intel_smart_threshold *thresh,
746 struct nd_intel_smart *smart,
747 struct device *bus_dev, struct device *dimm_dev)
748{
749 if (buf_len != sizeof(*inj))
750 return -EINVAL;
751
752 if (inj->mtemp_enable)
753 smart->media_temperature = inj->media_temperature;
754 if (inj->spare_enable)
755 smart->spares = inj->spares;
756 if (inj->fatal_enable)
757 smart->health = ND_INTEL_SMART_FATAL_HEALTH;
758 if (inj->unsafe_shutdown_enable) {
759 smart->shutdown_state = 1;
760 smart->shutdown_count++;
761 }
762 inj->status = 0;
763 smart_notify(bus_dev, dimm_dev, smart, thresh);
764
765 return 0;
766}
767
738static void uc_error_notify(struct work_struct *work) 768static void uc_error_notify(struct work_struct *work)
739{ 769{
740 struct nfit_test *t = container_of(work, typeof(*t), work); 770 struct nfit_test *t = container_of(work, typeof(*t), work);
@@ -935,6 +965,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
935 t->dcr_idx], 965 t->dcr_idx],
936 &t->smart[i - t->dcr_idx], 966 &t->smart[i - t->dcr_idx],
937 &t->pdev.dev, t->dimm_dev[i]); 967 &t->pdev.dev, t->dimm_dev[i]);
968 case ND_INTEL_SMART_INJECT:
969 return nfit_test_cmd_smart_inject(buf,
970 buf_len,
971 &t->smart_threshold[i -
972 t->dcr_idx],
973 &t->smart[i - t->dcr_idx],
974 &t->pdev.dev, t->dimm_dev[i]);
938 default: 975 default:
939 return -ENOTTY; 976 return -ENOTTY;
940 } 977 }
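
The injection command gives tests a way to force SMART state transitions that smart_notify() then reports as an 0x81 health event. A hedged sketch of a payload, with field names taken from the handler above (the ioctl plumbing that delivers it is elided):

	struct nd_intel_smart_inject inj = {
		.unsafe_shutdown_enable = 1, /* sets shutdown_state, bumps shutdown_count */
	};
	/* delivering this via ND_INTEL_SMART_INJECT triggers smart_notify() */
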
@@ -1222,7 +1259,7 @@ static void smart_init(struct nfit_test *t)
1222 | ND_INTEL_SMART_MTEMP_VALID, 1259 | ND_INTEL_SMART_MTEMP_VALID,
1223 .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, 1260 .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
1224 .media_temperature = 23 * 16, 1261 .media_temperature = 23 * 16,
1225 .ctrl_temperature = 30 * 16, 1262 .ctrl_temperature = 25 * 16,
1226 .pmic_temperature = 40 * 16, 1263 .pmic_temperature = 40 * 16,
1227 .spares = 75, 1264 .spares = 75,
1228 .alarm_flags = ND_INTEL_SMART_SPARE_TRIP 1265 .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
@@ -1366,7 +1403,7 @@ static void nfit_test0_setup(struct nfit_test *t)
1366 struct acpi_nfit_data_region *bdw; 1403 struct acpi_nfit_data_region *bdw;
1367 struct acpi_nfit_flush_address *flush; 1404 struct acpi_nfit_flush_address *flush;
1368 struct acpi_nfit_capabilities *pcap; 1405 struct acpi_nfit_capabilities *pcap;
1369 unsigned int offset, i; 1406 unsigned int offset = 0, i;
1370 1407
1371 /* 1408 /*
1372 * spa0 (interleave first half of dimm0 and dimm1, note storage 1409 * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -1380,93 +1417,102 @@ static void nfit_test0_setup(struct nfit_test *t)
1380 spa->range_index = 0+1; 1417 spa->range_index = 0+1;
1381 spa->address = t->spa_set_dma[0]; 1418 spa->address = t->spa_set_dma[0];
1382 spa->length = SPA0_SIZE; 1419 spa->length = SPA0_SIZE;
1420 offset += spa->header.length;
1383 1421
1384 /* 1422 /*
1385 * spa1 (interleave last half of the 4 DIMMS, note storage 1423 * spa1 (interleave last half of the 4 DIMMS, note storage
1386 * does not actually alias the related block-data-window 1424 * does not actually alias the related block-data-window
1387 * regions) 1425 * regions)
1388 */ 1426 */
1389 spa = nfit_buf + sizeof(*spa); 1427 spa = nfit_buf + offset;
1390 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1428 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1391 spa->header.length = sizeof(*spa); 1429 spa->header.length = sizeof(*spa);
1392 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); 1430 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
1393 spa->range_index = 1+1; 1431 spa->range_index = 1+1;
1394 spa->address = t->spa_set_dma[1]; 1432 spa->address = t->spa_set_dma[1];
1395 spa->length = SPA1_SIZE; 1433 spa->length = SPA1_SIZE;
1434 offset += spa->header.length;
1396 1435
1397 /* spa2 (dcr0) dimm0 */ 1436 /* spa2 (dcr0) dimm0 */
1398 spa = nfit_buf + sizeof(*spa) * 2; 1437 spa = nfit_buf + offset;
1399 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1438 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1400 spa->header.length = sizeof(*spa); 1439 spa->header.length = sizeof(*spa);
1401 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); 1440 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
1402 spa->range_index = 2+1; 1441 spa->range_index = 2+1;
1403 spa->address = t->dcr_dma[0]; 1442 spa->address = t->dcr_dma[0];
1404 spa->length = DCR_SIZE; 1443 spa->length = DCR_SIZE;
1444 offset += spa->header.length;
1405 1445
1406 /* spa3 (dcr1) dimm1 */ 1446 /* spa3 (dcr1) dimm1 */
1407 spa = nfit_buf + sizeof(*spa) * 3; 1447 spa = nfit_buf + offset;
1408 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1448 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1409 spa->header.length = sizeof(*spa); 1449 spa->header.length = sizeof(*spa);
1410 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); 1450 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
1411 spa->range_index = 3+1; 1451 spa->range_index = 3+1;
1412 spa->address = t->dcr_dma[1]; 1452 spa->address = t->dcr_dma[1];
1413 spa->length = DCR_SIZE; 1453 spa->length = DCR_SIZE;
1454 offset += spa->header.length;
1414 1455
1415 /* spa4 (dcr2) dimm2 */ 1456 /* spa4 (dcr2) dimm2 */
1416 spa = nfit_buf + sizeof(*spa) * 4; 1457 spa = nfit_buf + offset;
1417 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1458 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1418 spa->header.length = sizeof(*spa); 1459 spa->header.length = sizeof(*spa);
1419 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); 1460 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
1420 spa->range_index = 4+1; 1461 spa->range_index = 4+1;
1421 spa->address = t->dcr_dma[2]; 1462 spa->address = t->dcr_dma[2];
1422 spa->length = DCR_SIZE; 1463 spa->length = DCR_SIZE;
1464 offset += spa->header.length;
1423 1465
1424 /* spa5 (dcr3) dimm3 */ 1466 /* spa5 (dcr3) dimm3 */
1425 spa = nfit_buf + sizeof(*spa) * 5; 1467 spa = nfit_buf + offset;
1426 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1468 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1427 spa->header.length = sizeof(*spa); 1469 spa->header.length = sizeof(*spa);
1428 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); 1470 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
1429 spa->range_index = 5+1; 1471 spa->range_index = 5+1;
1430 spa->address = t->dcr_dma[3]; 1472 spa->address = t->dcr_dma[3];
1431 spa->length = DCR_SIZE; 1473 spa->length = DCR_SIZE;
1474 offset += spa->header.length;
1432 1475
1433 /* spa6 (bdw for dcr0) dimm0 */ 1476 /* spa6 (bdw for dcr0) dimm0 */
1434 spa = nfit_buf + sizeof(*spa) * 6; 1477 spa = nfit_buf + offset;
1435 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1478 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1436 spa->header.length = sizeof(*spa); 1479 spa->header.length = sizeof(*spa);
1437 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); 1480 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
1438 spa->range_index = 6+1; 1481 spa->range_index = 6+1;
1439 spa->address = t->dimm_dma[0]; 1482 spa->address = t->dimm_dma[0];
1440 spa->length = DIMM_SIZE; 1483 spa->length = DIMM_SIZE;
1484 offset += spa->header.length;
1441 1485
1442 /* spa7 (bdw for dcr1) dimm1 */ 1486 /* spa7 (bdw for dcr1) dimm1 */
1443 spa = nfit_buf + sizeof(*spa) * 7; 1487 spa = nfit_buf + offset;
1444 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1488 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1445 spa->header.length = sizeof(*spa); 1489 spa->header.length = sizeof(*spa);
1446 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); 1490 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
1447 spa->range_index = 7+1; 1491 spa->range_index = 7+1;
1448 spa->address = t->dimm_dma[1]; 1492 spa->address = t->dimm_dma[1];
1449 spa->length = DIMM_SIZE; 1493 spa->length = DIMM_SIZE;
1494 offset += spa->header.length;
1450 1495
1451 /* spa8 (bdw for dcr2) dimm2 */ 1496 /* spa8 (bdw for dcr2) dimm2 */
1452 spa = nfit_buf + sizeof(*spa) * 8; 1497 spa = nfit_buf + offset;
1453 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1498 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1454 spa->header.length = sizeof(*spa); 1499 spa->header.length = sizeof(*spa);
1455 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); 1500 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
1456 spa->range_index = 8+1; 1501 spa->range_index = 8+1;
1457 spa->address = t->dimm_dma[2]; 1502 spa->address = t->dimm_dma[2];
1458 spa->length = DIMM_SIZE; 1503 spa->length = DIMM_SIZE;
1504 offset += spa->header.length;
1459 1505
1460 /* spa9 (bdw for dcr3) dimm3 */ 1506 /* spa9 (bdw for dcr3) dimm3 */
1461 spa = nfit_buf + sizeof(*spa) * 9; 1507 spa = nfit_buf + offset;
1462 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1508 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1463 spa->header.length = sizeof(*spa); 1509 spa->header.length = sizeof(*spa);
1464 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); 1510 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
1465 spa->range_index = 9+1; 1511 spa->range_index = 9+1;
1466 spa->address = t->dimm_dma[3]; 1512 spa->address = t->dimm_dma[3];
1467 spa->length = DIMM_SIZE; 1513 spa->length = DIMM_SIZE;
1514 offset += spa->header.length;
1468 1515
1469 offset = sizeof(*spa) * 10;
1470 /* mem-region0 (spa0, dimm0) */ 1516 /* mem-region0 (spa0, dimm0) */
1471 memdev = nfit_buf + offset; 1517 memdev = nfit_buf + offset;
1472 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1518 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
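/*
 * The recurring change in this function: fixed-stride pointer math
 * (nfit_buf + sizeof(*spa) * n) is replaced by one running offset that
 * advances by each table's self-declared header.length. That stays correct
 * when entries differ in size (control regions truncated at
 * offsetof(..., window_size), variable-length flush-hint tables) and lets
 * the final offset double as the byte count actually written. The shape of
 * the pattern, expressed with a hypothetical helper (illustrative only):
 */
static void *nfit_append(void *nfit_buf, unsigned int *offset,
		u16 type, u16 length)
{
	struct acpi_nfit_header *hdr = nfit_buf + *offset;

	hdr->type = type;
	hdr->length = length;
	*offset += length;	/* next table begins immediately after */
	return hdr;		/* caller fills the type-specific fields */
}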
@@ -1481,9 +1527,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1481 memdev->address = 0; 1527 memdev->address = 0;
1482 memdev->interleave_index = 0; 1528 memdev->interleave_index = 0;
1483 memdev->interleave_ways = 2; 1529 memdev->interleave_ways = 2;
1530 offset += memdev->header.length;
1484 1531
1485 /* mem-region1 (spa0, dimm1) */ 1532 /* mem-region1 (spa0, dimm1) */
1486 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map); 1533 memdev = nfit_buf + offset;
1487 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1534 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1488 memdev->header.length = sizeof(*memdev); 1535 memdev->header.length = sizeof(*memdev);
1489 memdev->device_handle = handle[1]; 1536 memdev->device_handle = handle[1];
@@ -1497,9 +1544,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1497 memdev->interleave_index = 0; 1544 memdev->interleave_index = 0;
1498 memdev->interleave_ways = 2; 1545 memdev->interleave_ways = 2;
1499 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; 1546 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
1547 offset += memdev->header.length;
1500 1548
1501 /* mem-region2 (spa1, dimm0) */ 1549 /* mem-region2 (spa1, dimm0) */
1502 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; 1550 memdev = nfit_buf + offset;
1503 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1551 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1504 memdev->header.length = sizeof(*memdev); 1552 memdev->header.length = sizeof(*memdev);
1505 memdev->device_handle = handle[0]; 1553 memdev->device_handle = handle[0];
@@ -1513,9 +1561,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1513 memdev->interleave_index = 0; 1561 memdev->interleave_index = 0;
1514 memdev->interleave_ways = 4; 1562 memdev->interleave_ways = 4;
1515 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; 1563 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
1564 offset += memdev->header.length;
1516 1565
1517 /* mem-region3 (spa1, dimm1) */ 1566 /* mem-region3 (spa1, dimm1) */
1518 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; 1567 memdev = nfit_buf + offset;
1519 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1568 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1520 memdev->header.length = sizeof(*memdev); 1569 memdev->header.length = sizeof(*memdev);
1521 memdev->device_handle = handle[1]; 1570 memdev->device_handle = handle[1];
@@ -1528,9 +1577,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1528 memdev->address = SPA0_SIZE/2; 1577 memdev->address = SPA0_SIZE/2;
1529 memdev->interleave_index = 0; 1578 memdev->interleave_index = 0;
1530 memdev->interleave_ways = 4; 1579 memdev->interleave_ways = 4;
1580 offset += memdev->header.length;
1531 1581
1532 /* mem-region4 (spa1, dimm2) */ 1582 /* mem-region4 (spa1, dimm2) */
1533 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4; 1583 memdev = nfit_buf + offset;
1534 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1584 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1535 memdev->header.length = sizeof(*memdev); 1585 memdev->header.length = sizeof(*memdev);
1536 memdev->device_handle = handle[2]; 1586 memdev->device_handle = handle[2];
@@ -1544,9 +1594,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1544 memdev->interleave_index = 0; 1594 memdev->interleave_index = 0;
1545 memdev->interleave_ways = 4; 1595 memdev->interleave_ways = 4;
1546 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; 1596 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
1597 offset += memdev->header.length;
1547 1598
1548 /* mem-region5 (spa1, dimm3) */ 1599 /* mem-region5 (spa1, dimm3) */
1549 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; 1600 memdev = nfit_buf + offset;
1550 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1601 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1551 memdev->header.length = sizeof(*memdev); 1602 memdev->header.length = sizeof(*memdev);
1552 memdev->device_handle = handle[3]; 1603 memdev->device_handle = handle[3];
@@ -1559,9 +1610,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1559 memdev->address = SPA0_SIZE/2; 1610 memdev->address = SPA0_SIZE/2;
1560 memdev->interleave_index = 0; 1611 memdev->interleave_index = 0;
1561 memdev->interleave_ways = 4; 1612 memdev->interleave_ways = 4;
1613 offset += memdev->header.length;
1562 1614
1563 /* mem-region6 (spa/dcr0, dimm0) */ 1615 /* mem-region6 (spa/dcr0, dimm0) */
1564 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6; 1616 memdev = nfit_buf + offset;
1565 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1617 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1566 memdev->header.length = sizeof(*memdev); 1618 memdev->header.length = sizeof(*memdev);
1567 memdev->device_handle = handle[0]; 1619 memdev->device_handle = handle[0];
@@ -1574,9 +1626,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1574 memdev->address = 0; 1626 memdev->address = 0;
1575 memdev->interleave_index = 0; 1627 memdev->interleave_index = 0;
1576 memdev->interleave_ways = 1; 1628 memdev->interleave_ways = 1;
1629 offset += memdev->header.length;
1577 1630
1578 /* mem-region7 (spa/dcr1, dimm1) */ 1631 /* mem-region7 (spa/dcr1, dimm1) */
1579 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7; 1632 memdev = nfit_buf + offset;
1580 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1633 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1581 memdev->header.length = sizeof(*memdev); 1634 memdev->header.length = sizeof(*memdev);
1582 memdev->device_handle = handle[1]; 1635 memdev->device_handle = handle[1];
@@ -1589,9 +1642,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1589 memdev->address = 0; 1642 memdev->address = 0;
1590 memdev->interleave_index = 0; 1643 memdev->interleave_index = 0;
1591 memdev->interleave_ways = 1; 1644 memdev->interleave_ways = 1;
1645 offset += memdev->header.length;
1592 1646
1593 /* mem-region8 (spa/dcr2, dimm2) */ 1647 /* mem-region8 (spa/dcr2, dimm2) */
1594 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8; 1648 memdev = nfit_buf + offset;
1595 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1649 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1596 memdev->header.length = sizeof(*memdev); 1650 memdev->header.length = sizeof(*memdev);
1597 memdev->device_handle = handle[2]; 1651 memdev->device_handle = handle[2];
@@ -1604,9 +1658,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1604 memdev->address = 0; 1658 memdev->address = 0;
1605 memdev->interleave_index = 0; 1659 memdev->interleave_index = 0;
1606 memdev->interleave_ways = 1; 1660 memdev->interleave_ways = 1;
1661 offset += memdev->header.length;
1607 1662
1608 /* mem-region9 (spa/dcr3, dimm3) */ 1663 /* mem-region9 (spa/dcr3, dimm3) */
1609 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9; 1664 memdev = nfit_buf + offset;
1610 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1665 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1611 memdev->header.length = sizeof(*memdev); 1666 memdev->header.length = sizeof(*memdev);
1612 memdev->device_handle = handle[3]; 1667 memdev->device_handle = handle[3];
@@ -1619,9 +1674,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1619 memdev->address = 0; 1674 memdev->address = 0;
1620 memdev->interleave_index = 0; 1675 memdev->interleave_index = 0;
1621 memdev->interleave_ways = 1; 1676 memdev->interleave_ways = 1;
1677 offset += memdev->header.length;
1622 1678
1623 /* mem-region10 (spa/bdw0, dimm0) */ 1679 /* mem-region10 (spa/bdw0, dimm0) */
1624 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10; 1680 memdev = nfit_buf + offset;
1625 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1681 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1626 memdev->header.length = sizeof(*memdev); 1682 memdev->header.length = sizeof(*memdev);
1627 memdev->device_handle = handle[0]; 1683 memdev->device_handle = handle[0];
@@ -1634,9 +1690,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1634 memdev->address = 0; 1690 memdev->address = 0;
1635 memdev->interleave_index = 0; 1691 memdev->interleave_index = 0;
1636 memdev->interleave_ways = 1; 1692 memdev->interleave_ways = 1;
1693 offset += memdev->header.length;
1637 1694
1638 /* mem-region11 (spa/bdw1, dimm1) */ 1695 /* mem-region11 (spa/bdw1, dimm1) */
1639 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11; 1696 memdev = nfit_buf + offset;
1640 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1697 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1641 memdev->header.length = sizeof(*memdev); 1698 memdev->header.length = sizeof(*memdev);
1642 memdev->device_handle = handle[1]; 1699 memdev->device_handle = handle[1];
@@ -1649,9 +1706,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1649 memdev->address = 0; 1706 memdev->address = 0;
1650 memdev->interleave_index = 0; 1707 memdev->interleave_index = 0;
1651 memdev->interleave_ways = 1; 1708 memdev->interleave_ways = 1;
1709 offset += memdev->header.length;
1652 1710
1653 /* mem-region12 (spa/bdw2, dimm2) */ 1711 /* mem-region12 (spa/bdw2, dimm2) */
1654 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12; 1712 memdev = nfit_buf + offset;
1655 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1713 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1656 memdev->header.length = sizeof(*memdev); 1714 memdev->header.length = sizeof(*memdev);
1657 memdev->device_handle = handle[2]; 1715 memdev->device_handle = handle[2];
@@ -1664,9 +1722,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1664 memdev->address = 0; 1722 memdev->address = 0;
1665 memdev->interleave_index = 0; 1723 memdev->interleave_index = 0;
1666 memdev->interleave_ways = 1; 1724 memdev->interleave_ways = 1;
1725 offset += memdev->header.length;
1667 1726
1668 /* mem-region13 (spa/dcr3, dimm3) */ 1727 /* mem-region13 (spa/dcr3, dimm3) */
1669 memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13; 1728 memdev = nfit_buf + offset;
1670 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 1729 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1671 memdev->header.length = sizeof(*memdev); 1730 memdev->header.length = sizeof(*memdev);
1672 memdev->device_handle = handle[3]; 1731 memdev->device_handle = handle[3];
@@ -1680,12 +1739,12 @@ static void nfit_test0_setup(struct nfit_test *t)
1680 memdev->interleave_index = 0; 1739 memdev->interleave_index = 0;
1681 memdev->interleave_ways = 1; 1740 memdev->interleave_ways = 1;
1682 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; 1741 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
1742 offset += memdev->header.length;
1683 1743
1684 offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
1685 /* dcr-descriptor0: blk */ 1744 /* dcr-descriptor0: blk */
1686 dcr = nfit_buf + offset; 1745 dcr = nfit_buf + offset;
1687 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1746 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
1688 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1747 dcr->header.length = sizeof(*dcr);
1689 dcr->region_index = 0+1; 1748 dcr->region_index = 0+1;
1690 dcr_common_init(dcr); 1749 dcr_common_init(dcr);
1691 dcr->serial_number = ~handle[0]; 1750 dcr->serial_number = ~handle[0];
@@ -1696,11 +1755,12 @@ static void nfit_test0_setup(struct nfit_test *t)
1696 dcr->command_size = 8; 1755 dcr->command_size = 8;
1697 dcr->status_offset = 8; 1756 dcr->status_offset = 8;
1698 dcr->status_size = 4; 1757 dcr->status_size = 4;
1758 offset += dcr->header.length;
1699 1759
1700 /* dcr-descriptor1: blk */ 1760 /* dcr-descriptor1: blk */
1701 dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); 1761 dcr = nfit_buf + offset;
1702 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1762 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
1703 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1763 dcr->header.length = sizeof(*dcr);
1704 dcr->region_index = 1+1; 1764 dcr->region_index = 1+1;
1705 dcr_common_init(dcr); 1765 dcr_common_init(dcr);
1706 dcr->serial_number = ~handle[1]; 1766 dcr->serial_number = ~handle[1];
@@ -1711,11 +1771,12 @@ static void nfit_test0_setup(struct nfit_test *t)
1711 dcr->command_size = 8; 1771 dcr->command_size = 8;
1712 dcr->status_offset = 8; 1772 dcr->status_offset = 8;
1713 dcr->status_size = 4; 1773 dcr->status_size = 4;
1774 offset += dcr->header.length;
1714 1775
1715 /* dcr-descriptor2: blk */ 1776 /* dcr-descriptor2: blk */
1716 dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; 1777 dcr = nfit_buf + offset;
1717 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1778 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
1718 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1779 dcr->header.length = sizeof(*dcr);
1719 dcr->region_index = 2+1; 1780 dcr->region_index = 2+1;
1720 dcr_common_init(dcr); 1781 dcr_common_init(dcr);
1721 dcr->serial_number = ~handle[2]; 1782 dcr->serial_number = ~handle[2];
@@ -1726,11 +1787,12 @@ static void nfit_test0_setup(struct nfit_test *t)
1726 dcr->command_size = 8; 1787 dcr->command_size = 8;
1727 dcr->status_offset = 8; 1788 dcr->status_offset = 8;
1728 dcr->status_size = 4; 1789 dcr->status_size = 4;
1790 offset += dcr->header.length;
1729 1791
1730 /* dcr-descriptor3: blk */ 1792 /* dcr-descriptor3: blk */
1731 dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; 1793 dcr = nfit_buf + offset;
1732 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1794 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
1733 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1795 dcr->header.length = sizeof(*dcr);
1734 dcr->region_index = 3+1; 1796 dcr->region_index = 3+1;
1735 dcr_common_init(dcr); 1797 dcr_common_init(dcr);
1736 dcr->serial_number = ~handle[3]; 1798 dcr->serial_number = ~handle[3];
@@ -1741,8 +1803,8 @@ static void nfit_test0_setup(struct nfit_test *t)
1741 dcr->command_size = 8; 1803 dcr->command_size = 8;
1742 dcr->status_offset = 8; 1804 dcr->status_offset = 8;
1743 dcr->status_size = 4; 1805 dcr->status_size = 4;
1806 offset += dcr->header.length;
1744 1807
1745 offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
1746 /* dcr-descriptor0: pmem */ 1808 /* dcr-descriptor0: pmem */
1747 dcr = nfit_buf + offset; 1809 dcr = nfit_buf + offset;
1748 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1810 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1753,10 +1815,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1753 dcr->serial_number = ~handle[0]; 1815 dcr->serial_number = ~handle[0];
1754 dcr->code = NFIT_FIC_BYTEN; 1816 dcr->code = NFIT_FIC_BYTEN;
1755 dcr->windows = 0; 1817 dcr->windows = 0;
1818 offset += dcr->header.length;
1756 1819
1757 /* dcr-descriptor1: pmem */ 1820 /* dcr-descriptor1: pmem */
1758 dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, 1821 dcr = nfit_buf + offset;
1759 window_size);
1760 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1822 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
1761 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1823 dcr->header.length = offsetof(struct acpi_nfit_control_region,
1762 window_size); 1824 window_size);
@@ -1765,10 +1827,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1765 dcr->serial_number = ~handle[1]; 1827 dcr->serial_number = ~handle[1];
1766 dcr->code = NFIT_FIC_BYTEN; 1828 dcr->code = NFIT_FIC_BYTEN;
1767 dcr->windows = 0; 1829 dcr->windows = 0;
1830 offset += dcr->header.length;
1768 1831
1769 /* dcr-descriptor2: pmem */ 1832 /* dcr-descriptor2: pmem */
1770 dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, 1833 dcr = nfit_buf + offset;
1771 window_size) * 2;
1772 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1834 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
1773 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1835 dcr->header.length = offsetof(struct acpi_nfit_control_region,
1774 window_size); 1836 window_size);
@@ -1777,10 +1839,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1777 dcr->serial_number = ~handle[2]; 1839 dcr->serial_number = ~handle[2];
1778 dcr->code = NFIT_FIC_BYTEN; 1840 dcr->code = NFIT_FIC_BYTEN;
1779 dcr->windows = 0; 1841 dcr->windows = 0;
1842 offset += dcr->header.length;
1780 1843
1781 /* dcr-descriptor3: pmem */ 1844 /* dcr-descriptor3: pmem */
1782 dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, 1845 dcr = nfit_buf + offset;
1783 window_size) * 3;
1784 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1846 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
1785 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1847 dcr->header.length = offsetof(struct acpi_nfit_control_region,
1786 window_size); 1848 window_size);
@@ -1789,54 +1851,56 @@ static void nfit_test0_setup(struct nfit_test *t)
1789 dcr->serial_number = ~handle[3]; 1851 dcr->serial_number = ~handle[3];
1790 dcr->code = NFIT_FIC_BYTEN; 1852 dcr->code = NFIT_FIC_BYTEN;
1791 dcr->windows = 0; 1853 dcr->windows = 0;
1854 offset += dcr->header.length;
1792 1855
1793 offset = offset + offsetof(struct acpi_nfit_control_region,
1794 window_size) * 4;
1795 /* bdw0 (spa/dcr0, dimm0) */ 1856 /* bdw0 (spa/dcr0, dimm0) */
1796 bdw = nfit_buf + offset; 1857 bdw = nfit_buf + offset;
1797 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; 1858 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
1798 bdw->header.length = sizeof(struct acpi_nfit_data_region); 1859 bdw->header.length = sizeof(*bdw);
1799 bdw->region_index = 0+1; 1860 bdw->region_index = 0+1;
1800 bdw->windows = 1; 1861 bdw->windows = 1;
1801 bdw->offset = 0; 1862 bdw->offset = 0;
1802 bdw->size = BDW_SIZE; 1863 bdw->size = BDW_SIZE;
1803 bdw->capacity = DIMM_SIZE; 1864 bdw->capacity = DIMM_SIZE;
1804 bdw->start_address = 0; 1865 bdw->start_address = 0;
1866 offset += bdw->header.length;
1805 1867
1806 /* bdw1 (spa/dcr1, dimm1) */ 1868 /* bdw1 (spa/dcr1, dimm1) */
1807 bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region); 1869 bdw = nfit_buf + offset;
1808 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; 1870 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
1809 bdw->header.length = sizeof(struct acpi_nfit_data_region); 1871 bdw->header.length = sizeof(*bdw);
1810 bdw->region_index = 1+1; 1872 bdw->region_index = 1+1;
1811 bdw->windows = 1; 1873 bdw->windows = 1;
1812 bdw->offset = 0; 1874 bdw->offset = 0;
1813 bdw->size = BDW_SIZE; 1875 bdw->size = BDW_SIZE;
1814 bdw->capacity = DIMM_SIZE; 1876 bdw->capacity = DIMM_SIZE;
1815 bdw->start_address = 0; 1877 bdw->start_address = 0;
1878 offset += bdw->header.length;
1816 1879
1817 /* bdw2 (spa/dcr2, dimm2) */ 1880 /* bdw2 (spa/dcr2, dimm2) */
1818 bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2; 1881 bdw = nfit_buf + offset;
1819 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; 1882 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
1820 bdw->header.length = sizeof(struct acpi_nfit_data_region); 1883 bdw->header.length = sizeof(*bdw);
1821 bdw->region_index = 2+1; 1884 bdw->region_index = 2+1;
1822 bdw->windows = 1; 1885 bdw->windows = 1;
1823 bdw->offset = 0; 1886 bdw->offset = 0;
1824 bdw->size = BDW_SIZE; 1887 bdw->size = BDW_SIZE;
1825 bdw->capacity = DIMM_SIZE; 1888 bdw->capacity = DIMM_SIZE;
1826 bdw->start_address = 0; 1889 bdw->start_address = 0;
1890 offset += bdw->header.length;
1827 1891
1828 /* bdw3 (spa/dcr3, dimm3) */ 1892 /* bdw3 (spa/dcr3, dimm3) */
1829 bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3; 1893 bdw = nfit_buf + offset;
1830 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; 1894 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
1831 bdw->header.length = sizeof(struct acpi_nfit_data_region); 1895 bdw->header.length = sizeof(*bdw);
1832 bdw->region_index = 3+1; 1896 bdw->region_index = 3+1;
1833 bdw->windows = 1; 1897 bdw->windows = 1;
1834 bdw->offset = 0; 1898 bdw->offset = 0;
1835 bdw->size = BDW_SIZE; 1899 bdw->size = BDW_SIZE;
1836 bdw->capacity = DIMM_SIZE; 1900 bdw->capacity = DIMM_SIZE;
1837 bdw->start_address = 0; 1901 bdw->start_address = 0;
1902 offset += bdw->header.length;
1838 1903
1839 offset = offset + sizeof(struct acpi_nfit_data_region) * 4;
1840 /* flush0 (dimm0) */ 1904 /* flush0 (dimm0) */
1841 flush = nfit_buf + offset; 1905 flush = nfit_buf + offset;
1842 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1906 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -1845,48 +1909,52 @@ static void nfit_test0_setup(struct nfit_test *t)
1845 flush->hint_count = NUM_HINTS; 1909 flush->hint_count = NUM_HINTS;
1846 for (i = 0; i < NUM_HINTS; i++) 1910 for (i = 0; i < NUM_HINTS; i++)
1847 flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64); 1911 flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
1912 offset += flush->header.length;
1848 1913
1849 /* flush1 (dimm1) */ 1914 /* flush1 (dimm1) */
1850 flush = nfit_buf + offset + flush_hint_size * 1; 1915 flush = nfit_buf + offset;
1851 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1916 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
1852 flush->header.length = flush_hint_size; 1917 flush->header.length = flush_hint_size;
1853 flush->device_handle = handle[1]; 1918 flush->device_handle = handle[1];
1854 flush->hint_count = NUM_HINTS; 1919 flush->hint_count = NUM_HINTS;
1855 for (i = 0; i < NUM_HINTS; i++) 1920 for (i = 0; i < NUM_HINTS; i++)
1856 flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64); 1921 flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
1922 offset += flush->header.length;
1857 1923
1858 /* flush2 (dimm2) */ 1924 /* flush2 (dimm2) */
1859 flush = nfit_buf + offset + flush_hint_size * 2; 1925 flush = nfit_buf + offset;
1860 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1926 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
1861 flush->header.length = flush_hint_size; 1927 flush->header.length = flush_hint_size;
1862 flush->device_handle = handle[2]; 1928 flush->device_handle = handle[2];
1863 flush->hint_count = NUM_HINTS; 1929 flush->hint_count = NUM_HINTS;
1864 for (i = 0; i < NUM_HINTS; i++) 1930 for (i = 0; i < NUM_HINTS; i++)
1865 flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64); 1931 flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
1932 offset += flush->header.length;
1866 1933
1867 /* flush3 (dimm3) */ 1934 /* flush3 (dimm3) */
1868 flush = nfit_buf + offset + flush_hint_size * 3; 1935 flush = nfit_buf + offset;
1869 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1936 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
1870 flush->header.length = flush_hint_size; 1937 flush->header.length = flush_hint_size;
1871 flush->device_handle = handle[3]; 1938 flush->device_handle = handle[3];
1872 flush->hint_count = NUM_HINTS; 1939 flush->hint_count = NUM_HINTS;
1873 for (i = 0; i < NUM_HINTS; i++) 1940 for (i = 0; i < NUM_HINTS; i++)
1874 flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); 1941 flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
1942 offset += flush->header.length;
1875 1943
1876 /* platform capabilities */ 1944 /* platform capabilities */
1877 pcap = nfit_buf + offset + flush_hint_size * 4; 1945 pcap = nfit_buf + offset;
1878 pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; 1946 pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
1879 pcap->header.length = sizeof(*pcap); 1947 pcap->header.length = sizeof(*pcap);
1880 pcap->highest_capability = 1; 1948 pcap->highest_capability = 1;
1881 pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | 1949 pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
1882 ACPI_NFIT_CAPABILITY_MEM_FLUSH; 1950 ACPI_NFIT_CAPABILITY_MEM_FLUSH;
1951 offset += pcap->header.length;
1883 1952
1884 if (t->setup_hotplug) { 1953 if (t->setup_hotplug) {
1885 offset = offset + flush_hint_size * 4 + sizeof(*pcap);
1886 /* dcr-descriptor4: blk */ 1954 /* dcr-descriptor4: blk */
1887 dcr = nfit_buf + offset; 1955 dcr = nfit_buf + offset;
1888 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1956 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
1889 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1957 dcr->header.length = sizeof(*dcr);
1890 dcr->region_index = 8+1; 1958 dcr->region_index = 8+1;
1891 dcr_common_init(dcr); 1959 dcr_common_init(dcr);
1892 dcr->serial_number = ~handle[4]; 1960 dcr->serial_number = ~handle[4];
@@ -1897,8 +1965,8 @@ static void nfit_test0_setup(struct nfit_test *t)
1897 dcr->command_size = 8; 1965 dcr->command_size = 8;
1898 dcr->status_offset = 8; 1966 dcr->status_offset = 8;
1899 dcr->status_size = 4; 1967 dcr->status_size = 4;
1968 offset += dcr->header.length;
1900 1969
1901 offset = offset + sizeof(struct acpi_nfit_control_region);
1902 /* dcr-descriptor4: pmem */ 1970 /* dcr-descriptor4: pmem */
1903 dcr = nfit_buf + offset; 1971 dcr = nfit_buf + offset;
1904 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1972 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1909,21 +1977,20 @@ static void nfit_test0_setup(struct nfit_test *t)
1909 dcr->serial_number = ~handle[4]; 1977 dcr->serial_number = ~handle[4];
1910 dcr->code = NFIT_FIC_BYTEN; 1978 dcr->code = NFIT_FIC_BYTEN;
1911 dcr->windows = 0; 1979 dcr->windows = 0;
1980 offset += dcr->header.length;
1912 1981
1913 offset = offset + offsetof(struct acpi_nfit_control_region,
1914 window_size);
1915 /* bdw4 (spa/dcr4, dimm4) */ 1982 /* bdw4 (spa/dcr4, dimm4) */
1916 bdw = nfit_buf + offset; 1983 bdw = nfit_buf + offset;
1917 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; 1984 bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
1918 bdw->header.length = sizeof(struct acpi_nfit_data_region); 1985 bdw->header.length = sizeof(*bdw);
1919 bdw->region_index = 8+1; 1986 bdw->region_index = 8+1;
1920 bdw->windows = 1; 1987 bdw->windows = 1;
1921 bdw->offset = 0; 1988 bdw->offset = 0;
1922 bdw->size = BDW_SIZE; 1989 bdw->size = BDW_SIZE;
1923 bdw->capacity = DIMM_SIZE; 1990 bdw->capacity = DIMM_SIZE;
1924 bdw->start_address = 0; 1991 bdw->start_address = 0;
1992 offset += bdw->header.length;
1925 1993
1926 offset = offset + sizeof(struct acpi_nfit_data_region);
1927 /* spa10 (dcr4) dimm4 */ 1994 /* spa10 (dcr4) dimm4 */
1928 spa = nfit_buf + offset; 1995 spa = nfit_buf + offset;
1929 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1996 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
@@ -1932,30 +1999,32 @@ static void nfit_test0_setup(struct nfit_test *t)
1932 spa->range_index = 10+1; 1999 spa->range_index = 10+1;
1933 spa->address = t->dcr_dma[4]; 2000 spa->address = t->dcr_dma[4];
1934 spa->length = DCR_SIZE; 2001 spa->length = DCR_SIZE;
2002 offset += spa->header.length;
1935 2003
1936 /* 2004 /*
1937 * spa11 (single-dimm interleave for hotplug, note storage 2005 * spa11 (single-dimm interleave for hotplug, note storage
1938 * does not actually alias the related block-data-window 2006 * does not actually alias the related block-data-window
1939 * regions) 2007 * regions)
1940 */ 2008 */
1941 spa = nfit_buf + offset + sizeof(*spa); 2009 spa = nfit_buf + offset;
1942 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 2010 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1943 spa->header.length = sizeof(*spa); 2011 spa->header.length = sizeof(*spa);
1944 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); 2012 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
1945 spa->range_index = 11+1; 2013 spa->range_index = 11+1;
1946 spa->address = t->spa_set_dma[2]; 2014 spa->address = t->spa_set_dma[2];
1947 spa->length = SPA0_SIZE; 2015 spa->length = SPA0_SIZE;
2016 offset += spa->header.length;
1948 2017
1949 /* spa12 (bdw for dcr4) dimm4 */ 2018 /* spa12 (bdw for dcr4) dimm4 */
1950 spa = nfit_buf + offset + sizeof(*spa) * 2; 2019 spa = nfit_buf + offset;
1951 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 2020 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
1952 spa->header.length = sizeof(*spa); 2021 spa->header.length = sizeof(*spa);
1953 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); 2022 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
1954 spa->range_index = 12+1; 2023 spa->range_index = 12+1;
1955 spa->address = t->dimm_dma[4]; 2024 spa->address = t->dimm_dma[4];
1956 spa->length = DIMM_SIZE; 2025 spa->length = DIMM_SIZE;
2026 offset += spa->header.length;
1957 2027
1958 offset = offset + sizeof(*spa) * 3;
1959 /* mem-region14 (spa/dcr4, dimm4) */ 2028 /* mem-region14 (spa/dcr4, dimm4) */
1960 memdev = nfit_buf + offset; 2029 memdev = nfit_buf + offset;
1961 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 2030 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1970,10 +2039,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1970 memdev->address = 0; 2039 memdev->address = 0;
1971 memdev->interleave_index = 0; 2040 memdev->interleave_index = 0;
1972 memdev->interleave_ways = 1; 2041 memdev->interleave_ways = 1;
2042 offset += memdev->header.length;
1973 2043
1974 /* mem-region15 (spa0, dimm4) */ 2044 /* mem-region15 (spa11, dimm4) */
1975 memdev = nfit_buf + offset + 2045 memdev = nfit_buf + offset;
1976 sizeof(struct acpi_nfit_memory_map);
1977 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 2046 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1978 memdev->header.length = sizeof(*memdev); 2047 memdev->header.length = sizeof(*memdev);
1979 memdev->device_handle = handle[4]; 2048 memdev->device_handle = handle[4];
@@ -1987,10 +2056,10 @@ static void nfit_test0_setup(struct nfit_test *t)
1987 memdev->interleave_index = 0; 2056 memdev->interleave_index = 0;
1988 memdev->interleave_ways = 1; 2057 memdev->interleave_ways = 1;
1989 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; 2058 memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
2059 offset += memdev->header.length;
1990 2060
1991 /* mem-region16 (spa/bdw4, dimm4) */ 2061 /* mem-region16 (spa/bdw4, dimm4) */
1992 memdev = nfit_buf + offset + 2062 memdev = nfit_buf + offset;
1993 sizeof(struct acpi_nfit_memory_map) * 2;
1994 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 2063 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
1995 memdev->header.length = sizeof(*memdev); 2064 memdev->header.length = sizeof(*memdev);
1996 memdev->device_handle = handle[4]; 2065 memdev->device_handle = handle[4];
@@ -2003,8 +2072,8 @@ static void nfit_test0_setup(struct nfit_test *t)
2003 memdev->address = 0; 2072 memdev->address = 0;
2004 memdev->interleave_index = 0; 2073 memdev->interleave_index = 0;
2005 memdev->interleave_ways = 1; 2074 memdev->interleave_ways = 1;
2075 offset += memdev->header.length;
2006 2076
2007 offset = offset + sizeof(struct acpi_nfit_memory_map) * 3;
2008 /* flush3 (dimm4) */ 2077 /* flush3 (dimm4) */
2009 flush = nfit_buf + offset; 2078 flush = nfit_buf + offset;
2010 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 2079 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -2014,8 +2083,14 @@ static void nfit_test0_setup(struct nfit_test *t)
2014 for (i = 0; i < NUM_HINTS; i++) 2083 for (i = 0; i < NUM_HINTS; i++)
2015 flush->hint_address[i] = t->flush_dma[4] 2084 flush->hint_address[i] = t->flush_dma[4]
2016 + i * sizeof(u64); 2085 + i * sizeof(u64);
2086 offset += flush->header.length;
2087
2088 /* sanity check to make sure we've filled the buffer */
2089 WARN_ON(offset != t->nfit_size);
2017 } 2090 }
2018 2091
2092 t->nfit_filled = offset;
2093
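/*
 * Note the asymmetry introduced above: t->nfit_size is the allocation size
 * (sized for the full hotplug layout), while nfit_filled records the bytes
 * actually written. The WARN_ON() sits inside the setup_hotplug branch
 * because only then is the buffer expected to be completely full; without
 * hotplug, nfit_filled stays short of nfit_size, which is why
 * nfit_test_probe() below switches acpi_nfit_init() over to nfit_filled.
 */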
2019 post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], 2094 post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
2020 SPA0_SIZE); 2095 SPA0_SIZE);
2021 2096
@@ -2026,6 +2101,7 @@ static void nfit_test0_setup(struct nfit_test *t)
2026 set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); 2101 set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
2027 set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); 2102 set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
2028 set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); 2103 set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
2104 set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
2029 set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); 2105 set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
2030 set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); 2106 set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
2031 set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); 2107 set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
@@ -2061,17 +2137,18 @@ static void nfit_test1_setup(struct nfit_test *t)
2061 spa->range_index = 0+1; 2137 spa->range_index = 0+1;
2062 spa->address = t->spa_set_dma[0]; 2138 spa->address = t->spa_set_dma[0];
2063 spa->length = SPA2_SIZE; 2139 spa->length = SPA2_SIZE;
2140 offset += spa->header.length;
2064 2141
2065 /* virtual cd region */ 2142 /* virtual cd region */
2066 spa = nfit_buf + sizeof(*spa); 2143 spa = nfit_buf + offset;
2067 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 2144 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
2068 spa->header.length = sizeof(*spa); 2145 spa->header.length = sizeof(*spa);
2069 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); 2146 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
2070 spa->range_index = 0; 2147 spa->range_index = 0;
2071 spa->address = t->spa_set_dma[1]; 2148 spa->address = t->spa_set_dma[1];
2072 spa->length = SPA_VCD_SIZE; 2149 spa->length = SPA_VCD_SIZE;
2150 offset += spa->header.length;
2073 2151
2074 offset += sizeof(*spa) * 2;
2075 /* mem-region0 (spa0, dimm0) */ 2152 /* mem-region0 (spa0, dimm0) */
2076 memdev = nfit_buf + offset; 2153 memdev = nfit_buf + offset;
2077 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 2154 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -2089,8 +2166,8 @@ static void nfit_test1_setup(struct nfit_test *t)
2089 memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED 2166 memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
2090 | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED 2167 | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
2091 | ACPI_NFIT_MEM_NOT_ARMED; 2168 | ACPI_NFIT_MEM_NOT_ARMED;
2169 offset += memdev->header.length;
2092 2170
2093 offset += sizeof(*memdev);
2094 /* dcr-descriptor0 */ 2171 /* dcr-descriptor0 */
2095 dcr = nfit_buf + offset; 2172 dcr = nfit_buf + offset;
2096 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 2173 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -2101,8 +2178,8 @@ static void nfit_test1_setup(struct nfit_test *t)
2101 dcr->serial_number = ~handle[5]; 2178 dcr->serial_number = ~handle[5];
2102 dcr->code = NFIT_FIC_BYTE; 2179 dcr->code = NFIT_FIC_BYTE;
2103 dcr->windows = 0; 2180 dcr->windows = 0;
2104
2105 offset += dcr->header.length; 2181 offset += dcr->header.length;
2182
2106 memdev = nfit_buf + offset; 2183 memdev = nfit_buf + offset;
2107 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; 2184 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
2108 memdev->header.length = sizeof(*memdev); 2185 memdev->header.length = sizeof(*memdev);
@@ -2117,9 +2194,9 @@ static void nfit_test1_setup(struct nfit_test *t)
2117 memdev->interleave_index = 0; 2194 memdev->interleave_index = 0;
2118 memdev->interleave_ways = 1; 2195 memdev->interleave_ways = 1;
2119 memdev->flags = ACPI_NFIT_MEM_MAP_FAILED; 2196 memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
2197 offset += memdev->header.length;
2120 2198
2121 /* dcr-descriptor1 */ 2199 /* dcr-descriptor1 */
2122 offset += sizeof(*memdev);
2123 dcr = nfit_buf + offset; 2200 dcr = nfit_buf + offset;
2124 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 2201 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
2125 dcr->header.length = offsetof(struct acpi_nfit_control_region, 2202 dcr->header.length = offsetof(struct acpi_nfit_control_region,
@@ -2129,6 +2206,12 @@ static void nfit_test1_setup(struct nfit_test *t)
2129 dcr->serial_number = ~handle[6]; 2206 dcr->serial_number = ~handle[6];
2130 dcr->code = NFIT_FIC_BYTE; 2207 dcr->code = NFIT_FIC_BYTE;
2131 dcr->windows = 0; 2208 dcr->windows = 0;
2209 offset += dcr->header.length;
2210
2211 /* sanity check to make sure we've filled the buffer */
2212 WARN_ON(offset != t->nfit_size);
2213
2214 t->nfit_filled = offset;
2132 2215
2133 post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], 2216 post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
2134 SPA2_SIZE); 2217 SPA2_SIZE);
@@ -2487,7 +2570,7 @@ static int nfit_test_probe(struct platform_device *pdev)
2487 nd_desc->ndctl = nfit_test_ctl; 2570 nd_desc->ndctl = nfit_test_ctl;
2488 2571
2489 rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, 2572 rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
2490 nfit_test->nfit_size); 2573 nfit_test->nfit_filled);
2491 if (rc) 2574 if (rc)
2492 return rc; 2575 return rc;
2493 2576
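/*
 * Why nfit_filled rather than nfit_size here: acpi_nfit_init() walks the
 * buffer table-by-table using each header's self-declared length, roughly
 * (illustrative, error handling elided):
 *
 *	while (data < end) {
 *		struct acpi_nfit_header *hdr = data;
 *		if (!hdr->length)
 *			break;		a zeroed tail reads as a
 *					zero-length table
 *		data += hdr->length;
 *	}
 *
 * Handing it the whole allocation would include the unwritten, zeroed tail
 * whenever the hotplug structures are not populated; passing only the
 * filled byte count keeps the parser on valid tables.
 */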
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 428344519cdf..33752e06ff8d 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat {
93#define ND_INTEL_FW_FINISH_UPDATE 15 93#define ND_INTEL_FW_FINISH_UPDATE 15
94#define ND_INTEL_FW_FINISH_QUERY 16 94#define ND_INTEL_FW_FINISH_QUERY 16
95#define ND_INTEL_SMART_SET_THRESHOLD 17 95#define ND_INTEL_SMART_SET_THRESHOLD 17
96#define ND_INTEL_SMART_INJECT 18
96 97
97#define ND_INTEL_SMART_HEALTH_VALID (1 << 0) 98#define ND_INTEL_SMART_HEALTH_VALID (1 << 0)
98#define ND_INTEL_SMART_SPARES_VALID (1 << 1) 99#define ND_INTEL_SMART_SPARES_VALID (1 << 1)
@@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat {
111#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) 112#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0)
112#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) 113#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1)
113#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) 114#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2)
115#define ND_INTEL_SMART_INJECT_MTEMP (1 << 0)
116#define ND_INTEL_SMART_INJECT_SPARE (1 << 1)
117#define ND_INTEL_SMART_INJECT_FATAL (1 << 2)
118#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3)
114 119
115struct nd_intel_smart { 120struct nd_intel_smart {
116 __u32 status; 121 __u32 status;
@@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold {
158 __u32 status; 163 __u32 status;
159} __packed; 164} __packed;
160 165
166struct nd_intel_smart_inject {
167 __u64 flags;
168 __u8 mtemp_enable;
169 __u16 media_temperature;
170 __u8 spare_enable;
171 __u8 spares;
172 __u8 fatal_enable;
173 __u8 unsafe_shutdown_enable;
174 __u32 status;
175} __packed;
176
161#define INTEL_FW_STORAGE_SIZE 0x100000 177#define INTEL_FW_STORAGE_SIZE 0x100000
162#define INTEL_FW_MAX_SEND_LEN 0xFFEC 178#define INTEL_FW_MAX_SEND_LEN 0xFFEC
163#define INTEL_FW_QUERY_INTERVAL 250000 179#define INTEL_FW_QUERY_INTERVAL 250000
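/*
 * A usage sketch for the new injection payload (hypothetical values; the
 * transport of the buffer through the bus ->ndctl() path as command number
 * ND_INTEL_SMART_INJECT is elided):
 */
	struct nd_intel_smart_inject inj = {
		.flags = ND_INTEL_SMART_INJECT_MTEMP
			| ND_INTEL_SMART_INJECT_SPARE,
		.mtemp_enable = 1,
		.media_temperature = 40 * 16,	/* 40.0C, in 1/16 C units */
		.spare_enable = 1,
		.spares = 5,			/* percent spare capacity left */
	};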