author	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-23 14:18:01 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-23 14:18:01 -0400
commit	1f40c49570eb01436786a9b5845c4469a9a1f362 (patch)
tree	f0a31705d5c0a65604784d9b01841c453055d62f
parent	7639dad93a5564579987abded4ec05e3db13659d (diff)
parent	36092ee8ba695fce023b2118ececa6c2a56b1331 (diff)
Merge tag 'libnvdimm-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
 "The bulk of this update was stabilized before the merge window and
  appeared in -next. The "device dax" implementation was revised this
  week in response to review feedback, and to address failures detected
  by the recently expanded ndctl unit test suite.

  Not included in this pull request are two dax topic branches (dax
  error handling, and dax radix-tree locking). These topics were
  deferred to get a few more days of -next integration testing, and to
  coordinate a branch baseline with Ted and the ext4 tree. Vishal and
  Ross will send the error handling and locking topics respectively in
  the next few days.

  This branch has received a positive build result from the kbuild
  robot across 226 configs.

  Summary:

   - Device DAX for persistent memory: Device DAX is the device-centric
     analogue of Filesystem DAX (CONFIG_FS_DAX). It allows memory
     ranges to be allocated and mapped without need of an intervening
     file system. Device DAX is strict, precise and predictable.
     Specifically this interface:

      a) Guarantees fault granularity with respect to a given page size
         (pte, pmd, or pud) set at configuration time.

      b) Enforces deterministic behavior by being strict about what
         fault scenarios are supported.

     Persistent memory is the first target, but the mechanism is also
     targeted for exclusive allocations of performance/feature
     differentiated memory ranges.

   - Support for the HPE DSM (device specific method) command formats.
     This enables management of these first generation devices until a
     unified DSM specification materializes.

   - Further ACPI 6.1 compliance with support for the common dimm
     identifier format.

   - Various fixes and cleanups across the subsystem"

* tag 'libnvdimm-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (40 commits)
  libnvdimm, dax: fix deletion
  libnvdimm, dax: fix alignment validation
  libnvdimm, dax: autodetect support
  libnvdimm: release ida resources
  Revert "block: enable dax for raw block devices"
  /dev/dax, core: file operations and dax-mmap
  /dev/dax, pmem: direct access to persistent memory
  libnvdimm: stop requiring a driver ->remove() method
  libnvdimm, dax: record the specified alignment of a dax-device instance
  libnvdimm, dax: reserve space to store labels for device-dax
  libnvdimm, dax: introduce device-dax infrastructure
  nfit: add sysfs dimm 'family' and 'dsm_mask' attributes
  tools/testing/nvdimm: ND_CMD_CALL support
  nfit: disable vendor specific commands
  nfit: export subsystem ids as attributes
  nfit: fix format interface code byte order per ACPI6.1
  nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism
  nfit, libnvdimm: clarify "commands" vs "_DSMs"
  libnvdimm: increase max envelope size for ioctl
  acpi/nfit: Add sysfs "id" for NVDIMM ID
  ...
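As a minimal sketch of the device-dax interface described above (assuming a /dev/dax0.0 instance of at least 2MiB exists; this program is illustrative and not part of the series), a process maps the character device directly and takes faults at the configured granularity, with no file system or page cache in between:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/dax0.0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* shared mapping required: check_vma() in the new driver below
	 * rejects private writable mappings and unaligned vmas */
	void *p = mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE,
			MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	memset(p, 0, 4096);	/* faults at the configured page size */

	munmap(p, 2UL << 20);
	close(fd);
	return 0;
}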
-rw-r--r--	block/ioctl.c	32
-rw-r--r--	drivers/Kconfig	2
-rw-r--r--	drivers/Makefile	1
-rw-r--r--	drivers/acpi/nfit.c	282
-rw-r--r--	drivers/acpi/nfit.h	31
-rw-r--r--	drivers/acpi/utils.c	4
-rw-r--r--	drivers/dax/Kconfig	26
-rw-r--r--	drivers/dax/Makefile	4
-rw-r--r--	drivers/dax/dax.c	575
-rw-r--r--	drivers/dax/dax.h	24
-rw-r--r--	drivers/dax/pmem.c	158
-rw-r--r--	drivers/nvdimm/Kconfig	13
-rw-r--r--	drivers/nvdimm/Makefile	1
-rw-r--r--	drivers/nvdimm/blk.c	208
-rw-r--r--	drivers/nvdimm/btt.c	26
-rw-r--r--	drivers/nvdimm/btt_devs.c	24
-rw-r--r--	drivers/nvdimm/bus.c	63
-rw-r--r--	drivers/nvdimm/claim.c	86
-rw-r--r--	drivers/nvdimm/core.c	5
-rw-r--r--	drivers/nvdimm/dax_devs.c	134
-rw-r--r--	drivers/nvdimm/dimm_devs.c	23
-rw-r--r--	drivers/nvdimm/namespace_devs.c	38
-rw-r--r--	drivers/nvdimm/nd-core.h	6
-rw-r--r--	drivers/nvdimm/nd.h	83
-rw-r--r--	drivers/nvdimm/pfn.h	5
-rw-r--r--	drivers/nvdimm/pfn_devs.c	315
-rw-r--r--	drivers/nvdimm/pmem.c	493
-rw-r--r--	drivers/nvdimm/region.c	2
-rw-r--r--	drivers/nvdimm/region_devs.c	34
-rw-r--r--	fs/block_dev.c	96
-rw-r--r--	include/acpi/acpi_bus.h	6
-rw-r--r--	include/linux/fs.h	8
-rw-r--r--	include/linux/libnvdimm.h	7
-rw-r--r--	include/linux/nd.h	11
-rw-r--r--	include/uapi/linux/fs.h	1
-rw-r--r--	include/uapi/linux/ndctl.h	80
-rw-r--r--	mm/huge_memory.c	1
-rw-r--r--	mm/hugetlb.c	1
-rw-r--r--	tools/testing/nvdimm/Kbuild	11
-rw-r--r--	tools/testing/nvdimm/config_check.c	2
-rw-r--r--	tools/testing/nvdimm/test/iomap.c	27
-rw-r--r--	tools/testing/nvdimm/test/nfit.c	90
42 files changed, 2252 insertions, 787 deletions
diff --git a/block/ioctl.c b/block/ioctl.c
index 4ff1f92f89ca..698c7933d582 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -407,35 +407,6 @@ static inline int is_unrecognized_ioctl(int ret)
 		ret == -ENOIOCTLCMD;
 }
 
-#ifdef CONFIG_FS_DAX
-bool blkdev_dax_capable(struct block_device *bdev)
-{
-	struct gendisk *disk = bdev->bd_disk;
-
-	if (!disk->fops->direct_access)
-		return false;
-
-	/*
-	 * If the partition is not aligned on a page boundary, we can't
-	 * do dax I/O to it.
-	 */
-	if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
-			|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
-		return false;
-
-	/*
-	 * If the device has known bad blocks, force all I/O through the
-	 * driver / page cache.
-	 *
-	 * TODO: support finer grained dax error handling
-	 */
-	if (disk->bb && disk->bb->count)
-		return false;
-
-	return true;
-}
-#endif
-
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
 		unsigned cmd, unsigned long arg)
 {
@@ -598,9 +569,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKTRACESETUP:
 	case BLKTRACETEARDOWN:
 		return blk_trace_ioctl(bdev, cmd, argp);
-	case BLKDAXGET:
-		return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
-		break;
 	case IOC_PR_REGISTER:
 		return blkdev_pr_register(bdev, argp);
 	case IOC_PR_RESERVE:
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 430f761b0d8d..e1e2066cecdb 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -192,6 +192,8 @@ source "drivers/android/Kconfig"
 
 source "drivers/nvdimm/Kconfig"
 
+source "drivers/dax/Kconfig"
+
 source "drivers/nvmem/Kconfig"
 
 source "drivers/hwtracing/stm/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 8f5d076baeb0..0b6f3d60193d 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/
 obj-$(CONFIG_NVM)		+= lightnvm/
 obj-y				+= base/ block/ misc/ mfd/ nfc/
 obj-$(CONFIG_LIBNVDIMM)		+= nvdimm/
+obj-$(CONFIG_DEV_DAX)		+= dax/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
 obj-$(CONFIG_NUBUS)		+= nubus/
 obj-y				+= macintosh/
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 63cc9dbe4f3b..2215fc847fa9 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -45,6 +45,11 @@ module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(scrub_overflow_abort,
 		"Number of times we overflow ARS results before abort");
 
+static bool disable_vendor_specific;
+module_param(disable_vendor_specific, bool, S_IRUGO);
+MODULE_PARM_DESC(disable_vendor_specific,
+		"Limit commands to the publicly specified set\n");
+
 static struct workqueue_struct *nfit_wq;
 
 struct nfit_table_prev {
@@ -171,33 +176,46 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		unsigned int buf_len, int *cmd_rc)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-	const struct nd_cmd_desc *desc = NULL;
 	union acpi_object in_obj, in_buf, *out_obj;
+	const struct nd_cmd_desc *desc = NULL;
 	struct device *dev = acpi_desc->dev;
+	struct nd_cmd_pkg *call_pkg = NULL;
 	const char *cmd_name, *dimm_name;
-	unsigned long dsm_mask;
+	unsigned long cmd_mask, dsm_mask;
 	acpi_handle handle;
+	unsigned int func;
 	const u8 *uuid;
 	u32 offset;
 	int rc, i;
 
+	func = cmd;
+	if (cmd == ND_CMD_CALL) {
+		call_pkg = buf;
+		func = call_pkg->nd_command;
+	}
+
 	if (nvdimm) {
 		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 		struct acpi_device *adev = nfit_mem->adev;
 
 		if (!adev)
 			return -ENOTTY;
+		if (call_pkg && nfit_mem->family != call_pkg->nd_family)
+			return -ENOTTY;
+
 		dimm_name = nvdimm_name(nvdimm);
 		cmd_name = nvdimm_cmd_name(cmd);
+		cmd_mask = nvdimm_cmd_mask(nvdimm);
 		dsm_mask = nfit_mem->dsm_mask;
 		desc = nd_cmd_dimm_desc(cmd);
-		uuid = to_nfit_uuid(NFIT_DEV_DIMM);
+		uuid = to_nfit_uuid(nfit_mem->family);
 		handle = adev->handle;
 	} else {
 		struct acpi_device *adev = to_acpi_dev(acpi_desc);
 
 		cmd_name = nvdimm_bus_cmd_name(cmd);
-		dsm_mask = nd_desc->dsm_mask;
+		cmd_mask = nd_desc->cmd_mask;
+		dsm_mask = cmd_mask;
 		desc = nd_cmd_bus_desc(cmd);
 		uuid = to_nfit_uuid(NFIT_DEV_BUS);
 		handle = adev->handle;
@@ -207,7 +225,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
 	if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
 		return -ENOTTY;
 
-	if (!test_bit(cmd, &dsm_mask))
+	if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
 		return -ENOTTY;
 
 	in_obj.type = ACPI_TYPE_PACKAGE;
@@ -222,21 +240,44 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
 				i, buf);
 
+	if (call_pkg) {
+		/* skip over package wrapper */
+		in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
+		in_buf.buffer.length = call_pkg->nd_size_in;
+	}
+
 	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
-		dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__,
-				dimm_name, cmd_name, in_buf.buffer.length);
-		print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
-				4, in_buf.buffer.pointer, min_t(u32, 128,
-					in_buf.buffer.length), true);
+		dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
+				__func__, dimm_name, cmd, func,
+				in_buf.buffer.length);
+		print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
+				in_buf.buffer.pointer,
+				min_t(u32, 256, in_buf.buffer.length), true);
 	}
 
-	out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj);
+	out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
 	if (!out_obj) {
 		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
 				cmd_name);
 		return -EINVAL;
 	}
 
+	if (call_pkg) {
+		call_pkg->nd_fw_size = out_obj->buffer.length;
+		memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
+			out_obj->buffer.pointer,
+			min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
+
+		ACPI_FREE(out_obj);
+		/*
+		 * Need to support FW function w/o known size in advance.
+		 * Caller can determine required size based upon nd_fw_size.
+		 * If we return an error (like elsewhere) then caller wouldn't
+		 * be able to rely upon data returned to make calculation.
+		 */
+		return 0;
+	}
+
 	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
 		dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
 				__func__, dimm_name, cmd_name, out_obj->type);
@@ -658,6 +699,7 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
 		if (!nfit_mem)
 			return -ENOMEM;
 		INIT_LIST_HEAD(&nfit_mem->list);
+		nfit_mem->acpi_desc = acpi_desc;
 		list_add(&nfit_mem->list, &acpi_desc->dimms);
 	}
 
@@ -819,7 +861,7 @@ static ssize_t vendor_show(struct device *dev,
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "%#x\n", dcr->vendor_id);
+	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
 }
 static DEVICE_ATTR_RO(vendor);
 
@@ -828,7 +870,7 @@ static ssize_t rev_id_show(struct device *dev,
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "%#x\n", dcr->revision_id);
+	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
 }
 static DEVICE_ATTR_RO(rev_id);
 
@@ -837,28 +879,142 @@ static ssize_t device_show(struct device *dev,
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "%#x\n", dcr->device_id);
+	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
 }
 static DEVICE_ATTR_RO(device);
 
+static ssize_t subsystem_vendor_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
+}
+static DEVICE_ATTR_RO(subsystem_vendor);
+
+static ssize_t subsystem_rev_id_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	return sprintf(buf, "0x%04x\n",
+			be16_to_cpu(dcr->subsystem_revision_id));
+}
+static DEVICE_ATTR_RO(subsystem_rev_id);
+
+static ssize_t subsystem_device_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
+}
+static DEVICE_ATTR_RO(subsystem_device);
+
+static int num_nvdimm_formats(struct nvdimm *nvdimm)
+{
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+	int formats = 0;
+
+	if (nfit_mem->memdev_pmem)
+		formats++;
+	if (nfit_mem->memdev_bdw)
+		formats++;
+	return formats;
+}
+
 static ssize_t format_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "%#x\n", dcr->code);
+	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code));
 }
 static DEVICE_ATTR_RO(format);
 
+static ssize_t format1_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u32 handle;
+	ssize_t rc = -ENXIO;
+	struct nfit_mem *nfit_mem;
+	struct nfit_memdev *nfit_memdev;
+	struct acpi_nfit_desc *acpi_desc;
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	nfit_mem = nvdimm_provider_data(nvdimm);
+	acpi_desc = nfit_mem->acpi_desc;
+	handle = to_nfit_memdev(dev)->device_handle;
+
+	/* assumes DIMMs have at most 2 published interface codes */
+	mutex_lock(&acpi_desc->init_mutex);
+	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+		struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
+		struct nfit_dcr *nfit_dcr;
+
+		if (memdev->device_handle != handle)
+			continue;
+
+		list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+			if (nfit_dcr->dcr->region_index != memdev->region_index)
+				continue;
+			if (nfit_dcr->dcr->code == dcr->code)
+				continue;
+			rc = sprintf(buf, "%#x\n",
+					be16_to_cpu(nfit_dcr->dcr->code));
+			break;
+		}
+		if (rc != ENXIO)
+			break;
+	}
+	mutex_unlock(&acpi_desc->init_mutex);
+	return rc;
+}
+static DEVICE_ATTR_RO(format1);
+
+static ssize_t formats_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+
+	return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
+}
+static DEVICE_ATTR_RO(formats);
+
 static ssize_t serial_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "%#x\n", dcr->serial_number);
+	return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
 }
 static DEVICE_ATTR_RO(serial);
 
+static ssize_t family_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+	if (nfit_mem->family < 0)
+		return -ENXIO;
+	return sprintf(buf, "%d\n", nfit_mem->family);
+}
+static DEVICE_ATTR_RO(family);
+
+static ssize_t dsm_mask_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+	if (nfit_mem->family < 0)
+		return -ENXIO;
+	return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
+}
+static DEVICE_ATTR_RO(dsm_mask);
+
 static ssize_t flags_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -873,15 +1029,41 @@ static ssize_t flags_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(flags);
 
+static ssize_t id_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
+		return sprintf(buf, "%04x-%02x-%04x-%08x\n",
+				be16_to_cpu(dcr->vendor_id),
+				dcr->manufacturing_location,
+				be16_to_cpu(dcr->manufacturing_date),
+				be32_to_cpu(dcr->serial_number));
+	else
+		return sprintf(buf, "%04x-%08x\n",
+				be16_to_cpu(dcr->vendor_id),
+				be32_to_cpu(dcr->serial_number));
+}
+static DEVICE_ATTR_RO(id);
+
 static struct attribute *acpi_nfit_dimm_attributes[] = {
 	&dev_attr_handle.attr,
 	&dev_attr_phys_id.attr,
 	&dev_attr_vendor.attr,
 	&dev_attr_device.attr,
+	&dev_attr_rev_id.attr,
+	&dev_attr_subsystem_vendor.attr,
+	&dev_attr_subsystem_device.attr,
+	&dev_attr_subsystem_rev_id.attr,
 	&dev_attr_format.attr,
+	&dev_attr_formats.attr,
+	&dev_attr_format1.attr,
 	&dev_attr_serial.attr,
-	&dev_attr_rev_id.attr,
 	&dev_attr_flags.attr,
+	&dev_attr_id.attr,
+	&dev_attr_family.attr,
+	&dev_attr_dsm_mask.attr,
 	NULL,
 };
 
@@ -889,11 +1071,13 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
 		struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
+	struct nvdimm *nvdimm = to_nvdimm(dev);
 
-	if (to_nfit_dcr(dev))
-		return a->mode;
-	else
+	if (!to_nfit_dcr(dev))
 		return 0;
+	if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
+		return 0;
+	return a->mode;
 }
 
 static struct attribute_group acpi_nfit_dimm_attribute_group = {
@@ -926,10 +1110,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 {
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
-	const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM);
+	unsigned long dsm_mask;
+	const u8 *uuid;
 	int i;
 
-	nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en;
+	/* nfit test assumes 1:1 relationship between commands and dsms */
+	nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
+	nfit_mem->family = NVDIMM_FAMILY_INTEL;
 	adev = to_acpi_dev(acpi_desc);
 	if (!adev)
 		return 0;
@@ -942,7 +1129,35 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		return force_enable_dimms ? 0 : -ENODEV;
 	}
 
-	for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++)
+	/*
+	 * Until standardization materializes we need to consider up to 3
+	 * different command sets.  Note, that checking for function0 (bit0)
+	 * tells us if any commands are reachable through this uuid.
+	 */
+	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
+		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
+			break;
+
+	/* limit the supported commands to those that are publicly documented */
+	nfit_mem->family = i;
+	if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
+		dsm_mask = 0x3fe;
+		if (disable_vendor_specific)
+			dsm_mask &= ~(1 << ND_CMD_VENDOR);
+	} else if (nfit_mem->family == NVDIMM_FAMILY_HPE1)
+		dsm_mask = 0x1c3c76;
+	else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
+		dsm_mask = 0x1fe;
+		if (disable_vendor_specific)
+			dsm_mask &= ~(1 << 8);
+	} else {
+		dev_err(dev, "unknown dimm command family\n");
+		nfit_mem->family = -1;
+		return force_enable_dimms ? 0 : -ENODEV;
+	}
+
+	uuid = to_nfit_uuid(nfit_mem->family);
+	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
 		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
@@ -955,8 +1170,8 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 	int dimm_count = 0;
 
 	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+		unsigned long flags = 0, cmd_mask;
 		struct nvdimm *nvdimm;
-		unsigned long flags = 0;
 		u32 device_handle;
 		u16 mem_flags;
 		int rc;
@@ -979,9 +1194,18 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 		if (rc)
 			continue;
 
+		/*
+		 * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
+		 * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
+		 * userspace interface.
+		 */
+		cmd_mask = 1UL << ND_CMD_CALL;
+		if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
+			cmd_mask |= nfit_mem->dsm_mask;
+
 		nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
 				acpi_nfit_dimm_attribute_groups,
-				flags, &nfit_mem->dsm_mask);
+				flags, cmd_mask);
 		if (!nvdimm)
 			return -ENOMEM;
 
@@ -1010,14 +1234,14 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 	struct acpi_device *adev;
 	int i;
 
-	nd_desc->dsm_mask = acpi_desc->bus_dsm_force_en;
+	nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
 	adev = to_acpi_dev(acpi_desc);
 	if (!adev)
 		return;
 
 	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
 		if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
-			set_bit(i, &nd_desc->dsm_mask);
+			set_bit(i, &nd_desc->cmd_mask);
 }
 
 static ssize_t range_index_show(struct device *dev,
@@ -2309,7 +2533,7 @@ static int acpi_nfit_add(struct acpi_device *adev)
 	acpi_size sz;
 	int rc;
 
-	status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz);
+	status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
 	if (ACPI_FAILURE(status)) {
 		/* This is ok, we could have an nvdimm hotplugged later */
 		dev_dbg(dev, "failed to find NFIT at startup\n");
@@ -2466,6 +2690,8 @@ static __init int nfit_init(void)
 	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
 	acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
 	acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
+	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
 
 	nfit_wq = create_singlethread_workqueue("nfit");
 	if (!nfit_wq)
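The acpi_nfit_ctl() changes above implement the ND_CMD_CALL passthrough: userspace wraps a raw _DSM payload in struct nd_cmd_pkg (added to include/uapi/linux/ndctl.h in this series) and the driver unwraps it around the _DSM evaluation, reporting the firmware's output size in nd_fw_size. A hedged userspace sketch follows; the helper name, DSM function number, and payload sizes are assumptions for illustration, not a documented HPE command:

#include <linux/ndctl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

/* dimm_fd: an open dimm device, e.g. /dev/nmem0 */
static int send_dsm(int dimm_fd, unsigned long long func,
		void *in, __u32 in_len, void *out, __u32 out_len)
{
	struct nd_cmd_pkg *pkg;
	int rc;

	pkg = calloc(1, sizeof(*pkg) + in_len + out_len);
	if (!pkg)
		return -1;
	pkg->nd_family = NVDIMM_FAMILY_HPE1;	/* must match the dimm's family */
	pkg->nd_command = func;			/* raw _DSM function number */
	pkg->nd_size_in = in_len;
	pkg->nd_size_out = out_len;
	memcpy(pkg->nd_payload, in, in_len);

	rc = ioctl(dimm_fd, ND_IOCTL_CALL, pkg);
	if (rc == 0)
		/* nd_fw_size reports how much the firmware actually returned */
		memcpy(out, pkg->nd_payload + in_len,
				out_len < pkg->nd_fw_size
				? out_len : pkg->nd_fw_size);
	free(pkg);
	return rc;
}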
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index c75576b2d50e..11cb38348aef 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -21,13 +21,25 @@
 #include <linux/acpi.h>
 #include <acpi/acuuid.h>
 
+/* ACPI 6.1 */
 #define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
+
+/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */
 #define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
+
+/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
+#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
+#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
+
 #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
 		| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
 		| ACPI_NFIT_MEM_NOT_ARMED)
 
 enum nfit_uuids {
+	/* for simplicity alias the uuid index with the family id */
+	NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
+	NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
+	NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
 	NFIT_SPA_VOLATILE,
 	NFIT_SPA_PM,
 	NFIT_SPA_DCR,
@@ -37,15 +49,16 @@ enum nfit_uuids {
 	NFIT_SPA_PDISK,
 	NFIT_SPA_PCD,
 	NFIT_DEV_BUS,
-	NFIT_DEV_DIMM,
 	NFIT_UUID_MAX,
 };
 
-enum nfit_fic {
-	NFIT_FIC_BYTE = 0x101, /* byte-addressable energy backed */
-	NFIT_FIC_BLK = 0x201, /* block-addressable non-energy backed */
-	NFIT_FIC_BYTEN = 0x301, /* byte-addressable non-energy backed */
-};
+/*
+ * Region format interface codes are stored as an array of bytes in the
+ * NFIT DIMM Control Region structure
+ */
+#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */
 
 enum {
 	NFIT_BLK_READ_FLUSH = 1,
@@ -109,7 +122,9 @@ struct nfit_mem {
 	struct nfit_flush *nfit_flush;
 	struct list_head list;
 	struct acpi_device *adev;
+	struct acpi_nfit_desc *acpi_desc;
 	unsigned long dsm_mask;
+	int family;
 };
 
 struct acpi_nfit_desc {
@@ -132,8 +147,8 @@ struct acpi_nfit_desc {
 	size_t ars_status_size;
 	struct work_struct work;
 	unsigned int cancel:1;
-	unsigned long dimm_dsm_force_en;
-	unsigned long bus_dsm_force_en;
+	unsigned long dimm_cmd_force_en;
+	unsigned long bus_cmd_force_en;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 };
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index ac832bf6f8c9..22c09952e177 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -625,7 +625,7 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock)
  * some old BIOSes do expect a buffer or an integer etc.
  */
 union acpi_object *
-acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, int rev, int func,
+acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
 		  union acpi_object *argv4)
 {
 	acpi_status ret;
@@ -674,7 +674,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
  * functions. Currently only support 64 functions at maximum, should be
  * enough for now.
  */
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, int rev, u64 funcs)
+bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 {
 	int i;
 	u64 mask = 0;
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
new file mode 100644
index 000000000000..cedab7572de3
--- /dev/null
+++ b/drivers/dax/Kconfig
@@ -0,0 +1,26 @@
+menuconfig DEV_DAX
+	tristate "DAX: direct access to differentiated memory"
+	default m if NVDIMM_DAX
+	depends on TRANSPARENT_HUGEPAGE
+	help
+	  Support raw access to differentiated (persistence, bandwidth,
+	  latency...) memory via an mmap(2) capable character
+	  device.  Platform firmware or a device driver may identify a
+	  platform memory resource that is differentiated from the
+	  baseline memory pool.  Mappings of a /dev/daxX.Y device impose
+	  restrictions that make the mapping behavior deterministic.
+
+if DEV_DAX
+
+config DEV_DAX_PMEM
+	tristate "PMEM DAX: direct access to persistent memory"
+	depends on NVDIMM_DAX
+	default DEV_DAX
+	help
+	  Support raw access to persistent memory.  Note that this
+	  driver consumes memory ranges allocated and exported by the
+	  libnvdimm sub-system.
+
+	  Say Y if unsure
+
+endif
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
new file mode 100644
index 000000000000..27c54e38478a
--- /dev/null
+++ b/drivers/dax/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_DEV_DAX) += dax.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+
+dax_pmem-y := pmem.o
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
new file mode 100644
index 000000000000..b891a129b275
--- /dev/null
+++ b/drivers/dax/dax.c
@@ -0,0 +1,575 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/pagemap.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pfn_t.h>
+#include <linux/slab.h>
+#include <linux/dax.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+static int dax_major;
+static struct class *dax_class;
+static DEFINE_IDA(dax_minor_ida);
+
+/**
+ * struct dax_region - mapping infrastructure for dax devices
+ * @id: kernel-wide unique region for a memory range
+ * @base: linear address corresponding to @res
+ * @kref: to pin while other agents have a need to do lookups
+ * @dev: parent device backing this region
+ * @align: allocation and mapping alignment for child dax devices
+ * @res: physical address range of the region
+ * @pfn_flags: identify whether the pfns are paged back or not
+ */
+struct dax_region {
+	int id;
+	struct ida ida;
+	void *base;
+	struct kref kref;
+	struct device *dev;
+	unsigned int align;
+	struct resource res;
+	unsigned long pfn_flags;
+};
+
+/**
+ * struct dax_dev - subdivision of a dax region
+ * @region - parent region
+ * @dev - device backing the character device
+ * @kref - enable this data to be tracked in filp->private_data
+ * @alive - !alive + rcu grace period == no new mappings can be established
+ * @id - child id in the region
+ * @num_resources - number of physical address extents in this device
+ * @res - array of physical address ranges
+ */
+struct dax_dev {
+	struct dax_region *region;
+	struct device *dev;
+	struct kref kref;
+	bool alive;
+	int id;
+	int num_resources;
+	struct resource res[0];
+};
+
+static void dax_region_free(struct kref *kref)
+{
+	struct dax_region *dax_region;
+
+	dax_region = container_of(kref, struct dax_region, kref);
+	kfree(dax_region);
+}
+
+void dax_region_put(struct dax_region *dax_region)
+{
+	kref_put(&dax_region->kref, dax_region_free);
+}
+EXPORT_SYMBOL_GPL(dax_region_put);
+
+static void dax_dev_free(struct kref *kref)
+{
+	struct dax_dev *dax_dev;
+
+	dax_dev = container_of(kref, struct dax_dev, kref);
+	dax_region_put(dax_dev->region);
+	kfree(dax_dev);
+}
+
+static void dax_dev_put(struct dax_dev *dax_dev)
+{
+	kref_put(&dax_dev->kref, dax_dev_free);
+}
+
+struct dax_region *alloc_dax_region(struct device *parent, int region_id,
+		struct resource *res, unsigned int align, void *addr,
+		unsigned long pfn_flags)
+{
+	struct dax_region *dax_region;
+
+	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
+
+	if (!dax_region)
+		return NULL;
+
+	memcpy(&dax_region->res, res, sizeof(*res));
+	dax_region->pfn_flags = pfn_flags;
+	kref_init(&dax_region->kref);
+	dax_region->id = region_id;
+	ida_init(&dax_region->ida);
+	dax_region->align = align;
+	dax_region->dev = parent;
+	dax_region->base = addr;
+
+	return dax_region;
+}
+EXPORT_SYMBOL_GPL(alloc_dax_region);
+
+static ssize_t size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dax_dev *dax_dev = dev_get_drvdata(dev);
+	unsigned long long size = 0;
+	int i;
+
+	for (i = 0; i < dax_dev->num_resources; i++)
+		size += resource_size(&dax_dev->res[i]);
+
+	return sprintf(buf, "%llu\n", size);
+}
+static DEVICE_ATTR_RO(size);
+
+static struct attribute *dax_device_attributes[] = {
+	&dev_attr_size.attr,
+	NULL,
+};
+
+static const struct attribute_group dax_device_attribute_group = {
+	.attrs = dax_device_attributes,
+};
+
+static const struct attribute_group *dax_attribute_groups[] = {
+	&dax_device_attribute_group,
+	NULL,
+};
+
+static void unregister_dax_dev(void *_dev)
+{
+	struct device *dev = _dev;
+	struct dax_dev *dax_dev = dev_get_drvdata(dev);
+	struct dax_region *dax_region = dax_dev->region;
+
+	dev_dbg(dev, "%s\n", __func__);
+
+	/*
+	 * Note, rcu is not protecting the liveness of dax_dev, rcu is
+	 * ensuring that any fault handlers that might have seen
+	 * dax_dev->alive == true, have completed.  Any fault handlers
+	 * that start after synchronize_rcu() has started will abort
+	 * upon seeing dax_dev->alive == false.
+	 */
+	dax_dev->alive = false;
+	synchronize_rcu();
+
+	get_device(dev);
+	device_unregister(dev);
+	ida_simple_remove(&dax_region->ida, dax_dev->id);
+	ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
+	put_device(dev);
+	dax_dev_put(dax_dev);
+}
+
+int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
+		int count)
+{
+	struct device *parent = dax_region->dev;
+	struct dax_dev *dax_dev;
+	struct device *dev;
+	int rc, minor;
+	dev_t dev_t;
+
+	dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
+	if (!dax_dev)
+		return -ENOMEM;
+	memcpy(dax_dev->res, res, sizeof(*res) * count);
+	dax_dev->num_resources = count;
+	kref_init(&dax_dev->kref);
+	dax_dev->alive = true;
+	dax_dev->region = dax_region;
+	kref_get(&dax_region->kref);
+
+	dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
+	if (dax_dev->id < 0) {
+		rc = dax_dev->id;
+		goto err_id;
+	}
+
+	minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
+	if (minor < 0) {
+		rc = minor;
+		goto err_minor;
+	}
+
+	dev_t = MKDEV(dax_major, minor);
+	dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev,
+			dax_attribute_groups, "dax%d.%d", dax_region->id,
+			dax_dev->id);
+	if (IS_ERR(dev)) {
+		rc = PTR_ERR(dev);
+		goto err_create;
+	}
+	dax_dev->dev = dev;
+
+	rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
+	if (rc) {
+		unregister_dax_dev(dev);
+		return rc;
+	}
+
+	return 0;
+
+ err_create:
+	ida_simple_remove(&dax_minor_ida, minor);
+ err_minor:
+	ida_simple_remove(&dax_region->ida, dax_dev->id);
+ err_id:
+	dax_dev_put(dax_dev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(devm_create_dax_dev);
+
+/* return an unmapped area aligned to the dax region specified alignment */
+static unsigned long dax_dev_get_unmapped_area(struct file *filp,
+		unsigned long addr, unsigned long len, unsigned long pgoff,
+		unsigned long flags)
+{
+	unsigned long off, off_end, off_align, len_align, addr_align, align;
+	struct dax_dev *dax_dev = filp ? filp->private_data : NULL;
+	struct dax_region *dax_region;
+
+	if (!dax_dev || addr)
+		goto out;
+
+	dax_region = dax_dev->region;
+	align = dax_region->align;
+	off = pgoff << PAGE_SHIFT;
+	off_end = off + len;
+	off_align = round_up(off, align);
+
+	if ((off_end <= off_align) || ((off_end - off_align) < align))
+		goto out;
+
+	len_align = len + align;
+	if ((off + len_align) < off)
+		goto out;
+
+	addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
+			pgoff, flags);
+	if (!IS_ERR_VALUE(addr_align)) {
+		addr_align += (off - addr_align) & (align - 1);
+		return addr_align;
+	}
+ out:
+	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+
+static int __match_devt(struct device *dev, const void *data)
+{
+	const dev_t *devt = data;
+
+	return dev->devt == *devt;
+}
+
+static struct device *dax_dev_find(dev_t dev_t)
+{
+	return class_find_device(dax_class, NULL, &dev_t, __match_devt);
+}
+
+static int dax_dev_open(struct inode *inode, struct file *filp)
+{
+	struct dax_dev *dax_dev = NULL;
+	struct device *dev;
+
+	dev = dax_dev_find(inode->i_rdev);
+	if (!dev)
+		return -ENXIO;
+
+	device_lock(dev);
+	dax_dev = dev_get_drvdata(dev);
+	if (dax_dev) {
+		dev_dbg(dev, "%s\n", __func__);
+		filp->private_data = dax_dev;
+		kref_get(&dax_dev->kref);
+		inode->i_flags = S_DAX;
+	}
+	device_unlock(dev);
+
+	if (!dax_dev) {
+		put_device(dev);
+		return -ENXIO;
+	}
+	return 0;
+}
+
+static int dax_dev_release(struct inode *inode, struct file *filp)
+{
+	struct dax_dev *dax_dev = filp->private_data;
+	struct device *dev = dax_dev->dev;
+
+	dev_dbg(dax_dev->dev, "%s\n", __func__);
+	dax_dev_put(dax_dev);
+	put_device(dev);
+
+	return 0;
+}
+
+static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma,
+		const char *func)
+{
+	struct dax_region *dax_region = dax_dev->region;
+	struct device *dev = dax_dev->dev;
+	unsigned long mask;
+
+	if (!dax_dev->alive)
+		return -ENXIO;
+
+	/* prevent private / writable mappings from being established */
+	if ((vma->vm_flags & (VM_NORESERVE|VM_SHARED|VM_WRITE)) == VM_WRITE) {
+		dev_info(dev, "%s: %s: fail, attempted private mapping\n",
+				current->comm, func);
+		return -EINVAL;
+	}
+
+	mask = dax_region->align - 1;
+	if (vma->vm_start & mask || vma->vm_end & mask) {
+		dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
+				current->comm, func, vma->vm_start, vma->vm_end,
+				mask);
+		return -EINVAL;
+	}
+
+	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
+			&& (vma->vm_flags & VM_DONTCOPY) == 0) {
+		dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n",
+				current->comm, func);
+		return -EINVAL;
+	}
+
+	if (!vma_is_dax(vma)) {
+		dev_info(dev, "%s: %s: fail, vma is not DAX capable\n",
+				current->comm, func);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
+		unsigned long size)
+{
+	struct resource *res;
+	phys_addr_t phys;
+	int i;
+
+	for (i = 0; i < dax_dev->num_resources; i++) {
+		res = &dax_dev->res[i];
+		phys = pgoff * PAGE_SIZE + res->start;
+		if (phys >= res->start && phys <= res->end)
+			break;
+		pgoff -= PHYS_PFN(resource_size(res));
+	}
+
+	if (i < dax_dev->num_resources) {
+		res = &dax_dev->res[i];
+		if (phys + size - 1 <= res->end)
+			return phys;
+	}
+
+	return -1;
+}
+
+static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
+		struct vm_fault *vmf)
+{
+	unsigned long vaddr = (unsigned long) vmf->virtual_address;
+	struct device *dev = dax_dev->dev;
+	struct dax_region *dax_region;
+	int rc = VM_FAULT_SIGBUS;
+	phys_addr_t phys;
+	pfn_t pfn;
+
+	if (check_vma(dax_dev, vma, __func__))
+		return VM_FAULT_SIGBUS;
+
+	dax_region = dax_dev->region;
+	if (dax_region->align > PAGE_SIZE) {
+		dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
+	if (phys == -1) {
+		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+				vmf->pgoff);
+		return VM_FAULT_SIGBUS;
+	}
+
+	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+
+	rc = vm_insert_mixed(vma, vaddr, pfn);
+
+	if (rc == -ENOMEM)
+		return VM_FAULT_OOM;
+	if (rc < 0 && rc != -EBUSY)
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+
+static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	int rc;
+	struct file *filp = vma->vm_file;
+	struct dax_dev *dax_dev = filp->private_data;
+
+	dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
+			current->comm, (vmf->flags & FAULT_FLAG_WRITE)
+			? "write" : "read", vma->vm_start, vma->vm_end);
+	rcu_read_lock();
+	rc = __dax_dev_fault(dax_dev, vma, vmf);
+	rcu_read_unlock();
+
+	return rc;
+}
+
+static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
+		struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd,
+		unsigned int flags)
+{
+	unsigned long pmd_addr = addr & PMD_MASK;
+	struct device *dev = dax_dev->dev;
+	struct dax_region *dax_region;
+	phys_addr_t phys;
+	pgoff_t pgoff;
+	pfn_t pfn;
+
+	if (check_vma(dax_dev, vma, __func__))
+		return VM_FAULT_SIGBUS;
+
+	dax_region = dax_dev->region;
+	if (dax_region->align > PMD_SIZE) {
+		dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	/* dax pmd mappings require pfn_t_devmap() */
+	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
+		dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	pgoff = linear_page_index(vma, pmd_addr);
+	phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE);
+	if (phys == -1) {
+		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+				pgoff);
+		return VM_FAULT_SIGBUS;
+	}
+
+	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+
+	return vmf_insert_pfn_pmd(vma, addr, pmd, pfn,
+			flags & FAULT_FLAG_WRITE);
+}
+
+static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, unsigned int flags)
+{
+	int rc;
+	struct file *filp = vma->vm_file;
+	struct dax_dev *dax_dev = filp->private_data;
+
+	dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
+			current->comm, (flags & FAULT_FLAG_WRITE)
+			? "write" : "read", vma->vm_start, vma->vm_end);
+
+	rcu_read_lock();
+	rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
+	rcu_read_unlock();
+
+	return rc;
+}
+
+static void dax_dev_vm_open(struct vm_area_struct *vma)
+{
+	struct file *filp = vma->vm_file;
+	struct dax_dev *dax_dev = filp->private_data;
+
+	dev_dbg(dax_dev->dev, "%s\n", __func__);
+	kref_get(&dax_dev->kref);
+}
+
+static void dax_dev_vm_close(struct vm_area_struct *vma)
+{
+	struct file *filp = vma->vm_file;
+	struct dax_dev *dax_dev = filp->private_data;
+
+	dev_dbg(dax_dev->dev, "%s\n", __func__);
+	dax_dev_put(dax_dev);
+}
+
+static const struct vm_operations_struct dax_dev_vm_ops = {
+	.fault = dax_dev_fault,
+	.pmd_fault = dax_dev_pmd_fault,
+	.open = dax_dev_vm_open,
+	.close = dax_dev_vm_close,
+};
+
+static int dax_dev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct dax_dev *dax_dev = filp->private_data;
+	int rc;
+
+	dev_dbg(dax_dev->dev, "%s\n", __func__);
+
+	rc = check_vma(dax_dev, vma, __func__);
+	if (rc)
+		return rc;
+
+	kref_get(&dax_dev->kref);
+	vma->vm_ops = &dax_dev_vm_ops;
+	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+	return 0;
+
+}
+
+static const struct file_operations dax_fops = {
+	.llseek = noop_llseek,
+	.owner = THIS_MODULE,
+	.open = dax_dev_open,
+	.release = dax_dev_release,
+	.get_unmapped_area = dax_dev_get_unmapped_area,
+	.mmap = dax_dev_mmap,
+};
+
+static int __init dax_init(void)
+{
+	int rc;
+
+	rc = register_chrdev(0, "dax", &dax_fops);
+	if (rc < 0)
+		return rc;
+	dax_major = rc;
+
+	dax_class = class_create(THIS_MODULE, "dax");
+	if (IS_ERR(dax_class)) {
+		unregister_chrdev(dax_major, "dax");
+		return PTR_ERR(dax_class);
+	}
+
+	return 0;
+}
+
+static void __exit dax_exit(void)
+{
+	class_destroy(dax_class);
+	unregister_chrdev(dax_major, "dax");
+	ida_destroy(&dax_minor_ida);
+}
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
+subsys_initcall(dax_init);
+module_exit(dax_exit);
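A small worked example of the rounding performed by dax_dev_get_unmapped_area() above (a standalone sketch with made-up values, not kernel code): the handler asks the mm for len + align bytes, then advances the returned address so the mapping offset lands on an @align boundary.

#include <stdio.h>

int main(void)
{
	unsigned long align = 2UL << 20;		/* region alignment: 2MiB */
	unsigned long off = 0;				/* pgoff << PAGE_SHIFT */
	unsigned long addr_align = 0x7f1234500000UL;	/* hypothetical mm answer */

	/* the same arithmetic as the driver: shift within the slack so
	 * that (addr_align + off) is a multiple of align */
	addr_align += (off - addr_align) & (align - 1);
	printf("aligned vma start: %#lx\n", addr_align);	/* 0x7f1234600000 */
	return 0;
}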
diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h
new file mode 100644
index 000000000000..d8b8f1f25054
--- /dev/null
+++ b/drivers/dax/dax.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __DAX_H__
+#define __DAX_H__
+struct device;
+struct resource;
+struct dax_region;
+void dax_region_put(struct dax_region *dax_region);
+struct dax_region *alloc_dax_region(struct device *parent,
+		int region_id, struct resource *res, unsigned int align,
+		void *addr, unsigned long flags);
+int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
+		int count);
+#endif /* __DAX_H__ */
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
new file mode 100644
index 000000000000..55d510e36cd1
--- /dev/null
+++ b/drivers/dax/pmem.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/percpu-refcount.h>
+#include <linux/memremap.h>
+#include <linux/module.h>
+#include <linux/pfn_t.h>
+#include "../nvdimm/pfn.h"
+#include "../nvdimm/nd.h"
+#include "dax.h"
+
+struct dax_pmem {
+	struct device *dev;
+	struct percpu_ref ref;
+	struct completion cmp;
+};
+
+struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
+{
+	return container_of(ref, struct dax_pmem, ref);
+}
+
+static void dax_pmem_percpu_release(struct percpu_ref *ref)
+{
+	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
+
+	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	complete(&dax_pmem->cmp);
+}
+
+static void dax_pmem_percpu_exit(void *data)
+{
+	struct percpu_ref *ref = data;
+	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
+
+	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	percpu_ref_exit(ref);
+	wait_for_completion(&dax_pmem->cmp);
+}
+
+static void dax_pmem_percpu_kill(void *data)
+{
+	struct percpu_ref *ref = data;
+	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
+
+	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	percpu_ref_kill(ref);
+}
+
+static int dax_pmem_probe(struct device *dev)
+{
+	int rc;
+	void *addr;
+	struct resource res;
+	struct nd_pfn_sb *pfn_sb;
+	struct dax_pmem *dax_pmem;
+	struct nd_region *nd_region;
+	struct nd_namespace_io *nsio;
+	struct dax_region *dax_region;
+	struct nd_namespace_common *ndns;
+	struct nd_dax *nd_dax = to_nd_dax(dev);
+	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
+	struct vmem_altmap __altmap, *altmap = NULL;
+
+	ndns = nvdimm_namespace_common_probe(dev);
+	if (IS_ERR(ndns))
+		return PTR_ERR(ndns);
+	nsio = to_nd_namespace_io(&ndns->dev);
+
+	/* parse the 'pfn' info block via ->rw_bytes */
+	devm_nsio_enable(dev, nsio);
+	altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
+	if (IS_ERR(altmap))
+		return PTR_ERR(altmap);
+	devm_nsio_disable(dev, nsio);
+
+	pfn_sb = nd_pfn->pfn_sb;
+
+	if (!devm_request_mem_region(dev, nsio->res.start,
+				resource_size(&nsio->res), dev_name(dev))) {
+		dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
+		return -EBUSY;
+	}
+
+	dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
+	if (!dax_pmem)
+		return -ENOMEM;
+
+	dax_pmem->dev = dev;
+	init_completion(&dax_pmem->cmp);
+	rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
+			GFP_KERNEL);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
+	if (rc) {
+		dax_pmem_percpu_exit(&dax_pmem->ref);
+		return rc;
+	}
+
+	addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
+	if (IS_ERR(addr))
+		return PTR_ERR(addr);
+
+	rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
+	if (rc) {
+		dax_pmem_percpu_kill(&dax_pmem->ref);
+		return rc;
+	}
+
+	nd_region = to_nd_region(dev->parent);
+	dax_region = alloc_dax_region(dev, nd_region->id, &res,
+			le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
+	if (!dax_region)
+		return -ENOMEM;
+
+	/* TODO: support for subdividing a dax region... */
+	rc = devm_create_dax_dev(dax_region, &res, 1);
+
+	/* child dax_dev instances now own the lifetime of the dax_region */
+	dax_region_put(dax_region);
+
+	return rc;
+}
+
+static struct nd_device_driver dax_pmem_driver = {
+	.probe = dax_pmem_probe,
+	.drv = {
+		.name = "dax_pmem",
+	},
+	.type = ND_DRIVER_DAX_PMEM,
+};
+
+static int __init dax_pmem_init(void)
+{
+	return nd_driver_register(&dax_pmem_driver);
+}
+module_init(dax_pmem_init);
+
+static void __exit dax_pmem_exit(void)
+{
+	driver_unregister(&dax_pmem_driver.drv);
+}
+module_exit(dax_pmem_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 53c11621d5b1..7c8a3bf07884 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -88,4 +88,17 @@ config NVDIMM_PFN
 
 	  Select Y if unsure
 
+config NVDIMM_DAX
+	bool "NVDIMM DAX: Raw access to persistent memory"
+	default LIBNVDIMM
+	depends on NVDIMM_PFN
+	help
+	  Support raw device dax access to a persistent memory
+	  namespace.  For environments that want to hard partition
+	  persistent memory, this capability provides a mechanism to
+	  sub-divide a namespace into character devices that can only be
+	  accessed via DAX (mmap(2)).
+
+	  Select Y if unsure
+
 endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index ea84d3c4e8e5..909554c3f955 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -23,3 +23,4 @@ libnvdimm-y += label.o
 libnvdimm-$(CONFIG_ND_CLAIM) += claim.o
 libnvdimm-$(CONFIG_BTT) += btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
+libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index e9ff9229d942..495e06d9f7e7 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -21,19 +21,19 @@
 #include <linux/sizes.h>
 #include "nd.h"
 
-struct nd_blk_device {
-	struct request_queue *queue;
-	struct gendisk *disk;
-	struct nd_namespace_blk *nsblk;
-	struct nd_blk_region *ndbr;
-	size_t disk_size;
-	u32 sector_size;
-	u32 internal_lbasize;
-};
+static u32 nsblk_meta_size(struct nd_namespace_blk *nsblk)
+{
+	return nsblk->lbasize - ((nsblk->lbasize >= 4096) ? 4096 : 512);
+}
+
+static u32 nsblk_internal_lbasize(struct nd_namespace_blk *nsblk)
+{
+	return roundup(nsblk->lbasize, INT_LBASIZE_ALIGNMENT);
+}
 
-static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev)
+static u32 nsblk_sector_size(struct nd_namespace_blk *nsblk)
 {
-	return blk_dev->nsblk->lbasize - blk_dev->sector_size;
+	return nsblk->lbasize - nsblk_meta_size(nsblk);
 }
 
 static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
@@ -57,20 +57,29 @@ static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
 	return SIZE_MAX;
 }
 
+static struct nd_blk_region *to_ndbr(struct nd_namespace_blk *nsblk)
+{
+	struct nd_region *nd_region;
+	struct device *parent;
+
+	parent = nsblk->common.dev.parent;
+	nd_region = container_of(parent, struct nd_region, dev);
+	return container_of(nd_region, struct nd_blk_region, nd_region);
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
-static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
-		struct bio_integrity_payload *bip, u64 lba,
-		int rw)
+static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
+		struct bio_integrity_payload *bip, u64 lba, int rw)
 {
-	unsigned int len = nd_blk_meta_size(blk_dev);
+	struct nd_blk_region *ndbr = to_ndbr(nsblk);
+	unsigned int len = nsblk_meta_size(nsblk);
 	resource_size_t dev_offset, ns_offset;
-	struct nd_namespace_blk *nsblk;
-	struct nd_blk_region *ndbr;
+	u32 internal_lbasize, sector_size;
 	int err = 0;
 
-	nsblk = blk_dev->nsblk;
-	ndbr = blk_dev->ndbr;
-	ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size;
+	internal_lbasize = nsblk_internal_lbasize(nsblk);
+	sector_size = nsblk_sector_size(nsblk);
+	ns_offset = lba * internal_lbasize + sector_size;
 	dev_offset = to_dev_offset(nsblk, ns_offset, len);
 	if (dev_offset == SIZE_MAX)
 		return -EIO;
@@ -104,25 +113,26 @@ static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
 }
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
-static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
-		struct bio_integrity_payload *bip, u64 lba,
-		int rw)
+static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
+		struct bio_integrity_payload *bip, u64 lba, int rw)
 {
 	return 0;
 }
 #endif
 
-static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
+static int nsblk_do_bvec(struct nd_namespace_blk *nsblk,
 		struct bio_integrity_payload *bip, struct page *page,
-		unsigned int len, unsigned int off, int rw,
-		sector_t sector)
+		unsigned int len, unsigned int off, int rw, sector_t sector)
 {
-	struct nd_blk_region *ndbr = blk_dev->ndbr;
+	struct nd_blk_region *ndbr = to_ndbr(nsblk);
 	resource_size_t dev_offset, ns_offset;
+	u32 internal_lbasize, sector_size;
 	int err = 0;
 	void *iobuf;
 	u64 lba;
 
+	internal_lbasize = nsblk_internal_lbasize(nsblk);
+	sector_size = nsblk_sector_size(nsblk);
 	while (len) {
 		unsigned int cur_len;
 
@@ -132,11 +142,11 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
 		 * Block Window setup/move steps. the do_io routine is capable
 		 * of handling len <= PAGE_SIZE.
 		 */
-		cur_len = bip ? min(len, blk_dev->sector_size) : len;
+		cur_len = bip ? min(len, sector_size) : len;
 
-		lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size);
-		ns_offset = lba * blk_dev->internal_lbasize;
-		dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len);
+		lba = div_u64(sector << SECTOR_SHIFT, sector_size);
+		ns_offset = lba * internal_lbasize;
+		dev_offset = to_dev_offset(nsblk, ns_offset, cur_len);
 		if (dev_offset == SIZE_MAX)
 			return -EIO;
 
@@ -147,13 +157,13 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
 			return err;
 
 		if (bip) {
-			err = nd_blk_rw_integrity(blk_dev, bip, lba, rw);
+			err = nd_blk_rw_integrity(nsblk, bip, lba, rw);
 			if (err)
 				return err;
 		}
 		len -= cur_len;
 		off += cur_len;
-		sector += blk_dev->sector_size >> SECTOR_SHIFT;
+		sector += sector_size >> SECTOR_SHIFT;
 	}
 
 	return err;
@@ -161,10 +171,8 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
 
 static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 {
-	struct block_device *bdev = bio->bi_bdev;
-	struct gendisk *disk = bdev->bd_disk;
 	struct bio_integrity_payload *bip;
-	struct nd_blk_device *blk_dev;
+	struct nd_namespace_blk *nsblk;
 	struct bvec_iter iter;
 	unsigned long start;
 	struct bio_vec bvec;
@@ -183,17 +191,17 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	bip = bio_integrity(bio);
-	blk_dev = disk->private_data;
+	nsblk = q->queuedata;
 	rw = bio_data_dir(bio);
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
 		unsigned int len = bvec.bv_len;
 
 		BUG_ON(len > PAGE_SIZE);
-		err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len,
+		err = nsblk_do_bvec(nsblk, bip, bvec.bv_page, len,
 				bvec.bv_offset, rw, iter.bi_sector);
 		if (err) {
-			dev_info(&blk_dev->nsblk->common.dev,
+			dev_dbg(&nsblk->common.dev,
 					"io error in %s sector %lld, len %d,\n",
 					(rw == READ) ? "READ" : "WRITE",
 					(unsigned long long) iter.bi_sector, len);
@@ -209,17 +217,16 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	return BLK_QC_T_NONE;
 }
 
-static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
+static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
 		resource_size_t offset, void *iobuf, size_t n, int rw)
 {
-	struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim);
-	struct nd_namespace_blk *nsblk = blk_dev->nsblk;
-	struct nd_blk_region *ndbr = blk_dev->ndbr;
+	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
+	struct nd_blk_region *ndbr = to_ndbr(nsblk);
 	resource_size_t dev_offset;
 
 	dev_offset = to_dev_offset(nsblk, offset, n);
 
-	if (unlikely(offset + n > blk_dev->disk_size)) {
+	if (unlikely(offset + n > nsblk->size)) {
 		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
 		return -EFAULT;
 	}
@@ -235,51 +242,65 @@ static const struct block_device_operations nd_blk_fops = {
 	.revalidate_disk = nvdimm_revalidate_disk,
 };
 
-static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
-		struct nd_blk_device *blk_dev)
+static void nd_blk_release_queue(void *q)
+{
+	blk_cleanup_queue(q);
+}
+
+static void nd_blk_release_disk(void *disk)
+{
+	del_gendisk(disk);
+	put_disk(disk);
+}
+
+static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 {
+	struct device *dev = &nsblk->common.dev;
 	resource_size_t available_disk_size;
+	struct request_queue *q;
 	struct gendisk *disk;
 	u64 internal_nlba;
 
-	internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize);
-	available_disk_size = internal_nlba * blk_dev->sector_size;
+	internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
+	available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
 
-	blk_dev->queue = blk_alloc_queue(GFP_KERNEL);
-	if (!blk_dev->queue)
+	q = blk_alloc_queue(GFP_KERNEL);
+	if (!q)
 		return -ENOMEM;
+	if (devm_add_action(dev, nd_blk_release_queue, q)) {
+		blk_cleanup_queue(q);
+		return -ENOMEM;
+	}
 
-	blk_queue_make_request(blk_dev->queue, nd_blk_make_request);
-	blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX);
-	blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY);
-	blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size);
-	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue);
+	blk_queue_make_request(q, nd_blk_make_request);
+	blk_queue_max_hw_sectors(q, UINT_MAX);
+	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
+	blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+	q->queuedata = nsblk;
 
-	disk = blk_dev->disk = alloc_disk(0);
-	if (!disk) {
-		blk_cleanup_queue(blk_dev->queue);
+	disk = alloc_disk(0);
+	if (!disk)
+		return -ENOMEM;
+	if (devm_add_action(dev, nd_blk_release_disk, disk)) {
+		put_disk(disk);
 		return -ENOMEM;
 	}
 
-	disk->driverfs_dev = &ndns->dev;
+	disk->driverfs_dev = dev;
 	disk->first_minor = 0;
 	disk->fops = &nd_blk_fops;
-	disk->private_data = blk_dev;
-	disk->queue = blk_dev->queue;
+	disk->queue = q;
 	disk->flags = GENHD_FL_EXT_DEVT;
-	nvdimm_namespace_disk_name(ndns, disk->disk_name);
+	nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
 	set_capacity(disk, 0);
 	add_disk(disk);
 
-	if (nd_blk_meta_size(blk_dev)) {
-		int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev));
+	if (nsblk_meta_size(nsblk)) {
+		int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
 
-		if (rc) {
-			del_gendisk(disk);
-			put_disk(disk);
-			blk_cleanup_queue(blk_dev->queue);
+		if (rc)
 			return rc;
-		}
 	}
 
 	set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
@@ -291,56 +312,29 @@ static int nd_blk_probe(struct device *dev)
 {
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_blk *nsblk;
-	struct nd_blk_device *blk_dev;
-	int rc;
 
 	ndns = nvdimm_namespace_common_probe(dev);
 	if (IS_ERR(ndns))
 		return PTR_ERR(ndns);
 
-	blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
-	if (!blk_dev)
-		return -ENOMEM;
-
 	nsblk = to_nd_namespace_blk(&ndns->dev);
-	blk_dev->disk_size = nvdimm_namespace_capacity(ndns);
-	blk_dev->ndbr = to_nd_blk_region(dev->parent);
-	blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev);
-	blk_dev->internal_lbasize = roundup(nsblk->lbasize,
-			INT_LBASIZE_ALIGNMENT);
-	blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512);
-	dev_set_drvdata(dev, blk_dev);
-
-	ndns->rw_bytes = nd_blk_rw_bytes;
+	nsblk->size = nvdimm_namespace_capacity(ndns);
+	dev_set_drvdata(dev, nsblk);
+
+	ndns->rw_bytes = nsblk_rw_bytes;
 	if (is_nd_btt(dev))
-		rc = nvdimm_namespace_attach_btt(ndns);
-	else if (nd_btt_probe(ndns, blk_dev) == 0) {
+		return nvdimm_namespace_attach_btt(ndns);
+	else if (nd_btt_probe(dev, ndns) == 0) {
 		/* we'll come back as btt-blk */
-		rc = -ENXIO;
+		return -ENXIO;
 	} else
-		rc = nd_blk_attach_disk(ndns, blk_dev);
-	if (rc)
-		kfree(blk_dev);
-	return rc;
-}
-
-static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
-{
-	del_gendisk(blk_dev->disk);
-	put_disk(blk_dev->disk);
-	blk_cleanup_queue(blk_dev->queue);
+		return nsblk_attach_disk(nsblk);
 }
 
 static int nd_blk_remove(struct device *dev)
 {
-	struct nd_blk_device *blk_dev = dev_get_drvdata(dev);
-
 	if (is_nd_btt(dev))
-		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
-	else
-		nd_blk_detach_disk(blk_dev);
-	kfree(blk_dev);
-
+		nvdimm_namespace_detach_btt(to_nd_btt(dev));
 	return 0;
 }
 
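
The rework above is what lets this driver drop teardown logic from its error paths and its ->remove() method: each resource is handed to devm immediately after allocation. A generic sketch of that pattern, with struct example standing in for the queue/disk resources (all names here are hypothetical):

#include <linux/device.h>
#include <linux/slab.h>

struct example {
	int dummy;	/* stands in for queue/disk state */
};

static void example_release(void *data)
{
	kfree(data);	/* undo the paired allocation */
}

static int example_probe(struct device *dev)
{
	struct example *x = kzalloc(sizeof(*x), GFP_KERNEL);

	if (!x)
		return -ENOMEM;
	if (devm_add_action(dev, example_release, x)) {
		example_release(x);	/* devm bookkeeping failed */
		return -ENOMEM;
	}
	/* from here on, x is released automatically when the device is
	 * unbound or when a later probe step fails */
	return 0;
}

Once every teardown step is registered this way a ->remove() method becomes unnecessary, which is exactly what the bus.c change further below starts to permit.
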
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index f068b6513cd2..68a7c3c1eed9 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1306,7 +1306,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
 	struct btt *btt;
 	struct device *dev = &nd_btt->dev;
 
-	btt = kzalloc(sizeof(struct btt), GFP_KERNEL);
+	btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
 	if (!btt)
 		return NULL;
 
@@ -1321,13 +1321,13 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
 	ret = discover_arenas(btt);
 	if (ret) {
 		dev_err(dev, "init: error in arena_discover: %d\n", ret);
-		goto out_free;
+		return NULL;
 	}
 
 	if (btt->init_state != INIT_READY && nd_region->ro) {
 		dev_info(dev, "%s is read-only, unable to init btt metadata\n",
 				dev_name(&nd_region->dev));
-		goto out_free;
+		return NULL;
 	} else if (btt->init_state != INIT_READY) {
 		btt->num_arenas = (rawsize / ARENA_MAX_SIZE) +
 			((rawsize % ARENA_MAX_SIZE) ? 1 : 0);
@@ -1337,29 +1337,25 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
 		ret = create_arenas(btt);
 		if (ret) {
 			dev_info(dev, "init: create_arenas: %d\n", ret);
-			goto out_free;
+			return NULL;
 		}
 
 		ret = btt_meta_init(btt);
 		if (ret) {
 			dev_err(dev, "init: error in meta_init: %d\n", ret);
-			goto out_free;
+			return NULL;
 		}
 	}
 
 	ret = btt_blk_init(btt);
 	if (ret) {
 		dev_err(dev, "init: error in blk_init: %d\n", ret);
-		goto out_free;
+		return NULL;
 	}
 
 	btt_debugfs_init(btt);
 
 	return btt;
-
- out_free:
-	kfree(btt);
-	return NULL;
 }
 
 /**
@@ -1377,7 +1373,6 @@ static void btt_fini(struct btt *btt)
 		btt_blk_cleanup(btt);
 		free_arenas(btt);
 		debugfs_remove_recursive(btt->debugfs_dir);
-		kfree(btt);
 	}
 }
 
@@ -1388,11 +1383,15 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
 	struct btt *btt;
 	size_t rawsize;
 
-	if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize)
+	if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) {
+		dev_dbg(&nd_btt->dev, "incomplete btt configuration\n");
 		return -ENODEV;
+	}
 
 	rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
 	if (rawsize < ARENA_MIN_SIZE) {
+		dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
+				dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K);
 		return -ENXIO;
 	}
 	nd_region = to_nd_region(nd_btt->dev.parent);
@@ -1406,9 +1405,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
 }
 EXPORT_SYMBOL(nvdimm_namespace_attach_btt);
 
-int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns)
+int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt)
 {
-	struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
 	struct btt *btt = nd_btt->btt;
 
 	btt_fini(btt);
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index cb477518dd0e..816d0dae6398 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -273,10 +273,10 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
 	return 0;
 }
 
-int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
+int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
 {
 	int rc;
-	struct device *dev;
+	struct device *btt_dev;
 	struct btt_sb *btt_sb;
 	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
 
@@ -284,21 +284,19 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
 		return -ENODEV;
 
 	nvdimm_bus_lock(&ndns->dev);
-	dev = __nd_btt_create(nd_region, 0, NULL, ndns);
+	btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns);
 	nvdimm_bus_unlock(&ndns->dev);
-	if (!dev)
+	if (!btt_dev)
 		return -ENOMEM;
-	dev_set_drvdata(dev, drvdata);
-	btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL);
-	rc = __nd_btt_probe(to_nd_btt(dev), ndns, btt_sb);
-	kfree(btt_sb);
-	dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__,
-			rc == 0 ? dev_name(dev) : "<none>");
+	btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL);
+	rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb);
+	dev_dbg(dev, "%s: btt: %s\n", __func__,
+			rc == 0 ? dev_name(btt_dev) : "<none>");
 	if (rc < 0) {
-		struct nd_btt *nd_btt = to_nd_btt(dev);
+		struct nd_btt *nd_btt = to_nd_btt(btt_dev);
 
-		__nd_detach_ndns(dev, &nd_btt->ndns);
-		put_device(dev);
+		__nd_detach_ndns(btt_dev, &nd_btt->ndns);
+		put_device(btt_dev);
 	}
 
 	return rc;
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 19f822d7f652..f085f8bceae8 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -40,6 +40,8 @@ static int to_nd_device_type(struct device *dev)
 		return ND_DEVICE_REGION_PMEM;
 	else if (is_nd_blk(dev))
 		return ND_DEVICE_REGION_BLK;
+	else if (is_nd_dax(dev))
+		return ND_DEVICE_DAX_PMEM;
 	else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
 		return nd_region_to_nstype(to_nd_region(dev->parent));
 
@@ -122,9 +124,10 @@ static int nvdimm_bus_remove(struct device *dev)
 	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
 	struct module *provider = to_bus_provider(dev);
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
-	int rc;
+	int rc = 0;
 
-	rc = nd_drv->remove(dev);
+	if (nd_drv->remove)
+		rc = nd_drv->remove(dev);
 	nd_region_disable(nvdimm_bus, dev);
 
 	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
@@ -246,6 +249,8 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
 
 void __nd_device_register(struct device *dev)
 {
+	if (!dev)
+		return;
 	dev->bus = &nvdimm_bus_type;
 	get_device(dev);
 	async_schedule_domain(nd_async_device_register, dev,
@@ -292,8 +297,8 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
 		return -EINVAL;
 	}
 
-	if (!nd_drv->probe || !nd_drv->remove) {
-		pr_debug("->probe() and ->remove() must be specified\n");
+	if (!nd_drv->probe) {
+		pr_debug("%s ->probe() must be specified\n", mod_name);
 		return -EINVAL;
 	}
 
@@ -439,6 +444,12 @@ static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
 		.out_num = 3,
 		.out_sizes = { 4, 4, UINT_MAX, },
 	},
+	[ND_CMD_CALL] = {
+		.in_num = 2,
+		.in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, },
+		.out_num = 1,
+		.out_sizes = { UINT_MAX, },
+	},
 };
 
 const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd)
@@ -473,6 +484,12 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
 		.out_num = 3,
 		.out_sizes = { 4, 4, 8, },
 	},
+	[ND_CMD_CALL] = {
+		.in_num = 2,
+		.in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, },
+		.out_num = 1,
+		.out_sizes = { UINT_MAX, },
+	},
 };
 
 const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd)
@@ -500,6 +517,10 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
 		struct nd_cmd_vendor_hdr *hdr = buf;
 
 		return hdr->in_length;
+	} else if (cmd == ND_CMD_CALL) {
+		struct nd_cmd_pkg *pkg = buf;
+
+		return pkg->nd_size_in;
 	}
 
 	return UINT_MAX;
@@ -522,6 +543,12 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
 		return out_field[1];
 	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2)
 		return out_field[1] - 8;
+	else if (cmd == ND_CMD_CALL) {
+		struct nd_cmd_pkg *pkg = (struct nd_cmd_pkg *) in_field;
+
+		return pkg->nd_size_out;
+	}
+
 
 	return UINT_MAX;
 }
@@ -588,25 +615,31 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	unsigned int cmd = _IOC_NR(ioctl_cmd);
 	void __user *p = (void __user *) arg;
 	struct device *dev = &nvdimm_bus->dev;
+	struct nd_cmd_pkg pkg;
 	const char *cmd_name, *dimm_name;
-	unsigned long dsm_mask;
+	unsigned long cmd_mask;
 	void *buf;
 	int rc, i;
 
 	if (nvdimm) {
 		desc = nd_cmd_dimm_desc(cmd);
 		cmd_name = nvdimm_cmd_name(cmd);
-		dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0;
+		cmd_mask = nvdimm->cmd_mask;
 		dimm_name = dev_name(&nvdimm->dev);
 	} else {
 		desc = nd_cmd_bus_desc(cmd);
 		cmd_name = nvdimm_bus_cmd_name(cmd);
-		dsm_mask = nd_desc->dsm_mask;
+		cmd_mask = nd_desc->cmd_mask;
 		dimm_name = "bus";
 	}
 
+	if (cmd == ND_CMD_CALL) {
+		if (copy_from_user(&pkg, p, sizeof(pkg)))
+			return -EFAULT;
+	}
+
 	if (!desc || (desc->out_num + desc->in_num == 0) ||
-			!test_bit(cmd, &dsm_mask))
+			!test_bit(cmd, &cmd_mask))
 		return -ENOTTY;
 
 	/* fail write commands (when read-only) */
@@ -616,6 +649,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	case ND_CMD_SET_CONFIG_DATA:
 	case ND_CMD_ARS_START:
 	case ND_CMD_CLEAR_ERROR:
+	case ND_CMD_CALL:
 		dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
 				nvdimm ? nvdimm_cmd_name(cmd)
 				: nvdimm_bus_cmd_name(cmd));
@@ -643,6 +677,16 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		in_len += in_size;
 	}
 
+	if (cmd == ND_CMD_CALL) {
+		dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n",
+				__func__, dimm_name, pkg.nd_command,
+				in_len, out_len, buf_len);
+
+		for (i = 0; i < ARRAY_SIZE(pkg.nd_reserved2); i++)
+			if (pkg.nd_reserved2[i])
+				return -EINVAL;
+	}
+
 	/* process an output envelope */
 	for (i = 0; i < desc->out_num; i++) {
 		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
@@ -783,6 +827,9 @@ int __init nvdimm_bus_init(void)
 {
 	int rc;
 
+	BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128);
+	BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8);
+
 	rc = bus_register(&nvdimm_bus_type);
 	if (rc)
 		return rc;
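
The ND_CMD_CALL additions above define a pass-through envelope: struct nd_cmd_pkg travels at the head of the ioctl buffer, nd_size_in payload bytes follow it on the way in, nd_size_out bytes come back after them, and the kernel rejects any nonzero nd_reserved2 words. A hedged user-space sketch of filling that envelope (the family constant, function number, and the /dev/nmemX file descriptor are assumptions for illustration):

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/ndctl.h>

static int send_passthru(int fd, __u64 family, __u64 func,
		const void *in, __u32 in_size, void *out, __u32 out_size)
{
	struct nd_cmd_pkg *pkg;
	int rc;

	/* calloc() also zeroes nd_reserved2[], which the kernel checks */
	pkg = calloc(1, sizeof(*pkg) + in_size + out_size);
	if (!pkg)
		return -1;

	pkg->nd_family = family;	/* e.g. an NVDIMM_FAMILY_* constant */
	pkg->nd_command = func;		/* vendor-defined function number */
	pkg->nd_size_in = in_size;
	pkg->nd_size_out = out_size;
	memcpy(pkg->nd_payload, in, in_size);

	rc = ioctl(fd, ND_IOCTL_CALL, pkg);	/* fd open on /dev/nmemX */
	if (rc == 0)	/* output payload follows the input payload */
		memcpy(out, pkg->nd_payload + in_size, out_size);
	free(pkg);
	return rc;
}
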
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index e8f03b0e95e4..8b2e3c4fb0ad 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -12,6 +12,7 @@
  */
 #include <linux/device.h>
 #include <linux/sizes.h>
+#include <linux/pmem.h>
 #include "nd-core.h"
 #include "pfn.h"
 #include "btt.h"
@@ -84,12 +85,33 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns)
 		seed = nd_region->btt_seed;
 	else if (is_nd_pfn(dev))
 		seed = nd_region->pfn_seed;
+	else if (is_nd_dax(dev))
+		seed = nd_region->dax_seed;
 
 	if (seed == dev || ndns || dev->driver)
 		return false;
 	return true;
 }
 
+struct nd_pfn *to_nd_pfn_safe(struct device *dev)
+{
+	/*
+	 * pfn device attributes are re-used by dax device instances, so we
+	 * need to be careful to perform the correct device-to-nd_pfn
+	 * conversion.
+	 */
+	if (is_nd_pfn(dev))
+		return to_nd_pfn(dev);
+
+	if (is_nd_dax(dev)) {
+		struct nd_dax *nd_dax = to_nd_dax(dev);
+
+		return &nd_dax->nd_pfn;
+	}
+
+	WARN_ON(1);
+	return NULL;
+}
+
 static void nd_detach_and_reset(struct device *dev,
 		struct nd_namespace_common **_ndns)
 {
@@ -103,8 +125,8 @@ static void nd_detach_and_reset(struct device *dev,
 		nd_btt->lbasize = 0;
 		kfree(nd_btt->uuid);
 		nd_btt->uuid = NULL;
-	} else if (is_nd_pfn(dev)) {
-		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	} else if (is_nd_pfn(dev) || is_nd_dax(dev)) {
+		struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 
 		kfree(nd_pfn->uuid);
 		nd_pfn->uuid = NULL;
@@ -199,3 +221,63 @@ u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb)
 	return sum;
 }
 EXPORT_SYMBOL(nd_sb_checksum);
+
+static int nsio_rw_bytes(struct nd_namespace_common *ndns,
+		resource_size_t offset, void *buf, size_t size, int rw)
+{
+	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+
+	if (unlikely(offset + size > nsio->size)) {
+		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
+		return -EFAULT;
+	}
+
+	if (rw == READ) {
+		unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
+
+		if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align)))
+			return -EIO;
+		return memcpy_from_pmem(buf, nsio->addr + offset, size);
+	} else {
+		memcpy_to_pmem(nsio->addr + offset, buf, size);
+		wmb_pmem();
+	}
+
+	return 0;
+}
+
+int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
+{
+	struct resource *res = &nsio->res;
+	struct nd_namespace_common *ndns = &nsio->common;
+
+	nsio->size = resource_size(res);
+	if (!devm_request_mem_region(dev, res->start, resource_size(res),
+				dev_name(dev))) {
+		dev_warn(dev, "could not reserve region %pR\n", res);
+		return -EBUSY;
+	}
+
+	ndns->rw_bytes = nsio_rw_bytes;
+	if (devm_init_badblocks(dev, &nsio->bb))
+		return -ENOMEM;
+	nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb,
+			&nsio->res);
+
+	nsio->addr = devm_memremap(dev, res->start, resource_size(res),
+			ARCH_MEMREMAP_PMEM);
+	if (IS_ERR(nsio->addr))
+		return PTR_ERR(nsio->addr);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_nsio_enable);
+
+void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio)
+{
+	struct resource *res = &nsio->res;
+
+	devm_memunmap(dev, nsio->addr);
+	devm_exit_badblocks(dev, &nsio->bb);
+	devm_release_mem_region(dev, res->start, resource_size(res));
+}
+EXPORT_SYMBOL_GPL(devm_nsio_disable);
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 182a93fe3712..be89764315c2 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -251,7 +251,7 @@ static ssize_t commands_show(struct device *dev,
 	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
 	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
 
-	for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG)
+	for_each_set_bit(cmd, &nd_desc->cmd_mask, BITS_PER_LONG)
 		len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd));
 	len += sprintf(buf + len, "\n");
 	return len;
@@ -648,6 +648,9 @@ static __exit void libnvdimm_exit(void)
 	nd_region_exit();
 	nvdimm_exit();
 	nvdimm_bus_exit();
+	nd_region_devs_exit();
+	nvdimm_devs_exit();
+	ida_destroy(&nd_ida);
 }
 
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
new file mode 100644
index 000000000000..45fa82cae87c
--- /dev/null
+++ b/drivers/nvdimm/dax_devs.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include "nd-core.h"
+#include "pfn.h"
+#include "nd.h"
+
+static void nd_dax_release(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nd_dax *nd_dax = to_nd_dax(dev);
+	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
+
+	dev_dbg(dev, "%s\n", __func__);
+	nd_detach_ndns(dev, &nd_pfn->ndns);
+	ida_simple_remove(&nd_region->dax_ida, nd_pfn->id);
+	kfree(nd_pfn->uuid);
+	kfree(nd_dax);
+}
+
+static struct device_type nd_dax_device_type = {
+	.name = "nd_dax",
+	.release = nd_dax_release,
+};
+
+bool is_nd_dax(struct device *dev)
+{
+	return dev ? dev->type == &nd_dax_device_type : false;
+}
+EXPORT_SYMBOL(is_nd_dax);
+
+struct nd_dax *to_nd_dax(struct device *dev)
+{
+	struct nd_dax *nd_dax = container_of(dev, struct nd_dax, nd_pfn.dev);
+
+	WARN_ON(!is_nd_dax(dev));
+	return nd_dax;
+}
+EXPORT_SYMBOL(to_nd_dax);
+
+static const struct attribute_group *nd_dax_attribute_groups[] = {
+	&nd_pfn_attribute_group,
+	&nd_device_attribute_group,
+	&nd_numa_attribute_group,
+	NULL,
+};
+
+static struct nd_dax *nd_dax_alloc(struct nd_region *nd_region)
+{
+	struct nd_pfn *nd_pfn;
+	struct nd_dax *nd_dax;
+	struct device *dev;
+
+	nd_dax = kzalloc(sizeof(*nd_dax), GFP_KERNEL);
+	if (!nd_dax)
+		return NULL;
+
+	nd_pfn = &nd_dax->nd_pfn;
+	nd_pfn->id = ida_simple_get(&nd_region->dax_ida, 0, 0, GFP_KERNEL);
+	if (nd_pfn->id < 0) {
+		kfree(nd_dax);
+		return NULL;
+	}
+
+	dev = &nd_pfn->dev;
+	dev_set_name(dev, "dax%d.%d", nd_region->id, nd_pfn->id);
+	dev->groups = nd_dax_attribute_groups;
+	dev->type = &nd_dax_device_type;
+	dev->parent = &nd_region->dev;
+
+	return nd_dax;
+}
+
+struct device *nd_dax_create(struct nd_region *nd_region)
+{
+	struct device *dev = NULL;
+	struct nd_dax *nd_dax;
+
+	if (!is_nd_pmem(&nd_region->dev))
+		return NULL;
+
+	nd_dax = nd_dax_alloc(nd_region);
+	if (nd_dax)
+		dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL);
+	__nd_device_register(dev);
+	return dev;
+}
+
+int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
+{
+	int rc;
+	struct nd_dax *nd_dax;
+	struct device *dax_dev;
+	struct nd_pfn *nd_pfn;
+	struct nd_pfn_sb *pfn_sb;
+	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+
+	if (ndns->force_raw)
+		return -ENODEV;
+
+	nvdimm_bus_lock(&ndns->dev);
+	nd_dax = nd_dax_alloc(nd_region);
+	nd_pfn = &nd_dax->nd_pfn;
+	dax_dev = nd_pfn_devinit(nd_pfn, ndns);
+	nvdimm_bus_unlock(&ndns->dev);
+	if (!dax_dev)
+		return -ENOMEM;
+	pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
+	nd_pfn->pfn_sb = pfn_sb;
+	rc = nd_pfn_validate(nd_pfn, DAX_SIG);
+	dev_dbg(dev, "%s: dax: %s\n", __func__,
+			rc == 0 ? dev_name(dax_dev) : "<none>");
+	if (rc < 0) {
+		__nd_detach_ndns(dax_dev, &nd_pfn->ndns);
+		put_device(dax_dev);
+	} else
+		__nd_device_register(dax_dev);
+
+	return rc;
+}
+EXPORT_SYMBOL(nd_dax_probe);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index c56f88217924..bbde28d3dec5 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -37,9 +37,9 @@ static int __validate_dimm(struct nvdimm_drvdata *ndd)
 
 	nvdimm = to_nvdimm(ndd->dev);
 
-	if (!nvdimm->dsm_mask)
+	if (!nvdimm->cmd_mask)
 		return -ENXIO;
-	if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask))
+	if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask))
 		return -ENXIO;
 
 	return 0;
@@ -263,6 +263,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm)
 }
 EXPORT_SYMBOL_GPL(nvdimm_name);
 
+unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm)
+{
+	return nvdimm->cmd_mask;
+}
+EXPORT_SYMBOL_GPL(nvdimm_cmd_mask);
+
 void *nvdimm_provider_data(struct nvdimm *nvdimm)
 {
 	if (nvdimm)
@@ -277,10 +283,10 @@ static ssize_t commands_show(struct device *dev,
 	struct nvdimm *nvdimm = to_nvdimm(dev);
 	int cmd, len = 0;
 
-	if (!nvdimm->dsm_mask)
+	if (!nvdimm->cmd_mask)
 		return sprintf(buf, "\n");
 
-	for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG)
+	for_each_set_bit(cmd, &nvdimm->cmd_mask, BITS_PER_LONG)
 		len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd));
 	len += sprintf(buf + len, "\n");
 	return len;
@@ -340,7 +346,7 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
 
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 		const struct attribute_group **groups, unsigned long flags,
-		unsigned long *dsm_mask)
+		unsigned long cmd_mask)
 {
 	struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
 	struct device *dev;
@@ -355,7 +361,7 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 	}
 	nvdimm->provider_data = provider_data;
 	nvdimm->flags = flags;
-	nvdimm->dsm_mask = dsm_mask;
+	nvdimm->cmd_mask = cmd_mask;
 	atomic_set(&nvdimm->busy, 0);
 	dev = &nvdimm->dev;
 	dev_set_name(dev, "nmem%d", nvdimm->id);
@@ -546,3 +552,8 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count);
+
+void __exit nvdimm_devs_exit(void)
+{
+	ida_destroy(&dimm_ida);
+}
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index f5cb88601359..c5e3196c45b0 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1288,6 +1288,8 @@ static ssize_t mode_show(struct device *dev,
 		mode = "safe";
 	else if (claim && is_nd_pfn(claim))
 		mode = "memory";
+	else if (claim && is_nd_dax(claim))
+		mode = "dax";
 	else if (!claim && pmem_should_map_pages(dev))
 		mode = "memory";
 	else
@@ -1379,21 +1381,19 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 {
 	struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
 	struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
-	struct nd_namespace_common *ndns;
+	struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL;
+	struct nd_namespace_common *ndns = NULL;
 	resource_size_t size;
 
-	if (nd_btt || nd_pfn) {
-		struct device *host = NULL;
-
-		if (nd_btt) {
-			host = &nd_btt->dev;
+	if (nd_btt || nd_pfn || nd_dax) {
+		if (nd_btt)
 			ndns = nd_btt->ndns;
-		} else if (nd_pfn) {
-			host = &nd_pfn->dev;
+		else if (nd_pfn)
 			ndns = nd_pfn->ndns;
-		}
+		else if (nd_dax)
+			ndns = nd_dax->nd_pfn.ndns;
 
-		if (!ndns || !host)
+		if (!ndns)
 			return ERR_PTR(-ENODEV);
 
 		/*
@@ -1404,12 +1404,12 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 		device_unlock(&ndns->dev);
 		if (ndns->dev.driver) {
 			dev_dbg(&ndns->dev, "is active, can't bind %s\n",
-					dev_name(host));
+					dev_name(dev));
 			return ERR_PTR(-EBUSY);
 		}
-		if (dev_WARN_ONCE(&ndns->dev, ndns->claim != host,
+		if (dev_WARN_ONCE(&ndns->dev, ndns->claim != dev,
 					"host (%s) vs claim (%s) mismatch\n",
-					dev_name(host),
+					dev_name(dev),
 					dev_name(ndns->claim)))
 			return ERR_PTR(-ENXIO);
 	} else {
@@ -1784,6 +1784,18 @@ void nd_region_create_blk_seed(struct nd_region *nd_region)
 	nd_device_register(nd_region->ns_seed);
 }
 
+void nd_region_create_dax_seed(struct nd_region *nd_region)
+{
+	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
+	nd_region->dax_seed = nd_dax_create(nd_region);
+	/*
+	 * Seed creation failures are not fatal, provisioning is simply
+	 * disabled until memory becomes available
+	 */
+	if (!nd_region->dax_seed)
+		dev_err(&nd_region->dev, "failed to create dax namespace\n");
+}
+
 void nd_region_create_pfn_seed(struct nd_region *nd_region)
 {
 	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 1d1500f3d8b5..284cdaa268cf 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -37,7 +37,7 @@ struct nvdimm_bus {
 struct nvdimm {
 	unsigned long flags;
 	void *provider_data;
-	unsigned long *dsm_mask;
+	unsigned long cmd_mask;
 	struct device dev;
 	atomic_t busy;
 	int id;
@@ -49,11 +49,14 @@ bool is_nd_blk(struct device *dev);
 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
 int __init nvdimm_bus_init(void);
 void nvdimm_bus_exit(void);
+void nvdimm_devs_exit(void);
+void nd_region_devs_exit(void);
 void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 struct nd_region;
 void nd_region_create_blk_seed(struct nd_region *nd_region);
 void nd_region_create_btt_seed(struct nd_region *nd_region);
 void nd_region_create_pfn_seed(struct nd_region *nd_region);
+void nd_region_create_dax_seed(struct nd_region *nd_region);
 void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
@@ -91,4 +94,5 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
 ssize_t nd_namespace_store(struct device *dev,
 		struct nd_namespace_common **_ndns, const char *buf,
 		size_t len);
+struct nd_pfn *to_nd_pfn_safe(struct device *dev);
 #endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 875c524fafb0..d0ac93c31dda 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -13,6 +13,7 @@
 #ifndef __ND_H__
 #define __ND_H__
 #include <linux/libnvdimm.h>
+#include <linux/badblocks.h>
 #include <linux/blkdev.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
@@ -100,10 +101,12 @@ struct nd_region {
 	struct ida ns_ida;
 	struct ida btt_ida;
 	struct ida pfn_ida;
+	struct ida dax_ida;
 	unsigned long flags;
 	struct device *ns_seed;
 	struct device *btt_seed;
 	struct device *pfn_seed;
+	struct device *dax_seed;
 	u16 ndr_mappings;
 	u64 ndr_size;
 	u64 ndr_start;
@@ -160,6 +163,10 @@ struct nd_pfn {
 	struct nd_namespace_common *ndns;
 };
 
+struct nd_dax {
+	struct nd_pfn nd_pfn;
+};
+
 enum nd_async_mode {
 	ND_SYNC,
 	ND_ASYNC,
@@ -197,11 +204,12 @@ struct nd_gen_sb {
 
 u64 nd_sb_checksum(struct nd_gen_sb *sb);
 #if IS_ENABLED(CONFIG_BTT)
-int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata);
+int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns);
 bool is_nd_btt(struct device *dev);
 struct device *nd_btt_create(struct nd_region *nd_region);
 #else
-static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
+static inline int nd_btt_probe(struct device *dev,
+		struct nd_namespace_common *ndns)
 {
 	return -ENODEV;
 }
@@ -219,12 +227,16 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
 
 struct nd_pfn *to_nd_pfn(struct device *dev);
 #if IS_ENABLED(CONFIG_NVDIMM_PFN)
-int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata);
+int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
 bool is_nd_pfn(struct device *dev);
 struct device *nd_pfn_create(struct nd_region *nd_region);
-int nd_pfn_validate(struct nd_pfn *nd_pfn);
+struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
+		struct nd_namespace_common *ndns);
+int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig);
+extern struct attribute_group nd_pfn_attribute_group;
 #else
-static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
+static inline int nd_pfn_probe(struct device *dev,
+		struct nd_namespace_common *ndns)
 {
 	return -ENODEV;
 }
@@ -239,12 +251,35 @@ static inline struct device *nd_pfn_create(struct nd_region *nd_region)
 	return NULL;
 }
 
-static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
+static inline int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	return -ENODEV;
 }
 #endif
 
+struct nd_dax *to_nd_dax(struct device *dev);
+#if IS_ENABLED(CONFIG_NVDIMM_DAX)
+int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns);
+bool is_nd_dax(struct device *dev);
+struct device *nd_dax_create(struct nd_region *nd_region);
+#else
+static inline int nd_dax_probe(struct device *dev,
+		struct nd_namespace_common *ndns)
+{
+	return -ENODEV;
+}
+
+static inline bool is_nd_dax(struct device *dev)
+{
+	return false;
+}
+
+static inline struct device *nd_dax_create(struct nd_region *nd_region)
+{
+	return NULL;
+}
+#endif
+
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
@@ -263,11 +298,32 @@ struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
 resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
 struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev);
 int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
-int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns);
+int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt);
 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 		char *name);
 void nvdimm_badblocks_populate(struct nd_region *nd_region,
 		struct badblocks *bb, const struct resource *res);
+#if IS_ENABLED(CONFIG_ND_CLAIM)
+struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
+		struct resource *res, struct vmem_altmap *altmap);
+int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
+void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
+#else
+static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
+		struct resource *res, struct vmem_altmap *altmap)
+{
+	return ERR_PTR(-ENXIO);
+}
+static inline int devm_nsio_enable(struct device *dev,
+		struct nd_namespace_io *nsio)
+{
+	return -ENXIO;
+}
+static inline void devm_nsio_disable(struct device *dev,
+		struct nd_namespace_io *nsio)
+{
+}
+#endif
 int nd_blk_region_init(struct nd_region *nd_region);
 void __nd_iostat_start(struct bio *bio, unsigned long *start);
 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
@@ -281,6 +337,19 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 	return true;
 }
 void nd_iostat_end(struct bio *bio, unsigned long start);
+static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
+		unsigned int len)
+{
+	if (bb->count) {
+		sector_t first_bad;
+		int num_bad;
+
+		return !!badblocks_check(bb, sector, len / 512, &first_bad,
+				&num_bad);
+	}
+
+	return false;
+}
 resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
 const u8 *nd_dev_to_uuid(struct device *dev);
 bool pmem_should_map_pages(struct device *dev);
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index 8e343a3ca873..dde9853453d3 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -19,6 +19,7 @@
 
 #define PFN_SIG_LEN 16
 #define PFN_SIG "NVDIMM_PFN_INFO\0"
+#define DAX_SIG "NVDIMM_DAX_INFO\0"
 
 struct nd_pfn_sb {
 	u8 signature[PFN_SIG_LEN];
@@ -33,7 +34,9 @@ struct nd_pfn_sb {
 	/* minor-version-1 additions for section alignment */
 	__le32 start_pad;
 	__le32 end_trunc;
-	u8 padding[4004];
+	/* minor-version-2 record the base alignment of the mapping */
+	__le32 align;
+	u8 padding[4000];
 	__le64 checksum;
 };
 
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index e071e214feba..f7718ec685fa 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -10,6 +10,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  */
+#include <linux/memremap.h>
 #include <linux/blkdev.h>
 #include <linux/device.h>
 #include <linux/genhd.h>
@@ -56,7 +57,7 @@ EXPORT_SYMBOL(to_nd_pfn);
 static ssize_t mode_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 
 	switch (nd_pfn->mode) {
 	case PFN_MODE_RAM:
@@ -71,7 +72,7 @@ static ssize_t mode_show(struct device *dev,
 static ssize_t mode_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
-	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc = 0;
 
 	device_lock(dev);
@@ -105,7 +106,7 @@ static DEVICE_ATTR_RW(mode);
 static ssize_t align_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 
 	return sprintf(buf, "%lx\n", nd_pfn->align);
 }
@@ -133,7 +134,7 @@ static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf)
133static ssize_t align_store(struct device *dev, 134static ssize_t align_store(struct device *dev,
134 struct device_attribute *attr, const char *buf, size_t len) 135 struct device_attribute *attr, const char *buf, size_t len)
135{ 136{
136 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 137 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
137 ssize_t rc; 138 ssize_t rc;
138 139
139 device_lock(dev); 140 device_lock(dev);
@@ -151,7 +152,7 @@ static DEVICE_ATTR_RW(align);
151static ssize_t uuid_show(struct device *dev, 152static ssize_t uuid_show(struct device *dev,
152 struct device_attribute *attr, char *buf) 153 struct device_attribute *attr, char *buf)
153{ 154{
154 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 155 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
155 156
156 if (nd_pfn->uuid) 157 if (nd_pfn->uuid)
157 return sprintf(buf, "%pUb\n", nd_pfn->uuid); 158 return sprintf(buf, "%pUb\n", nd_pfn->uuid);
@@ -161,7 +162,7 @@ static ssize_t uuid_show(struct device *dev,
161static ssize_t uuid_store(struct device *dev, 162static ssize_t uuid_store(struct device *dev,
162 struct device_attribute *attr, const char *buf, size_t len) 163 struct device_attribute *attr, const char *buf, size_t len)
163{ 164{
164 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 165 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
165 ssize_t rc; 166 ssize_t rc;
166 167
167 device_lock(dev); 168 device_lock(dev);
@@ -177,7 +178,7 @@ static DEVICE_ATTR_RW(uuid);
177static ssize_t namespace_show(struct device *dev, 178static ssize_t namespace_show(struct device *dev,
178 struct device_attribute *attr, char *buf) 179 struct device_attribute *attr, char *buf)
179{ 180{
180 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 181 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
181 ssize_t rc; 182 ssize_t rc;
182 183
183 nvdimm_bus_lock(dev); 184 nvdimm_bus_lock(dev);
@@ -190,7 +191,7 @@ static ssize_t namespace_show(struct device *dev,
190static ssize_t namespace_store(struct device *dev, 191static ssize_t namespace_store(struct device *dev,
191 struct device_attribute *attr, const char *buf, size_t len) 192 struct device_attribute *attr, const char *buf, size_t len)
192{ 193{
193 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 194 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
194 ssize_t rc; 195 ssize_t rc;
195 196
196 device_lock(dev); 197 device_lock(dev);
@@ -208,7 +209,7 @@ static DEVICE_ATTR_RW(namespace);
208static ssize_t resource_show(struct device *dev, 209static ssize_t resource_show(struct device *dev,
209 struct device_attribute *attr, char *buf) 210 struct device_attribute *attr, char *buf)
210{ 211{
211 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 212 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
212 ssize_t rc; 213 ssize_t rc;
213 214
214 device_lock(dev); 215 device_lock(dev);
@@ -234,7 +235,7 @@ static DEVICE_ATTR_RO(resource);
234static ssize_t size_show(struct device *dev, 235static ssize_t size_show(struct device *dev,
235 struct device_attribute *attr, char *buf) 236 struct device_attribute *attr, char *buf)
236{ 237{
237 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 238 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
238 ssize_t rc; 239 ssize_t rc;
239 240
240 device_lock(dev); 241 device_lock(dev);
@@ -269,7 +270,7 @@ static struct attribute *nd_pfn_attributes[] = {
269 NULL, 270 NULL,
270}; 271};
271 272
272static struct attribute_group nd_pfn_attribute_group = { 273struct attribute_group nd_pfn_attribute_group = {
273 .attrs = nd_pfn_attributes, 274 .attrs = nd_pfn_attributes,
274}; 275};
275 276
@@ -280,15 +281,31 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = {
280 NULL, 281 NULL,
281}; 282};
282 283
283static struct device *__nd_pfn_create(struct nd_region *nd_region, 284struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
284 struct nd_namespace_common *ndns) 285 struct nd_namespace_common *ndns)
285{ 286{
286 struct nd_pfn *nd_pfn; 287 struct device *dev = &nd_pfn->dev;
287 struct device *dev;
288 288
289 /* we can only create pages for contiguous ranges of pmem */ 289 if (!nd_pfn)
290 if (!is_nd_pmem(&nd_region->dev)) 290 return NULL;
291
292 nd_pfn->mode = PFN_MODE_NONE;
293 nd_pfn->align = HPAGE_SIZE;
294 dev = &nd_pfn->dev;
295 device_initialize(&nd_pfn->dev);
296 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
297 dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
298 __func__, dev_name(ndns->claim));
299 put_device(dev);
291 return NULL; 300 return NULL;
301 }
302 return dev;
303}
304
305static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region)
306{
307 struct nd_pfn *nd_pfn;
308 struct device *dev;
292 309
293 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); 310 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL);
294 if (!nd_pfn) 311 if (!nd_pfn)
@@ -300,33 +317,31 @@ static struct device *__nd_pfn_create(struct nd_region *nd_region,
300 return NULL; 317 return NULL;
301 } 318 }
302 319
303 nd_pfn->mode = PFN_MODE_NONE;
304 nd_pfn->align = HPAGE_SIZE;
305 dev = &nd_pfn->dev; 320 dev = &nd_pfn->dev;
306 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); 321 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
307 dev->parent = &nd_region->dev;
308 dev->type = &nd_pfn_device_type;
309 dev->groups = nd_pfn_attribute_groups; 322 dev->groups = nd_pfn_attribute_groups;
310 device_initialize(&nd_pfn->dev); 323 dev->type = &nd_pfn_device_type;
311 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { 324 dev->parent = &nd_region->dev;
312 dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", 325
313 __func__, dev_name(ndns->claim)); 326 return nd_pfn;
314 put_device(dev);
315 return NULL;
316 }
317 return dev;
318} 327}
319 328
320struct device *nd_pfn_create(struct nd_region *nd_region) 329struct device *nd_pfn_create(struct nd_region *nd_region)
321{ 330{
322 struct device *dev = __nd_pfn_create(nd_region, NULL); 331 struct nd_pfn *nd_pfn;
332 struct device *dev;
333
334 if (!is_nd_pmem(&nd_region->dev))
335 return NULL;
336
337 nd_pfn = nd_pfn_alloc(nd_region);
338 dev = nd_pfn_devinit(nd_pfn, NULL);
323 339
324 if (dev) 340 __nd_device_register(dev);
325 __nd_device_register(dev);
326 return dev; 341 return dev;
327} 342}
328 343
329int nd_pfn_validate(struct nd_pfn *nd_pfn) 344int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
330{ 345{
331 u64 checksum, offset; 346 u64 checksum, offset;
332 struct nd_namespace_io *nsio; 347 struct nd_namespace_io *nsio;
@@ -343,7 +358,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
343 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb))) 358 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
344 return -ENXIO; 359 return -ENXIO;
345 360
346 if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0) 361 if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
347 return -ENODEV; 362 return -ENODEV;
348 363
349 checksum = le64_to_cpu(pfn_sb->checksum); 364 checksum = le64_to_cpu(pfn_sb->checksum);
@@ -360,6 +375,9 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
360 pfn_sb->end_trunc = 0; 375 pfn_sb->end_trunc = 0;
361 } 376 }
362 377
378 if (__le16_to_cpu(pfn_sb->version_minor) < 2)
379 pfn_sb->align = 0;
380
363 switch (le32_to_cpu(pfn_sb->mode)) { 381 switch (le32_to_cpu(pfn_sb->mode)) {
364 case PFN_MODE_RAM: 382 case PFN_MODE_RAM:
365 case PFN_MODE_PMEM: 383 case PFN_MODE_PMEM:
@@ -379,6 +397,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
379 return -ENODEV; 397 return -ENODEV;
380 } 398 }
381 399
400 if (nd_pfn->align == 0)
401 nd_pfn->align = le32_to_cpu(pfn_sb->align);
382 if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) { 402 if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
383 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", 403 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
384 nd_pfn->align, nvdimm_namespace_capacity(ndns)); 404 nd_pfn->align, nvdimm_namespace_capacity(ndns));
@@ -399,8 +419,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
399 return -EBUSY; 419 return -EBUSY;
400 } 420 }
401 421
402 nd_pfn->align = 1UL << ilog2(offset); 422 if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align))
403 if (!is_power_of_2(offset) || offset < PAGE_SIZE) { 423 || !IS_ALIGNED(offset, PAGE_SIZE)) {
404 dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n", 424 dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
405 offset); 425 offset);
406 return -ENXIO; 426 return -ENXIO;
@@ -410,11 +430,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
410} 430}
411EXPORT_SYMBOL(nd_pfn_validate); 431EXPORT_SYMBOL(nd_pfn_validate);
412 432
413int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) 433int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
414{ 434{
415 int rc; 435 int rc;
416 struct device *dev;
417 struct nd_pfn *nd_pfn; 436 struct nd_pfn *nd_pfn;
437 struct device *pfn_dev;
418 struct nd_pfn_sb *pfn_sb; 438 struct nd_pfn_sb *pfn_sb;
419 struct nd_region *nd_region = to_nd_region(ndns->dev.parent); 439 struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
420 440
@@ -422,25 +442,218 @@ int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
422 return -ENODEV; 442 return -ENODEV;
423 443
424 nvdimm_bus_lock(&ndns->dev); 444 nvdimm_bus_lock(&ndns->dev);
425 dev = __nd_pfn_create(nd_region, ndns); 445 nd_pfn = nd_pfn_alloc(nd_region);
446 pfn_dev = nd_pfn_devinit(nd_pfn, ndns);
426 nvdimm_bus_unlock(&ndns->dev); 447 nvdimm_bus_unlock(&ndns->dev);
427 if (!dev) 448 if (!pfn_dev)
428 return -ENOMEM; 449 return -ENOMEM;
429 dev_set_drvdata(dev, drvdata); 450 pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
430 pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); 451 nd_pfn = to_nd_pfn(pfn_dev);
431 nd_pfn = to_nd_pfn(dev);
432 nd_pfn->pfn_sb = pfn_sb; 452 nd_pfn->pfn_sb = pfn_sb;
433 rc = nd_pfn_validate(nd_pfn); 453 rc = nd_pfn_validate(nd_pfn, PFN_SIG);
434 nd_pfn->pfn_sb = NULL; 454 dev_dbg(dev, "%s: pfn: %s\n", __func__,
435 kfree(pfn_sb); 455 rc == 0 ? dev_name(pfn_dev) : "<none>");
436 dev_dbg(&ndns->dev, "%s: pfn: %s\n", __func__,
437 rc == 0 ? dev_name(dev) : "<none>");
438 if (rc < 0) { 456 if (rc < 0) {
439 __nd_detach_ndns(dev, &nd_pfn->ndns); 457 __nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
440 put_device(dev); 458 put_device(pfn_dev);
441 } else 459 } else
442 __nd_device_register(&nd_pfn->dev); 460 __nd_device_register(pfn_dev);
443 461
444 return rc; 462 return rc;
445} 463}
446EXPORT_SYMBOL(nd_pfn_probe); 464EXPORT_SYMBOL(nd_pfn_probe);
465
466/*
467 * We hotplug memory at section granularity, pad the reserved area from
468 * the previous section base to the namespace base address.
469 */
470static unsigned long init_altmap_base(resource_size_t base)
471{
472 unsigned long base_pfn = PHYS_PFN(base);
473
474 return PFN_SECTION_ALIGN_DOWN(base_pfn);
475}
476
477static unsigned long init_altmap_reserve(resource_size_t base)
478{
479 unsigned long reserve = PHYS_PFN(SZ_8K);
480 unsigned long base_pfn = PHYS_PFN(base);
481
482 reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
483 return reserve;
484}
485
486static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
487 struct resource *res, struct vmem_altmap *altmap)
488{
489 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
490 u64 offset = le64_to_cpu(pfn_sb->dataoff);
491 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
492 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
493 struct nd_namespace_common *ndns = nd_pfn->ndns;
494 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
495 resource_size_t base = nsio->res.start + start_pad;
496 struct vmem_altmap __altmap = {
497 .base_pfn = init_altmap_base(base),
498 .reserve = init_altmap_reserve(base),
499 };
500
501 memcpy(res, &nsio->res, sizeof(*res));
502 res->start += start_pad;
503 res->end -= end_trunc;
504
505 nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
506 if (nd_pfn->mode == PFN_MODE_RAM) {
507 if (offset < SZ_8K)
508 return ERR_PTR(-EINVAL);
509 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
510 altmap = NULL;
511 } else if (nd_pfn->mode == PFN_MODE_PMEM) {
512 nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE;
513 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
514 dev_info(&nd_pfn->dev,
515 "number of pfns truncated from %lld to %ld\n",
516 le64_to_cpu(nd_pfn->pfn_sb->npfns),
517 nd_pfn->npfns);
518 memcpy(altmap, &__altmap, sizeof(*altmap));
519 altmap->free = PHYS_PFN(offset - SZ_8K);
520 altmap->alloc = 0;
521 } else
522 return ERR_PTR(-ENXIO);
523
524 return altmap;
525}
526
527static int nd_pfn_init(struct nd_pfn *nd_pfn)
528{
529 u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
530 struct nd_namespace_common *ndns = nd_pfn->ndns;
531 u32 start_pad = 0, end_trunc = 0;
532 resource_size_t start, size;
533 struct nd_namespace_io *nsio;
534 struct nd_region *nd_region;
535 struct nd_pfn_sb *pfn_sb;
536 unsigned long npfns;
537 phys_addr_t offset;
538 const char *sig;
539 u64 checksum;
540 int rc;
541
542 pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
543 if (!pfn_sb)
544 return -ENOMEM;
545
546 nd_pfn->pfn_sb = pfn_sb;
547 if (is_nd_dax(&nd_pfn->dev))
548 sig = DAX_SIG;
549 else
550 sig = PFN_SIG;
551 rc = nd_pfn_validate(nd_pfn, sig);
552 if (rc != -ENODEV)
553 return rc;
554
555 /* no info block, do init */
556 nd_region = to_nd_region(nd_pfn->dev.parent);
557 if (nd_region->ro) {
558 dev_info(&nd_pfn->dev,
559 "%s is read-only, unable to init metadata\n",
560 dev_name(&nd_region->dev));
561 return -ENXIO;
562 }
563
564 memset(pfn_sb, 0, sizeof(*pfn_sb));
565
566 /*
567 * Check whether pmem collides with 'System RAM' when section aligned,
568 * and trim it accordingly
569 */
570 nsio = to_nd_namespace_io(&ndns->dev);
571 start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
572 size = resource_size(&nsio->res);
573 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
574 IORES_DESC_NONE) == REGION_MIXED) {
575 start = nsio->res.start;
576 start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
577 }
578
579 start = nsio->res.start;
580 size = PHYS_SECTION_ALIGN_UP(start + size) - start;
581 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
582 IORES_DESC_NONE) == REGION_MIXED) {
583 size = resource_size(&nsio->res);
584 end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
585 }
586
587 if (start_pad + end_trunc)
588 dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
589 dev_name(&ndns->dev), start_pad + end_trunc);
590
591 /*
592 * Note: we use 64 here as the standard size of struct page;
593 * debugging options may cause it to be larger, in which case the
594 * implementation will limit the pfns advertised through
595 * ->direct_access() to those that are included in the memmap.
596 */
597 start += start_pad;
598 size = resource_size(&nsio->res);
599 npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K;
600 if (nd_pfn->mode == PFN_MODE_PMEM) {
601 unsigned long memmap_size;
602
603 /*
604 * vmemmap_populate_hugepages() allocates the memmap array in
605 * HPAGE_SIZE chunks.
606 */
607 memmap_size = ALIGN(64 * npfns, HPAGE_SIZE);
608 offset = ALIGN(start + SZ_8K + memmap_size + dax_label_reserve,
609 nd_pfn->align) - start;
610 } else if (nd_pfn->mode == PFN_MODE_RAM)
611 offset = ALIGN(start + SZ_8K + dax_label_reserve,
612 nd_pfn->align) - start;
613 else
614 return -ENXIO;
615
616 if (offset + start_pad + end_trunc >= size) {
617 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
618 dev_name(&ndns->dev));
619 return -ENXIO;
620 }
621
622 npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
623 pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
624 pfn_sb->dataoff = cpu_to_le64(offset);
625 pfn_sb->npfns = cpu_to_le64(npfns);
626 memcpy(pfn_sb->signature, sig, PFN_SIG_LEN);
627 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
628 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
629 pfn_sb->version_major = cpu_to_le16(1);
630 pfn_sb->version_minor = cpu_to_le16(2);
631 pfn_sb->start_pad = cpu_to_le32(start_pad);
632 pfn_sb->end_trunc = cpu_to_le32(end_trunc);
633 pfn_sb->align = cpu_to_le32(nd_pfn->align);
634 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
635 pfn_sb->checksum = cpu_to_le64(checksum);
636
637 return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
638}
639
640/*
641 * Determine the effective resource range and vmem_altmap from an nd_pfn
642 * instance.
643 */
644struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
645 struct resource *res, struct vmem_altmap *altmap)
646{
647 int rc;
648
649 if (!nd_pfn->uuid || !nd_pfn->ndns)
650 return ERR_PTR(-ENODEV);
651
652 rc = nd_pfn_init(nd_pfn);
653 if (rc)
654 return ERR_PTR(rc);
655
656 /* we need a valid pfn_sb before we can init a vmem_altmap */
657 return __nvdimm_setup_pfn(nd_pfn, res, altmap);
658}
659EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);
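
Since memory is hotplugged at section granularity, init_altmap_reserve() reserves the 8K info-block area plus the gap back from the namespace base to the previous section boundary. A standalone sketch of that arithmetic, assuming x86_64's 128 MiB sections and 4 KiB pages for illustration (the base address is a made-up example):

#include <stdio.h>

#define PAGE_SHIFT    12
#define SECTION_SHIFT 27  /* assumption: x86_64 sparsemem section size */
#define PHYS_PFN(x)   ((unsigned long)((x) >> PAGE_SHIFT))
#define PFN_SECTION_ALIGN_DOWN(pfn) \
        ((pfn) & ~((1UL << (SECTION_SHIFT - PAGE_SHIFT)) - 1))

int main(void)
{
        unsigned long long base = 0x100004000ULL; /* example namespace start */
        unsigned long base_pfn = PHYS_PFN(base);
        /* 8 KiB for the info block, plus the pad back to the section base */
        unsigned long reserve = PHYS_PFN(8192)
                + (base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn));

        printf("altmap reserve = %lu pfns\n", reserve); /* prints 6 here */
        return 0;
}
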
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 92f536596b24..042baec56931 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -33,10 +33,6 @@
33#include "nd.h" 33#include "nd.h"
34 34
35struct pmem_device { 35struct pmem_device {
36 struct request_queue *pmem_queue;
37 struct gendisk *pmem_disk;
38 struct nd_namespace_common *ndns;
39
40 /* One contiguous memory region per device */ 36 /* One contiguous memory region per device */
41 phys_addr_t phys_addr; 37 phys_addr_t phys_addr;
42 /* when non-zero this device is hosting a 'pfn' instance */ 38 /* when non-zero this device is hosting a 'pfn' instance */
@@ -50,23 +46,10 @@ struct pmem_device {
50 struct badblocks bb; 46 struct badblocks bb;
51}; 47};
52 48
53static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
54{
55 if (bb->count) {
56 sector_t first_bad;
57 int num_bad;
58
59 return !!badblocks_check(bb, sector, len / 512, &first_bad,
60 &num_bad);
61 }
62
63 return false;
64}
65
66static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, 49static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
67 unsigned int len) 50 unsigned int len)
68{ 51{
69 struct device *dev = disk_to_dev(pmem->pmem_disk); 52 struct device *dev = pmem->bb.dev;
70 sector_t sector; 53 sector_t sector;
71 long cleared; 54 long cleared;
72 55
@@ -136,8 +119,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
136 unsigned long start; 119 unsigned long start;
137 struct bio_vec bvec; 120 struct bio_vec bvec;
138 struct bvec_iter iter; 121 struct bvec_iter iter;
139 struct block_device *bdev = bio->bi_bdev; 122 struct pmem_device *pmem = q->queuedata;
140 struct pmem_device *pmem = bdev->bd_disk->private_data;
141 123
142 do_acct = nd_iostat_start(bio, &start); 124 do_acct = nd_iostat_start(bio, &start);
143 bio_for_each_segment(bvec, bio, iter) { 125 bio_for_each_segment(bvec, bio, iter) {
@@ -162,7 +144,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
162static int pmem_rw_page(struct block_device *bdev, sector_t sector, 144static int pmem_rw_page(struct block_device *bdev, sector_t sector,
163 struct page *page, int rw) 145 struct page *page, int rw)
164{ 146{
165 struct pmem_device *pmem = bdev->bd_disk->private_data; 147 struct pmem_device *pmem = bdev->bd_queue->queuedata;
166 int rc; 148 int rc;
167 149
168 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); 150 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
@@ -184,7 +166,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
184static long pmem_direct_access(struct block_device *bdev, sector_t sector, 166static long pmem_direct_access(struct block_device *bdev, sector_t sector,
185 void __pmem **kaddr, pfn_t *pfn) 167 void __pmem **kaddr, pfn_t *pfn)
186{ 168{
187 struct pmem_device *pmem = bdev->bd_disk->private_data; 169 struct pmem_device *pmem = bdev->bd_queue->queuedata;
188 resource_size_t offset = sector * 512 + pmem->data_offset; 170 resource_size_t offset = sector * 512 + pmem->data_offset;
189 171
190 *kaddr = pmem->virt_addr + offset; 172 *kaddr = pmem->virt_addr + offset;
@@ -200,104 +182,119 @@ static const struct block_device_operations pmem_fops = {
200 .revalidate_disk = nvdimm_revalidate_disk, 182 .revalidate_disk = nvdimm_revalidate_disk,
201}; 183};
202 184
203static struct pmem_device *pmem_alloc(struct device *dev, 185static void pmem_release_queue(void *q)
204 struct resource *res, int id) 186{
187 blk_cleanup_queue(q);
188}
189
190void pmem_release_disk(void *disk)
205{ 191{
192 del_gendisk(disk);
193 put_disk(disk);
194}
195
196static int pmem_attach_disk(struct device *dev,
197 struct nd_namespace_common *ndns)
198{
199 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
200 struct vmem_altmap __altmap, *altmap = NULL;
201 struct resource *res = &nsio->res;
202 struct nd_pfn *nd_pfn = NULL;
203 int nid = dev_to_node(dev);
204 struct nd_pfn_sb *pfn_sb;
206 struct pmem_device *pmem; 205 struct pmem_device *pmem;
206 struct resource pfn_res;
207 struct request_queue *q; 207 struct request_queue *q;
208 struct gendisk *disk;
209 void *addr;
210
211 /* while nsio_rw_bytes is active, parse a pfn info block if present */
212 if (is_nd_pfn(dev)) {
213 nd_pfn = to_nd_pfn(dev);
214 altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap);
215 if (IS_ERR(altmap))
216 return PTR_ERR(altmap);
217 }
218
219 /* we're attaching a block device, disable raw namespace access */
220 devm_nsio_disable(dev, nsio);
208 221
209 pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); 222 pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
210 if (!pmem) 223 if (!pmem)
211 return ERR_PTR(-ENOMEM); 224 return -ENOMEM;
212 225
226 dev_set_drvdata(dev, pmem);
213 pmem->phys_addr = res->start; 227 pmem->phys_addr = res->start;
214 pmem->size = resource_size(res); 228 pmem->size = resource_size(res);
215 if (!arch_has_wmb_pmem()) 229 if (!arch_has_wmb_pmem())
216 dev_warn(dev, "unable to guarantee persistence of writes\n"); 230 dev_warn(dev, "unable to guarantee persistence of writes\n");
217 231
218 if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, 232 if (!devm_request_mem_region(dev, res->start, resource_size(res),
219 dev_name(dev))) { 233 dev_name(dev))) {
220 dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", 234 dev_warn(dev, "could not reserve region %pR\n", res);
221 &pmem->phys_addr, pmem->size); 235 return -EBUSY;
222 return ERR_PTR(-EBUSY);
223 } 236 }
224 237
225 q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); 238 q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
226 if (!q) 239 if (!q)
227 return ERR_PTR(-ENOMEM); 240 return -ENOMEM;
228 241
229 pmem->pfn_flags = PFN_DEV; 242 pmem->pfn_flags = PFN_DEV;
230 if (pmem_should_map_pages(dev)) { 243 if (is_nd_pfn(dev)) {
231 pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, 244 addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
245 altmap);
246 pfn_sb = nd_pfn->pfn_sb;
247 pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
248 pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
249 pmem->pfn_flags |= PFN_MAP;
250 res = &pfn_res; /* for badblocks populate */
251 res->start += pmem->data_offset;
252 } else if (pmem_should_map_pages(dev)) {
253 addr = devm_memremap_pages(dev, &nsio->res,
232 &q->q_usage_counter, NULL); 254 &q->q_usage_counter, NULL);
233 pmem->pfn_flags |= PFN_MAP; 255 pmem->pfn_flags |= PFN_MAP;
234 } else 256 } else
235 pmem->virt_addr = (void __pmem *) devm_memremap(dev, 257 addr = devm_memremap(dev, pmem->phys_addr,
236 pmem->phys_addr, pmem->size, 258 pmem->size, ARCH_MEMREMAP_PMEM);
237 ARCH_MEMREMAP_PMEM);
238 259
239 if (IS_ERR(pmem->virt_addr)) { 260 /*
261 * At release time the queue must be dead before
262 * devm_memremap_pages is unwound
263 */
264 if (devm_add_action(dev, pmem_release_queue, q)) {
240 blk_cleanup_queue(q); 265 blk_cleanup_queue(q);
241 return (void __force *) pmem->virt_addr; 266 return -ENOMEM;
242 } 267 }
243 268
244 pmem->pmem_queue = q; 269 if (IS_ERR(addr))
245 return pmem; 270 return PTR_ERR(addr);
246} 271 pmem->virt_addr = (void __pmem *) addr;
247
248static void pmem_detach_disk(struct pmem_device *pmem)
249{
250 if (!pmem->pmem_disk)
251 return;
252
253 del_gendisk(pmem->pmem_disk);
254 put_disk(pmem->pmem_disk);
255 blk_cleanup_queue(pmem->pmem_queue);
256}
257 272
258static int pmem_attach_disk(struct device *dev, 273 blk_queue_make_request(q, pmem_make_request);
259 struct nd_namespace_common *ndns, struct pmem_device *pmem) 274 blk_queue_physical_block_size(q, PAGE_SIZE);
260{ 275 blk_queue_max_hw_sectors(q, UINT_MAX);
261 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 276 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
262 int nid = dev_to_node(dev); 277 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
263 struct resource bb_res; 278 q->queuedata = pmem;
264 struct gendisk *disk;
265
266 blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
267 blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
268 blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
269 blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
270 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
271 279
272 disk = alloc_disk_node(0, nid); 280 disk = alloc_disk_node(0, nid);
273 if (!disk) { 281 if (!disk)
274 blk_cleanup_queue(pmem->pmem_queue); 282 return -ENOMEM;
283 if (devm_add_action(dev, pmem_release_disk, disk)) {
284 put_disk(disk);
275 return -ENOMEM; 285 return -ENOMEM;
276 } 286 }
277 287
278 disk->fops = &pmem_fops; 288 disk->fops = &pmem_fops;
279 disk->private_data = pmem; 289 disk->queue = q;
280 disk->queue = pmem->pmem_queue;
281 disk->flags = GENHD_FL_EXT_DEVT; 290 disk->flags = GENHD_FL_EXT_DEVT;
282 nvdimm_namespace_disk_name(ndns, disk->disk_name); 291 nvdimm_namespace_disk_name(ndns, disk->disk_name);
283 disk->driverfs_dev = dev; 292 disk->driverfs_dev = dev;
284 set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) 293 set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
285 / 512); 294 / 512);
286 pmem->pmem_disk = disk;
287 devm_exit_badblocks(dev, &pmem->bb);
288 if (devm_init_badblocks(dev, &pmem->bb)) 295 if (devm_init_badblocks(dev, &pmem->bb))
289 return -ENOMEM; 296 return -ENOMEM;
290 bb_res.start = nsio->res.start + pmem->data_offset; 297 nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
291 bb_res.end = nsio->res.end;
292 if (is_nd_pfn(dev)) {
293 struct nd_pfn *nd_pfn = to_nd_pfn(dev);
294 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
295
296 bb_res.start += __le32_to_cpu(pfn_sb->start_pad);
297 bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc);
298 }
299 nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb,
300 &bb_res);
301 disk->bb = &pmem->bb; 298 disk->bb = &pmem->bb;
302 add_disk(disk); 299 add_disk(disk);
303 revalidate_disk(disk); 300 revalidate_disk(disk);
@@ -305,346 +302,68 @@ static int pmem_attach_disk(struct device *dev,
305 return 0; 302 return 0;
306} 303}
307 304
308static int pmem_rw_bytes(struct nd_namespace_common *ndns,
309 resource_size_t offset, void *buf, size_t size, int rw)
310{
311 struct pmem_device *pmem = dev_get_drvdata(ndns->claim);
312
313 if (unlikely(offset + size > pmem->size)) {
314 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
315 return -EFAULT;
316 }
317
318 if (rw == READ) {
319 unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
320
321 if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
322 return -EIO;
323 return memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
324 } else {
325 memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
326 wmb_pmem();
327 }
328
329 return 0;
330}
331
332static int nd_pfn_init(struct nd_pfn *nd_pfn)
333{
334 struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
335 struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
336 struct nd_namespace_common *ndns = nd_pfn->ndns;
337 u32 start_pad = 0, end_trunc = 0;
338 resource_size_t start, size;
339 struct nd_namespace_io *nsio;
340 struct nd_region *nd_region;
341 unsigned long npfns;
342 phys_addr_t offset;
343 u64 checksum;
344 int rc;
345
346 if (!pfn_sb)
347 return -ENOMEM;
348
349 nd_pfn->pfn_sb = pfn_sb;
350 rc = nd_pfn_validate(nd_pfn);
351 if (rc == -ENODEV)
352 /* no info block, do init */;
353 else
354 return rc;
355
356 nd_region = to_nd_region(nd_pfn->dev.parent);
357 if (nd_region->ro) {
358 dev_info(&nd_pfn->dev,
359 "%s is read-only, unable to init metadata\n",
360 dev_name(&nd_region->dev));
361 goto err;
362 }
363
364 memset(pfn_sb, 0, sizeof(*pfn_sb));
365
366 /*
367 * Check if pmem collides with 'System RAM' when section aligned and
368 * trim it accordingly
369 */
370 nsio = to_nd_namespace_io(&ndns->dev);
371 start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
372 size = resource_size(&nsio->res);
373 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
374 IORES_DESC_NONE) == REGION_MIXED) {
375
376 start = nsio->res.start;
377 start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
378 }
379
380 start = nsio->res.start;
381 size = PHYS_SECTION_ALIGN_UP(start + size) - start;
382 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
383 IORES_DESC_NONE) == REGION_MIXED) {
384 size = resource_size(&nsio->res);
385 end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
386 }
387
388 if (start_pad + end_trunc)
389 dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
390 dev_name(&ndns->dev), start_pad + end_trunc);
391
392 /*
393 * Note, we use 64 here for the standard size of struct page,
394 * debugging options may cause it to be larger in which case the
395 * implementation will limit the pfns advertised through
396 * ->direct_access() to those that are included in the memmap.
397 */
398 start += start_pad;
399 npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K;
400 if (nd_pfn->mode == PFN_MODE_PMEM) {
401 unsigned long memmap_size;
402
403 /*
404 * vmemmap_populate_hugepages() allocates the memmap array in
405 * PMD_SIZE chunks.
406 */
407 memmap_size = ALIGN(64 * npfns, PMD_SIZE);
408 offset = ALIGN(start + SZ_8K + memmap_size, nd_pfn->align)
409 - start;
410 } else if (nd_pfn->mode == PFN_MODE_RAM)
411 offset = ALIGN(start + SZ_8K, nd_pfn->align) - start;
412 else
413 goto err;
414
415 if (offset + start_pad + end_trunc >= pmem->size) {
416 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
417 dev_name(&ndns->dev));
418 goto err;
419 }
420
421 npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K;
422 pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
423 pfn_sb->dataoff = cpu_to_le64(offset);
424 pfn_sb->npfns = cpu_to_le64(npfns);
425 memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
426 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
427 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
428 pfn_sb->version_major = cpu_to_le16(1);
429 pfn_sb->version_minor = cpu_to_le16(1);
430 pfn_sb->start_pad = cpu_to_le32(start_pad);
431 pfn_sb->end_trunc = cpu_to_le32(end_trunc);
432 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
433 pfn_sb->checksum = cpu_to_le64(checksum);
434
435 rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
436 if (rc)
437 goto err;
438
439 return 0;
440 err:
441 nd_pfn->pfn_sb = NULL;
442 kfree(pfn_sb);
443 return -ENXIO;
444}
445
446static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
447{
448 struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
449 struct pmem_device *pmem;
450
451 /* free pmem disk */
452 pmem = dev_get_drvdata(&nd_pfn->dev);
453 pmem_detach_disk(pmem);
454
455 /* release nd_pfn resources */
456 kfree(nd_pfn->pfn_sb);
457 nd_pfn->pfn_sb = NULL;
458
459 return 0;
460}
461
462/*
463 * We hotplug memory at section granularity, pad the reserved area from
464 * the previous section base to the namespace base address.
465 */
466static unsigned long init_altmap_base(resource_size_t base)
467{
468 unsigned long base_pfn = PHYS_PFN(base);
469
470 return PFN_SECTION_ALIGN_DOWN(base_pfn);
471}
472
473static unsigned long init_altmap_reserve(resource_size_t base)
474{
475 unsigned long reserve = PHYS_PFN(SZ_8K);
476 unsigned long base_pfn = PHYS_PFN(base);
477
478 reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
479 return reserve;
480}
481
482static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn)
483{
484 int rc;
485 struct resource res;
486 struct request_queue *q;
487 struct pmem_device *pmem;
488 struct vmem_altmap *altmap;
489 struct device *dev = &nd_pfn->dev;
490 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
491 struct nd_namespace_common *ndns = nd_pfn->ndns;
492 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
493 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
494 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
495 resource_size_t base = nsio->res.start + start_pad;
496 struct vmem_altmap __altmap = {
497 .base_pfn = init_altmap_base(base),
498 .reserve = init_altmap_reserve(base),
499 };
500
501 pmem = dev_get_drvdata(dev);
502 pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
503 pmem->pfn_pad = start_pad + end_trunc;
504 nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
505 if (nd_pfn->mode == PFN_MODE_RAM) {
506 if (pmem->data_offset < SZ_8K)
507 return -EINVAL;
508 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
509 altmap = NULL;
510 } else if (nd_pfn->mode == PFN_MODE_PMEM) {
511 nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset)
512 / PAGE_SIZE;
513 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
514 dev_info(&nd_pfn->dev,
515 "number of pfns truncated from %lld to %ld\n",
516 le64_to_cpu(nd_pfn->pfn_sb->npfns),
517 nd_pfn->npfns);
518 altmap = & __altmap;
519 altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K);
520 altmap->alloc = 0;
521 } else {
522 rc = -ENXIO;
523 goto err;
524 }
525
526 /* establish pfn range for lookup, and switch to direct map */
527 q = pmem->pmem_queue;
528 memcpy(&res, &nsio->res, sizeof(res));
529 res.start += start_pad;
530 res.end -= end_trunc;
531 devm_memunmap(dev, (void __force *) pmem->virt_addr);
532 pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res,
533 &q->q_usage_counter, altmap);
534 pmem->pfn_flags |= PFN_MAP;
535 if (IS_ERR(pmem->virt_addr)) {
536 rc = PTR_ERR(pmem->virt_addr);
537 goto err;
538 }
539
540 /* attach pmem disk in "pfn-mode" */
541 rc = pmem_attach_disk(dev, ndns, pmem);
542 if (rc)
543 goto err;
544
545 return rc;
546 err:
547 nvdimm_namespace_detach_pfn(ndns);
548 return rc;
549
550}
551
552static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
553{
554 struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
555 int rc;
556
557 if (!nd_pfn->uuid || !nd_pfn->ndns)
558 return -ENODEV;
559
560 rc = nd_pfn_init(nd_pfn);
561 if (rc)
562 return rc;
563 /* we need a valid pfn_sb before we can init a vmem_altmap */
564 return __nvdimm_namespace_attach_pfn(nd_pfn);
565}
566
567static int nd_pmem_probe(struct device *dev) 305static int nd_pmem_probe(struct device *dev)
568{ 306{
569 struct nd_region *nd_region = to_nd_region(dev->parent);
570 struct nd_namespace_common *ndns; 307 struct nd_namespace_common *ndns;
571 struct nd_namespace_io *nsio;
572 struct pmem_device *pmem;
573 308
574 ndns = nvdimm_namespace_common_probe(dev); 309 ndns = nvdimm_namespace_common_probe(dev);
575 if (IS_ERR(ndns)) 310 if (IS_ERR(ndns))
576 return PTR_ERR(ndns); 311 return PTR_ERR(ndns);
577 312
578 nsio = to_nd_namespace_io(&ndns->dev); 313 if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev)))
579 pmem = pmem_alloc(dev, &nsio->res, nd_region->id); 314 return -ENXIO;
580 if (IS_ERR(pmem))
581 return PTR_ERR(pmem);
582
583 pmem->ndns = ndns;
584 dev_set_drvdata(dev, pmem);
585 ndns->rw_bytes = pmem_rw_bytes;
586 if (devm_init_badblocks(dev, &pmem->bb))
587 return -ENOMEM;
588 nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res);
589 315
590 if (is_nd_btt(dev)) { 316 if (is_nd_btt(dev))
591 /* btt allocates its own request_queue */
592 blk_cleanup_queue(pmem->pmem_queue);
593 pmem->pmem_queue = NULL;
594 return nvdimm_namespace_attach_btt(ndns); 317 return nvdimm_namespace_attach_btt(ndns);
595 }
596 318
597 if (is_nd_pfn(dev)) 319 if (is_nd_pfn(dev))
598 return nvdimm_namespace_attach_pfn(ndns); 320 return pmem_attach_disk(dev, ndns);
599 321
600 if (nd_btt_probe(ndns, pmem) == 0 || nd_pfn_probe(ndns, pmem) == 0) { 322 /* if we find a valid info-block we'll come back as that personality */
601 /* 323 if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
602 * We'll come back as either btt-pmem, or pfn-pmem, so 324 || nd_dax_probe(dev, ndns) == 0)
603 * drop the queue allocation for now.
604 */
605 blk_cleanup_queue(pmem->pmem_queue);
606 return -ENXIO; 325 return -ENXIO;
607 }
608 326
609 return pmem_attach_disk(dev, ndns, pmem); 327 /* ...otherwise we're just a raw pmem device */
328 return pmem_attach_disk(dev, ndns);
610} 329}
611 330
612static int nd_pmem_remove(struct device *dev) 331static int nd_pmem_remove(struct device *dev)
613{ 332{
614 struct pmem_device *pmem = dev_get_drvdata(dev);
615
616 if (is_nd_btt(dev)) 333 if (is_nd_btt(dev))
617 nvdimm_namespace_detach_btt(pmem->ndns); 334 nvdimm_namespace_detach_btt(to_nd_btt(dev));
618 else if (is_nd_pfn(dev))
619 nvdimm_namespace_detach_pfn(pmem->ndns);
620 else
621 pmem_detach_disk(pmem);
622
623 return 0; 335 return 0;
624} 336}
625 337
626static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) 338static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
627{ 339{
628 struct pmem_device *pmem = dev_get_drvdata(dev);
629 struct nd_namespace_common *ndns = pmem->ndns;
630 struct nd_region *nd_region = to_nd_region(dev->parent); 340 struct nd_region *nd_region = to_nd_region(dev->parent);
631 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 341 struct pmem_device *pmem = dev_get_drvdata(dev);
632 struct resource res = { 342 resource_size_t offset = 0, end_trunc = 0;
633 .start = nsio->res.start + pmem->data_offset, 343 struct nd_namespace_common *ndns;
634 .end = nsio->res.end, 344 struct nd_namespace_io *nsio;
635 }; 345 struct resource res;
636 346
637 if (event != NVDIMM_REVALIDATE_POISON) 347 if (event != NVDIMM_REVALIDATE_POISON)
638 return; 348 return;
639 349
640 if (is_nd_pfn(dev)) { 350 if (is_nd_btt(dev)) {
351 struct nd_btt *nd_btt = to_nd_btt(dev);
352
353 ndns = nd_btt->ndns;
354 } else if (is_nd_pfn(dev)) {
641 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 355 struct nd_pfn *nd_pfn = to_nd_pfn(dev);
642 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 356 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
643 357
644 res.start += __le32_to_cpu(pfn_sb->start_pad); 358 ndns = nd_pfn->ndns;
645 res.end -= __le32_to_cpu(pfn_sb->end_trunc); 359 offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad);
646 } 360 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
361 } else
362 ndns = to_ndns(dev);
647 363
364 nsio = to_nd_namespace_io(&ndns->dev);
365 res.start = nsio->res.start + offset;
366 res.end = nsio->res.end - end_trunc;
648 nvdimm_badblocks_populate(nd_region, &pmem->bb, &res); 367 nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
649} 368}
650 369
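
The pmem rework leans on devm's last-in-first-out release order: pmem_release_queue is registered after devm_memremap_pages(), so the request queue is torn down before the page mapping is unwound, exactly as the in-line comment requires. A toy userspace model of that ordering (not the kernel devres implementation):

#include <stdio.h>

typedef void (*action_fn)(void *);
static action_fn actions[8];
static void *args[8];
static int n;

static void devm_add(action_fn fn, void *arg)
{
        actions[n] = fn;
        args[n++] = arg;
}

static void devm_release_all(void)
{
        while (n--)             /* pop in reverse registration order */
                actions[n](args[n]);
}

static void unmap_pages(void *a) { printf("2: unwind devm_memremap_pages(%s)\n", (char *)a); }
static void kill_queue(void *a)  { printf("1: blk_cleanup_queue(%s)\n", (char *)a); }

int main(void)
{
        devm_add(unmap_pages, "pfn_res"); /* registered first, runs last */
        devm_add(kill_queue, "q");        /* registered last, runs first */
        devm_release_all();
        return 0;
}
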
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 4b7715e29cff..05a912359939 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -54,6 +54,7 @@ static int nd_region_probe(struct device *dev)
54 54
55 nd_region->btt_seed = nd_btt_create(nd_region); 55 nd_region->btt_seed = nd_btt_create(nd_region);
56 nd_region->pfn_seed = nd_pfn_create(nd_region); 56 nd_region->pfn_seed = nd_pfn_create(nd_region);
57 nd_region->dax_seed = nd_dax_create(nd_region);
57 if (err == 0) 58 if (err == 0)
58 return 0; 59 return 0;
59 60
@@ -86,6 +87,7 @@ static int nd_region_remove(struct device *dev)
86 nd_region->ns_seed = NULL; 87 nd_region->ns_seed = NULL;
87 nd_region->btt_seed = NULL; 88 nd_region->btt_seed = NULL;
88 nd_region->pfn_seed = NULL; 89 nd_region->pfn_seed = NULL;
90 nd_region->dax_seed = NULL;
89 dev_set_drvdata(dev, NULL); 91 dev_set_drvdata(dev, NULL);
90 nvdimm_bus_unlock(dev); 92 nvdimm_bus_unlock(dev);
91 93
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 139bf71ca549..40fcfea26fbb 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -306,6 +306,23 @@ static ssize_t pfn_seed_show(struct device *dev,
306} 306}
307static DEVICE_ATTR_RO(pfn_seed); 307static DEVICE_ATTR_RO(pfn_seed);
308 308
309static ssize_t dax_seed_show(struct device *dev,
310 struct device_attribute *attr, char *buf)
311{
312 struct nd_region *nd_region = to_nd_region(dev);
313 ssize_t rc;
314
315 nvdimm_bus_lock(dev);
316 if (nd_region->dax_seed)
317 rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed));
318 else
319 rc = sprintf(buf, "\n");
320 nvdimm_bus_unlock(dev);
321
322 return rc;
323}
324static DEVICE_ATTR_RO(dax_seed);
325
309static ssize_t read_only_show(struct device *dev, 326static ssize_t read_only_show(struct device *dev,
310 struct device_attribute *attr, char *buf) 327 struct device_attribute *attr, char *buf)
311{ 328{
@@ -335,6 +352,7 @@ static struct attribute *nd_region_attributes[] = {
335 &dev_attr_mappings.attr, 352 &dev_attr_mappings.attr,
336 &dev_attr_btt_seed.attr, 353 &dev_attr_btt_seed.attr,
337 &dev_attr_pfn_seed.attr, 354 &dev_attr_pfn_seed.attr,
355 &dev_attr_dax_seed.attr,
338 &dev_attr_read_only.attr, 356 &dev_attr_read_only.attr,
339 &dev_attr_set_cookie.attr, 357 &dev_attr_set_cookie.attr,
340 &dev_attr_available_size.attr, 358 &dev_attr_available_size.attr,
@@ -353,6 +371,9 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
353 if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr) 371 if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr)
354 return 0; 372 return 0;
355 373
374 if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr)
375 return 0;
376
356 if (a != &dev_attr_set_cookie.attr 377 if (a != &dev_attr_set_cookie.attr
357 && a != &dev_attr_available_size.attr) 378 && a != &dev_attr_available_size.attr)
358 return a->mode; 379 return a->mode;
@@ -441,6 +462,13 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
441 nd_region_create_pfn_seed(nd_region); 462 nd_region_create_pfn_seed(nd_region);
442 nvdimm_bus_unlock(dev); 463 nvdimm_bus_unlock(dev);
443 } 464 }
465 if (is_nd_dax(dev) && probe) {
466 nd_region = to_nd_region(dev->parent);
467 nvdimm_bus_lock(dev);
468 if (nd_region->dax_seed == dev)
469 nd_region_create_dax_seed(nd_region);
470 nvdimm_bus_unlock(dev);
471 }
444} 472}
445 473
446void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) 474void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev)
@@ -718,6 +746,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
718 ida_init(&nd_region->ns_ida); 746 ida_init(&nd_region->ns_ida);
719 ida_init(&nd_region->btt_ida); 747 ida_init(&nd_region->btt_ida);
720 ida_init(&nd_region->pfn_ida); 748 ida_init(&nd_region->pfn_ida);
749 ida_init(&nd_region->dax_ida);
721 dev = &nd_region->dev; 750 dev = &nd_region->dev;
722 dev_set_name(dev, "region%d", nd_region->id); 751 dev_set_name(dev, "region%d", nd_region->id);
723 dev->parent = &nvdimm_bus->dev; 752 dev->parent = &nvdimm_bus->dev;
@@ -764,3 +793,8 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
764 __func__); 793 __func__);
765} 794}
766EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); 795EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
796
797void __exit nd_region_devs_exit(void)
798{
799 ida_destroy(&region_ida);
800}
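
The new dax_seed attribute surfaces the next available device-dax instance per region, alongside btt_seed and pfn_seed. A short userspace sketch reading it (the /sys/bus/nd/devices/region0 path is an assumption; substitute the region of interest):

#include <stdio.h>

int main(void)
{
        char buf[64] = "";
        FILE *f = fopen("/sys/bus/nd/devices/region0/dax_seed", "r");

        if (!f) {
                perror("dax_seed");
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("next device-dax seed: %s", buf);
        fclose(f);
        return 0;
}
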
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a063d4d8ac39..1089dbf25925 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -29,6 +29,7 @@
29#include <linux/log2.h> 29#include <linux/log2.h>
30#include <linux/cleancache.h> 30#include <linux/cleancache.h>
31#include <linux/dax.h> 31#include <linux/dax.h>
32#include <linux/badblocks.h>
32#include <asm/uaccess.h> 33#include <asm/uaccess.h>
33#include "internal.h" 34#include "internal.h"
34 35
@@ -1159,6 +1160,33 @@ void bd_set_size(struct block_device *bdev, loff_t size)
1159} 1160}
1160EXPORT_SYMBOL(bd_set_size); 1161EXPORT_SYMBOL(bd_set_size);
1161 1162
1163static bool blkdev_dax_capable(struct block_device *bdev)
1164{
1165 struct gendisk *disk = bdev->bd_disk;
1166
1167 if (!disk->fops->direct_access || !IS_ENABLED(CONFIG_FS_DAX))
1168 return false;
1169
1170 /*
1171 * If the partition is not aligned on a page boundary, we can't
1172 * do dax I/O to it.
1173 */
1174 if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
1175 || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
1176 return false;
1177
1178 /*
1179 * If the device has known bad blocks, force all I/O through the
1180 * driver / page cache.
1181 *
1182 * TODO: support finer grained dax error handling
1183 */
1184 if (disk->bb && disk->bb->count)
1185 return false;
1186
1187 return true;
1188}
1189
1162static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); 1190static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1163 1191
1164/* 1192/*
@@ -1720,79 +1748,13 @@ static const struct address_space_operations def_blk_aops = {
1720 .is_dirty_writeback = buffer_check_dirty_writeback, 1748 .is_dirty_writeback = buffer_check_dirty_writeback,
1721}; 1749};
1722 1750
1723#ifdef CONFIG_FS_DAX
1724/*
1725 * In the raw block case we do not need to contend with truncation nor
1726 * unwritten file extents. Without those concerns there is no need for
1727 * additional locking beyond the mmap_sem context that these routines
1728 * are already executing under.
1729 *
1730 * Note, there is no protection if the block device is dynamically
1731 * resized (partition grow/shrink) during a fault. A stable block device
1732 * size is already not enforced in the blkdev_direct_IO path.
1733 *
1734 * For DAX, it is the responsibility of the block device driver to
1735 * ensure the whole-disk device size is stable while requests are in
1736 * flight.
1737 *
1738 * Finally, unlike the filemap_page_mkwrite() case there is no
1739 * filesystem superblock to sync against freezing. We still include a
1740 * pfn_mkwrite callback for dax drivers to receive write fault
1741 * notifications.
1742 */
1743static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1744{
1745 return __dax_fault(vma, vmf, blkdev_get_block, NULL);
1746}
1747
1748static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
1749 struct vm_fault *vmf)
1750{
1751 return dax_pfn_mkwrite(vma, vmf);
1752}
1753
1754static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
1755 pmd_t *pmd, unsigned int flags)
1756{
1757 return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
1758}
1759
1760static const struct vm_operations_struct blkdev_dax_vm_ops = {
1761 .fault = blkdev_dax_fault,
1762 .pmd_fault = blkdev_dax_pmd_fault,
1763 .pfn_mkwrite = blkdev_dax_pfn_mkwrite,
1764};
1765
1766static const struct vm_operations_struct blkdev_default_vm_ops = {
1767 .fault = filemap_fault,
1768 .map_pages = filemap_map_pages,
1769};
1770
1771static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
1772{
1773 struct inode *bd_inode = bdev_file_inode(file);
1774
1775 file_accessed(file);
1776 if (IS_DAX(bd_inode)) {
1777 vma->vm_ops = &blkdev_dax_vm_ops;
1778 vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
1779 } else {
1780 vma->vm_ops = &blkdev_default_vm_ops;
1781 }
1782
1783 return 0;
1784}
1785#else
1786#define blkdev_mmap generic_file_mmap
1787#endif
1788
1789const struct file_operations def_blk_fops = { 1751const struct file_operations def_blk_fops = {
1790 .open = blkdev_open, 1752 .open = blkdev_open,
1791 .release = blkdev_close, 1753 .release = blkdev_close,
1792 .llseek = block_llseek, 1754 .llseek = block_llseek,
1793 .read_iter = blkdev_read_iter, 1755 .read_iter = blkdev_read_iter,
1794 .write_iter = blkdev_write_iter, 1756 .write_iter = blkdev_write_iter,
1795 .mmap = blkdev_mmap, 1757 .mmap = generic_file_mmap,
1796 .fsync = blkdev_fsync, 1758 .fsync = blkdev_fsync,
1797 .unlocked_ioctl = block_ioctl, 1759 .unlocked_ioctl = block_ioctl,
1798#ifdef CONFIG_COMPAT 1760#ifdef CONFIG_COMPAT
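
blkdev_dax_capable() only admits partitions whose start sector and length are both multiples of PAGE_SIZE/512, since DAX mappings are established page by page. A standalone sketch of that test, assuming 4 KiB pages (8 sectors per page):

#include <stdbool.h>
#include <stdio.h>

#define SECTORS_PER_PAGE (4096 / 512)

static bool part_dax_aligned(unsigned long long start_sect,
                unsigned long long nr_sects)
{
        return (start_sect % SECTORS_PER_PAGE) == 0 &&
               (nr_sects % SECTORS_PER_PAGE) == 0;
}

int main(void)
{
        printf("%d\n", part_dax_aligned(2048, 204800)); /* 1: page aligned */
        printf("%d\n", part_dax_aligned(63, 204800));   /* 0: legacy CHS offset */
        return 0;
}
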
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 4d40e9b5d938..788c6c35291a 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -61,12 +61,12 @@ bool acpi_ata_match(acpi_handle handle);
61bool acpi_bay_match(acpi_handle handle); 61bool acpi_bay_match(acpi_handle handle);
62bool acpi_dock_match(acpi_handle handle); 62bool acpi_dock_match(acpi_handle handle);
63 63
64bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, int rev, u64 funcs); 64bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs);
65union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, 65union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
66 int rev, int func, union acpi_object *argv4); 66 u64 rev, u64 func, union acpi_object *argv4);
67 67
68static inline union acpi_object * 68static inline union acpi_object *
69acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, int rev, int func, 69acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
70 union acpi_object *argv4, acpi_object_type type) 70 union acpi_object *argv4, acpi_object_type type)
71{ 71{
72 union acpi_object *obj; 72 union acpi_object *obj;
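
Widening rev and func from int to u64 matches the ACPI _DSM definition, where both the revision and the function index are 64-bit integers. A toy demonstration of the truncation the old prototypes risked (hypothetical function index, not an actual ACPI call):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t func = 0x100000000ULL; /* hypothetical 64-bit function index */
        int as_int = (int)func;         /* the old prototype's parameter type */

        printf("u64: %llu, int: %d\n",
               (unsigned long long)func, as_int); /* the int prints 0 */
        return 0;
}
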
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b8b3c72c2aae..5f61431d8673 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2352,14 +2352,6 @@ extern struct super_block *freeze_bdev(struct block_device *);
2352extern void emergency_thaw_all(void); 2352extern void emergency_thaw_all(void);
2353extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); 2353extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
2354extern int fsync_bdev(struct block_device *); 2354extern int fsync_bdev(struct block_device *);
2355#ifdef CONFIG_FS_DAX
2356extern bool blkdev_dax_capable(struct block_device *bdev);
2357#else
2358static inline bool blkdev_dax_capable(struct block_device *bdev)
2359{
2360 return false;
2361}
2362#endif
2363 2355
2364extern struct super_block *blockdev_superblock; 2356extern struct super_block *blockdev_superblock;
2365 2357
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 833867b9ddc2..0c3c30cbbea5 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -27,7 +27,7 @@ enum {
27 /* need to set a limit somewhere, but yes, this is likely overkill */ 27 /* need to set a limit somewhere, but yes, this is likely overkill */
28 ND_IOCTL_MAX_BUFLEN = SZ_4M, 28 ND_IOCTL_MAX_BUFLEN = SZ_4M,
29 ND_CMD_MAX_ELEM = 5, 29 ND_CMD_MAX_ELEM = 5,
30 ND_CMD_MAX_ENVELOPE = 16, 30 ND_CMD_MAX_ENVELOPE = 256,
31 ND_MAX_MAPPINGS = 32, 31 ND_MAX_MAPPINGS = 32,
32 32
33 /* region flag indicating to direct-map persistent memory by default */ 33 /* region flag indicating to direct-map persistent memory by default */
@@ -68,7 +68,7 @@ struct nd_mapping {
68 68
69struct nvdimm_bus_descriptor { 69struct nvdimm_bus_descriptor {
70 const struct attribute_group **attr_groups; 70 const struct attribute_group **attr_groups;
71 unsigned long dsm_mask; 71 unsigned long cmd_mask;
72 char *provider_name; 72 char *provider_name;
73 ndctl_fn ndctl; 73 ndctl_fn ndctl;
74 int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); 74 int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
@@ -130,10 +130,11 @@ struct nd_region *to_nd_region(struct device *dev);
130struct nd_blk_region *to_nd_blk_region(struct device *dev); 130struct nd_blk_region *to_nd_blk_region(struct device *dev);
131struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); 131struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
132const char *nvdimm_name(struct nvdimm *nvdimm); 132const char *nvdimm_name(struct nvdimm *nvdimm);
133unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
133void *nvdimm_provider_data(struct nvdimm *nvdimm); 134void *nvdimm_provider_data(struct nvdimm *nvdimm);
134struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, 135struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
135 const struct attribute_group **groups, unsigned long flags, 136 const struct attribute_group **groups, unsigned long flags,
136 unsigned long *dsm_mask); 137 unsigned long cmd_mask);
137const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); 138const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
138const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); 139const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
139u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, 140u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
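
The dsm_mask member becomes cmd_mask to reflect that it carries ioctl command numbers rather than raw _DSM function indices, and ND_CMD_CALL gains a bit in it. A minimal sketch of how a provider might populate and test the mask (plain bit ops so it compiles standalone; command values are those defined in the uapi header later in this diff):

#include <stdio.h>

#define ND_CMD_VENDOR 9   /* from include/uapi/linux/ndctl.h */
#define ND_CMD_CALL   10

int main(void)
{
        unsigned long cmd_mask = 0;

        cmd_mask |= 1UL << ND_CMD_VENDOR;
        cmd_mask |= 1UL << ND_CMD_CALL;

        if (cmd_mask & (1UL << ND_CMD_CALL))
                printf("bus advertises ND_CMD_CALL passthrough\n");
        return 0;
}
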
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 5489ab756d1a..aee2761d294c 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -15,6 +15,7 @@
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/ndctl.h> 16#include <linux/ndctl.h>
17#include <linux/device.h> 17#include <linux/device.h>
18#include <linux/badblocks.h>
18 19
19enum nvdimm_event { 20enum nvdimm_event {
20 NVDIMM_REVALIDATE_POISON, 21 NVDIMM_REVALIDATE_POISON,
@@ -55,13 +56,19 @@ static inline struct nd_namespace_common *to_ndns(struct device *dev)
55} 56}
56 57
57/** 58/**
58 * struct nd_namespace_io - infrastructure for loading an nd_pmem instance 59 * struct nd_namespace_io - device representation of a persistent memory range
59 * @dev: namespace device created by the nd region driver 60 * @dev: namespace device created by the nd region driver
60 * @res: struct resource conversion of a NFIT SPA table 61 * @res: struct resource conversion of a NFIT SPA table
62 * @size: cached resource_size(@res) for fast path size checks
63 * @addr: virtual address to access the namespace range
64 * @bb: badblocks list for the namespace range
61 */ 65 */
62struct nd_namespace_io { 66struct nd_namespace_io {
63 struct nd_namespace_common common; 67 struct nd_namespace_common common;
64 struct resource res; 68 struct resource res;
69 resource_size_t size;
70 void __pmem *addr;
71 struct badblocks bb;
65}; 72};
66 73
67/** 74/**
@@ -82,6 +89,7 @@ struct nd_namespace_pmem {
82 * @uuid: namespace name supplied in the dimm label 89 * @uuid: namespace name supplied in the dimm label
83 * @id: ida allocated id 90 * @id: ida allocated id
84 * @lbasize: blk namespaces have a native sector size when btt not present 91 * @lbasize: blk namespaces have a native sector size when btt not present
92 * @size: sum of all the resource ranges allocated to this namespace
85 * @num_resources: number of dpa extents to claim 93 * @num_resources: number of dpa extents to claim
86 * @res: discontiguous dpa extents for given dimm 94 * @res: discontiguous dpa extents for given dimm
87 */ 95 */
@@ -91,6 +99,7 @@ struct nd_namespace_blk {
91 u8 *uuid; 99 u8 *uuid;
92 int id; 100 int id;
93 unsigned long lbasize; 101 unsigned long lbasize;
102 resource_size_t size;
94 int num_resources; 103 int num_resources;
95 struct resource **res; 104 struct resource **res;
96}; 105};
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index e21fe04acc12..3b00f7c8943f 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -222,7 +222,6 @@ struct fsxattr {
222#define BLKSECDISCARD _IO(0x12,125) 222#define BLKSECDISCARD _IO(0x12,125)
223#define BLKROTATIONAL _IO(0x12,126) 223#define BLKROTATIONAL _IO(0x12,126)
224#define BLKZEROOUT _IO(0x12,127) 224#define BLKZEROOUT _IO(0x12,127)
225#define BLKDAXGET _IO(0x12,129)
226 225
227#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ 226#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
228#define FIBMAP _IO(0x00,1) /* bmap access */ 227#define FIBMAP _IO(0x00,1) /* bmap access */
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 7cc28ab05b87..309915f74492 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2015, Intel Corporation. 2 * Copyright (c) 2014-2016, Intel Corporation.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU Lesser General Public License, 5 * under the terms and conditions of the GNU Lesser General Public License,
@@ -20,11 +20,45 @@ struct nd_cmd_smart {
	__u8 data[128];
 } __packed;
 
+#define ND_SMART_HEALTH_VALID	(1 << 0)
+#define ND_SMART_TEMP_VALID	(1 << 1)
+#define ND_SMART_SPARES_VALID	(1 << 2)
+#define ND_SMART_ALARM_VALID	(1 << 3)
+#define ND_SMART_USED_VALID	(1 << 4)
+#define ND_SMART_SHUTDOWN_VALID	(1 << 5)
+#define ND_SMART_VENDOR_VALID	(1 << 6)
+#define ND_SMART_TEMP_TRIP	(1 << 0)
+#define ND_SMART_SPARE_TRIP	(1 << 1)
+#define ND_SMART_NON_CRITICAL_HEALTH	(1 << 0)
+#define ND_SMART_CRITICAL_HEALTH	(1 << 1)
+#define ND_SMART_FATAL_HEALTH	(1 << 2)
+
+struct nd_smart_payload {
+	__u32 flags;
+	__u8 reserved0[4];
+	__u8 health;
+	__u16 temperature;
+	__u8 spares;
+	__u8 alarm_flags;
+	__u8 life_used;
+	__u8 shutdown_state;
+	__u8 reserved1;
+	__u32 vendor_size;
+	__u8 vendor_data[108];
+} __packed;
+
 struct nd_cmd_smart_threshold {
	__u32 status;
	__u8 data[8];
 } __packed;
 
+struct nd_smart_threshold_payload {
+	__u16 alarm_control;
+	__u16 temperature;
+	__u8 spares;
+	__u8 reserved[3];
+} __packed;
+
 struct nd_cmd_dimm_flags {
	__u32 status;
	__u32 flags;
@@ -125,6 +159,7 @@ enum {
	ND_CMD_VENDOR_EFFECT_LOG_SIZE = 7,
	ND_CMD_VENDOR_EFFECT_LOG = 8,
	ND_CMD_VENDOR = 9,
+	ND_CMD_CALL = 10,
 };
 
 enum {
@@ -158,6 +193,7 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
		[ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size",
		[ND_CMD_VENDOR_EFFECT_LOG] = "effect_log",
		[ND_CMD_VENDOR] = "vendor",
+		[ND_CMD_CALL] = "cmd_call",
	};
 
	if (cmd < ARRAY_SIZE(names) && names[cmd])
@@ -206,6 +242,7 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 #define ND_DEVICE_NAMESPACE_IO 4     /* legacy persistent memory */
 #define ND_DEVICE_NAMESPACE_PMEM 5   /* PMEM namespace (may alias with BLK) */
 #define ND_DEVICE_NAMESPACE_BLK 6    /* BLK namespace (may alias with PMEM) */
+#define ND_DEVICE_DAX_PMEM 7         /* Device DAX interface to pmem */
 
 enum nd_driver_flags {
	ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM,
@@ -214,6 +251,7 @@ enum nd_driver_flags {
	ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO,
	ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM,
	ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK,
+	ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM,
 };
 
 enum {
@@ -224,4 +262,44 @@ enum ars_masks {
	ARS_STATUS_MASK = 0x0000FFFF,
	ARS_EXT_STATUS_SHIFT = 16,
 };
+
+/*
+ * struct nd_cmd_pkg
+ *
+ * is a wrapper for a quasi-passthrough interface for invoking firmware
+ * associated with nvdimms.
+ *
+ * INPUT PARAMETERS
+ *
+ * nd_family corresponds to the firmware (e.g. DSM) interface.
+ *
+ * nd_command is the function index advertised by the firmware.
+ *
+ * nd_size_in is the size of the input parameters being passed to firmware.
+ *
+ * OUTPUT PARAMETERS
+ *
+ * nd_fw_size is the size of the data firmware wants to return for
+ * the call. If nd_fw_size is greater than nd_size_out, only the first
+ * nd_size_out bytes are returned.
+ */
+
+struct nd_cmd_pkg {
+	__u64 nd_family;		/* family of commands */
+	__u64 nd_command;
+	__u32 nd_size_in;		/* INPUT: size of input args */
+	__u32 nd_size_out;		/* INPUT: size of payload */
+	__u32 nd_reserved2[9];		/* reserved, must be zero */
+	__u32 nd_fw_size;		/* OUTPUT: size fw wants to return */
+	unsigned char nd_payload[];	/* contents of call */
+};
+
+/* These NVDIMM families represent pre-standardization command sets */
+#define NVDIMM_FAMILY_INTEL 0
+#define NVDIMM_FAMILY_HPE1 1
+#define NVDIMM_FAMILY_HPE2 2
+
+#define ND_IOCTL_CALL		_IOWR(ND_IOCTL, ND_CMD_CALL,\
+					struct nd_cmd_pkg)
+
 #endif /* __NDCTL_H__ */
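
The nd_cmd_pkg envelope is easiest to read alongside a caller. Below is a hedged userspace sketch of an ND_IOCTL_CALL passthrough; the device path, function index, and payload sizes are illustrative assumptions, with real values coming from the firmware interface specification of the selected family:

/*
 * Illustrative userspace use of the ND_CMD_CALL envelope. "/dev/nmem0",
 * the function index, and the 4-byte in / 128-byte out payload sizes
 * are assumptions for the example, not values mandated by the interface.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/ndctl.h>

int main(void)
{
	size_t in = 4, out = 128;
	struct nd_cmd_pkg *pkg = calloc(1, sizeof(*pkg) + in + out);
	int fd = open("/dev/nmem0", O_RDWR);	/* a dimm device node */

	if (!pkg || fd < 0)
		return 1;
	pkg->nd_family = NVDIMM_FAMILY_HPE1;	/* firmware interface to target */
	pkg->nd_command = 1;			/* function index (example) */
	pkg->nd_size_in = in;
	pkg->nd_size_out = out;
	/* input arguments, if any, go at the start of nd_payload */
	if (ioctl(fd, ND_IOCTL_CALL, pkg) == 0)
		printf("firmware wanted to return %u bytes\n", pkg->nd_fw_size);
	close(fd);
	free(pkg);
	return 0;
}

Note the truncation rule from the comment above: the kernel copies back at most nd_size_out bytes even when nd_fw_size reports that firmware had more to return.
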
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 41ef7547e822..9ed58530f695 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1013,6 +1013,7 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
	insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
	return VM_FAULT_NOPAGE;
 }
+EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
 static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 949d80609a32..d26162e81fea 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -627,6 +627,7 @@ pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
 {
	return vma_hugecache_offset(hstate_vma(vma), vma, address);
 }
+EXPORT_SYMBOL_GPL(linear_hugepage_index);
 
 /*
  * Return the size of the pages allocated when backing a VMA. In the majority
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index a34bfd0c8928..785985677159 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -7,6 +7,7 @@ ldflags-y += --wrap=ioremap_nocache
 ldflags-y += --wrap=iounmap
 ldflags-y += --wrap=memunmap
 ldflags-y += --wrap=__devm_request_region
+ldflags-y += --wrap=__devm_release_region
 ldflags-y += --wrap=__request_region
 ldflags-y += --wrap=__release_region
 ldflags-y += --wrap=devm_memremap_pages
@@ -15,6 +16,7 @@ ldflags-y += --wrap=phys_to_pfn_t
 DRIVERS := ../../../drivers
 NVDIMM_SRC := $(DRIVERS)/nvdimm
 ACPI_SRC := $(DRIVERS)/acpi
+DAX_SRC := $(DRIVERS)/dax
 
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
@@ -22,6 +24,8 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 obj-$(CONFIG_ACPI_NFIT) += nfit.o
+obj-$(CONFIG_DEV_DAX) += dax.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
 nfit-y := $(ACPI_SRC)/nfit.o
 nfit-y += config_check.o
@@ -38,6 +42,12 @@ nd_blk-y += config_check.o
 nd_e820-y := $(NVDIMM_SRC)/e820.o
 nd_e820-y += config_check.o
 
+dax-y := $(DAX_SRC)/dax.o
+dax-y += config_check.o
+
+dax_pmem-y := $(DAX_SRC)/pmem.o
+dax_pmem-y += config_check.o
+
 libnvdimm-y := $(NVDIMM_SRC)/core.o
 libnvdimm-y += $(NVDIMM_SRC)/bus.o
 libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
@@ -49,6 +59,7 @@ libnvdimm-y += $(NVDIMM_SRC)/label.o
 libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
 libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
+libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
 libnvdimm-y += config_check.o
 
 obj-m += test/
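
The new --wrap=__devm_release_region line extends the harness's existing linker-interposition scheme: with GNU ld, --wrap=sym reroutes references to sym into __wrap_sym, while __real_sym still reaches the original definition. A standalone userspace illustration of the mechanism, with malloc standing in for the wrapped kernel symbols:

/*
 * Minimal illustration of the ld --wrap mechanism the Kbuild file
 * relies on. Build with: gcc -Wl,--wrap=malloc wrap.c
 * Calls to malloc() are redirected to __wrap_malloc(), which falls
 * through to the real allocator via __real_malloc().
 */
#include <stdio.h>
#include <stdlib.h>

void *__real_malloc(size_t size);	/* resolved by the linker */

void *__wrap_malloc(size_t size)
{
	fprintf(stderr, "intercepted malloc(%zu)\n", size);
	return __real_malloc(size);
}

int main(void)
{
	free(malloc(32));	/* routed through __wrap_malloc() */
	return 0;
}

This is exactly how the __wrap___devm_release_region() definition added to test/iomap.c below gets to intercept release calls for simulated test resources while passing real resources through.
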
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
index f2c7615554eb..adf18bfeca00 100644
--- a/tools/testing/nvdimm/config_check.c
+++ b/tools/testing/nvdimm/config_check.c
@@ -12,4 +12,6 @@ void check(void)
	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
	BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM));
 }
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 0c1a7e65bb81..c842095f2801 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -239,13 +239,11 @@ struct resource *__wrap___devm_request_region(struct device *dev,
 }
 EXPORT_SYMBOL(__wrap___devm_request_region);
 
-void __wrap___release_region(struct resource *parent, resource_size_t start,
-		resource_size_t n)
+static bool nfit_test_release_region(struct resource *parent,
+		resource_size_t start, resource_size_t n)
 {
-	struct nfit_test_resource *nfit_res;
-
	if (parent == &iomem_resource) {
-		nfit_res = get_nfit_res(start);
+		struct nfit_test_resource *nfit_res = get_nfit_res(start);
		if (nfit_res) {
			struct resource *res = nfit_res->res + 1;
 
@@ -254,11 +252,26 @@ void __wrap___release_region(struct resource *parent, resource_size_t start,
					__func__, start, n, res);
			else
				memset(res, 0, sizeof(*res));
-			return;
+			return true;
		}
	}
-	__release_region(parent, start, n);
+	return false;
+}
+
+void __wrap___release_region(struct resource *parent, resource_size_t start,
+		resource_size_t n)
+{
+	if (!nfit_test_release_region(parent, start, n))
+		__release_region(parent, start, n);
 }
 EXPORT_SYMBOL(__wrap___release_region);
 
+void __wrap___devm_release_region(struct device *dev, struct resource *parent,
+		resource_size_t start, resource_size_t n)
+{
+	if (!nfit_test_release_region(parent, start, n))
+		__devm_release_region(dev, parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___devm_release_region);
+
 MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 3187322eeed7..c919866853a0 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -330,12 +330,49 @@ static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err,
	return 0;
 }
 
+static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len)
+{
+	static const struct nd_smart_payload smart_data = {
+		.flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID
+			| ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID
+			| ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID,
+		.health = ND_SMART_NON_CRITICAL_HEALTH,
+		.temperature = 23 * 16,
+		.spares = 75,
+		.alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
+		.life_used = 5,
+		.shutdown_state = 0,
+		.vendor_size = 0,
+	};
+
+	if (buf_len < sizeof(*smart))
+		return -EINVAL;
+	memcpy(smart->data, &smart_data, sizeof(smart_data));
+	return 0;
+}
+
+static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t,
+		unsigned int buf_len)
+{
+	static const struct nd_smart_threshold_payload smart_t_data = {
+		.alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
+		.temperature = 40 * 16,
+		.spares = 5,
+	};
+
+	if (buf_len < sizeof(*smart_t))
+		return -EINVAL;
+	memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data));
+	return 0;
+}
+
 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
		unsigned int buf_len, int *cmd_rc)
 {
	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
	struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+	unsigned int func = cmd;
	int i, rc = 0, __cmd_rc;
 
	if (!cmd_rc)
@@ -344,8 +381,23 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 
	if (nvdimm) {
		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+		unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
 
-		if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask))
+		if (!nfit_mem)
+			return -ENOTTY;
+
+		if (cmd == ND_CMD_CALL) {
+			struct nd_cmd_pkg *call_pkg = buf;
+
+			buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+			buf = (void *) call_pkg->nd_payload;
+			func = call_pkg->nd_command;
+			if (call_pkg->nd_family != nfit_mem->family)
+				return -ENOTTY;
+		}
+
+		if (!test_bit(cmd, &cmd_mask)
+				|| !test_bit(func, &nfit_mem->dsm_mask))
			return -ENOTTY;
 
		/* lookup label space for the given dimm */
@@ -356,7 +408,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
		if (i >= ARRAY_SIZE(handle))
			return -ENXIO;
 
-		switch (cmd) {
+		switch (func) {
		case ND_CMD_GET_CONFIG_SIZE:
			rc = nfit_test_cmd_get_config_size(buf, buf_len);
			break;
@@ -368,16 +420,22 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
			rc = nfit_test_cmd_set_config_data(buf, buf_len,
				t->label[i]);
			break;
+		case ND_CMD_SMART:
+			rc = nfit_test_cmd_smart(buf, buf_len);
+			break;
+		case ND_CMD_SMART_THRESHOLD:
+			rc = nfit_test_cmd_smart_threshold(buf, buf_len);
+			break;
		default:
			return -ENOTTY;
		}
	} else {
		struct ars_state *ars_state = &t->ars_state;
 
-		if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask))
+		if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
			return -ENOTTY;
 
-		switch (cmd) {
+		switch (func) {
		case ND_CMD_ARS_CAP:
			rc = nfit_test_cmd_ars_cap(buf, buf_len);
			break;
@@ -1251,13 +1309,15 @@ static void nfit_test0_setup(struct nfit_test *t)
	post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
 
	acpi_desc = &t->acpi_desc;
-	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en);
-	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
-	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
-	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en);
-	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en);
-	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en);
-	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en);
+	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
 }
 
 static void nfit_test1_setup(struct nfit_test *t)
@@ -1315,10 +1375,10 @@ static void nfit_test1_setup(struct nfit_test *t)
	post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE);
 
	acpi_desc = &t->acpi_desc;
-	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en);
-	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en);
-	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en);
-	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en);
+	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
 }
 
 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
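
The nfit_test stubs above give the SMART plumbing a concrete end to end path: the uapi defines the payload layout, nfit_test0_setup() force-enables the commands, and nfit_test_cmd_smart() answers with canned data. For completeness, a hedged userspace sketch that issues ND_CMD_SMART against a dimm device and decodes the payload; "/dev/nmem0" is an illustrative device node, and the 1/16-degree temperature encoding is inferred from the test stub's 23 * 16 value:

/*
 * Hedged sketch: read and decode a SMART payload from a dimm device.
 * The device path is an assumption for the example.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/ndctl.h>

int main(void)
{
	struct nd_cmd_smart cmd = { 0 };
	struct nd_smart_payload smart;
	int fd = open("/dev/nmem0", O_RDWR);

	if (fd < 0 || ioctl(fd, ND_IOCTL_SMART, &cmd) != 0)
		return 1;
	/* the payload struct overlays the raw data[] envelope */
	memcpy(&smart, cmd.data, sizeof(smart));
	if (smart.flags & ND_SMART_TEMP_VALID)
		printf("temperature: %u/16 C\n", smart.temperature);
	if (smart.flags & ND_SMART_USED_VALID)
		printf("life used: %u%%\n", smart.life_used);
	close(fd);
	return 0;
}

Against the test stub this would report 23 C (368/16) and 5% life used, matching the canned nd_smart_payload values above.
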