 63 files changed, 1807 insertions(+), 944 deletions(-)
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 7e3a2ebba29b..33244e3d9375 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -284,6 +284,7 @@ config CPM2
 config AXON_RAM
 	tristate "Axon DDR2 memory device driver"
 	depends on PPC_IBM_CELL_BLADE && BLOCK
+	select DAX
 	default m
 	help
 	  It registers one block device per Axon's DDR2 memory bank found
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index f523ac883150..a7fe5fee744f 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -25,6 +25,7 @@
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/dax.h>
 #include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
@@ -62,6 +63,7 @@ static int azfs_major, azfs_minor;
 struct axon_ram_bank {
 	struct platform_device	*device;
 	struct gendisk		*disk;
+	struct dax_device	*dax_dev;
 	unsigned int		irq_id;
 	unsigned long		ph_addr;
 	unsigned long		io_addr;
@@ -137,25 +139,32 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
 	return BLK_QC_T_NONE;
 }
 
-/**
- * axon_ram_direct_access - direct_access() method for block device
- * @device, @sector, @data: see block_device_operations method
- */
+static const struct block_device_operations axon_ram_devops = {
+	.owner		= THIS_MODULE,
+};
+
 static long
-axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void **kaddr, pfn_t *pfn, long size)
+__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_pages,
+		       void **kaddr, pfn_t *pfn)
 {
-	struct axon_ram_bank *bank = device->bd_disk->private_data;
-	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
+	resource_size_t offset = pgoff * PAGE_SIZE;
 
 	*kaddr = (void *) bank->io_addr + offset;
 	*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
-	return bank->size - offset;
+	return (bank->size - offset) / PAGE_SIZE;
 }
 
-static const struct block_device_operations axon_ram_devops = {
-	.owner		= THIS_MODULE,
-	.direct_access	= axon_ram_direct_access
+static long
+axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
+		void **kaddr, pfn_t *pfn)
+{
+	struct axon_ram_bank *bank = dax_get_private(dax_dev);
+
+	return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
+}
+
+static const struct dax_operations axon_ram_dax_ops = {
+	.direct_access = axon_ram_dax_direct_access,
 };
 
 /**
@@ -219,6 +228,7 @@ static int axon_ram_probe(struct platform_device *device)
 		goto failed;
 	}
 
+
 	bank->disk->major = azfs_major;
 	bank->disk->first_minor = azfs_minor;
 	bank->disk->fops = &axon_ram_devops;
@@ -227,6 +237,13 @@ static int axon_ram_probe(struct platform_device *device)
 	sprintf(bank->disk->disk_name, "%s%d",
 			AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
 
+	bank->dax_dev = alloc_dax(bank, bank->disk->disk_name,
+			&axon_ram_dax_ops);
+	if (!bank->dax_dev) {
+		rc = -ENOMEM;
+		goto failed;
+	}
+
 	bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
 	if (bank->disk->queue == NULL) {
 		dev_err(&device->dev, "Cannot register disk queue\n");
@@ -278,6 +295,8 @@ failed:
 		del_gendisk(bank->disk);
 		put_disk(bank->disk);
 	}
+	kill_dax(bank->dax_dev);
+	put_dax(bank->dax_dev);
 	device->dev.platform_data = NULL;
 	if (bank->io_addr != 0)
 		iounmap((void __iomem *) bank->io_addr);
@@ -300,6 +319,8 @@ axon_ram_remove(struct platform_device *device)
 
 	device_remove_file(&device->dev, &dev_attr_ecc);
 	free_irq(bank->irq_id, device);
+	kill_dax(bank->dax_dev);
+	put_dax(bank->dax_dev);
 	del_gendisk(bank->disk);
 	put_disk(bank->disk);
 	iounmap((void __iomem *) bank->io_addr);
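
Note: this axonram conversion establishes the pattern the rest of the series repeats for brd and pmem: the sector-addressed block_device_operations ->direct_access() (which returned a byte count) becomes a page-addressed dax_operations ->direct_access() that returns a page count, with the driver instance recovered via dax_get_private(). A minimal consumer-side sketch, assuming the 4.12-era dax_direct_access() and dax_read_lock() entry points from drivers/dax/super.c; example_peek() itself is hypothetical:

#include <linux/dax.h>
#include <linux/pfn_t.h>

/* Hypothetical helper: resolve one page of a dax_device to a kernel
 * address and pfn through the new operations table. */
static int example_peek(struct dax_device *dax_dev, pgoff_t pgoff)
{
	void *kaddr;
	pfn_t pfn;
	long avail;
	int id;

	id = dax_read_lock();	/* pin dax_dev against kill_dax() */
	avail = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
	dax_read_unlock(id);
	if (avail < 0)
		return avail;
	/* avail is a number of pages; the old API returned bytes */
	return 0;
}
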
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 529bb4a6487a..d5a22bac9988 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -44,11 +44,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 		BUG();
 }
 
-static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
-{
-	return memcpy_mcsafe(dst, src, n);
-}
-
 /**
  * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr:	virtual start address
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index a164862d77e3..733bae07fb29 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -79,6 +79,7 @@ int strcmp(const char *cs, const char *ct);
 #define memset(s, c, n) __memset(s, c, n)
 #endif
 
+#define __HAVE_ARCH_MEMCPY_MCSAFE 1
 __must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
 DECLARE_STATIC_KEY_FALSE(mcsafe_key);
 
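
Note: the two arch changes above go together: arch_memcpy_from_pmem() is deleted because callers now use memcpy_mcsafe() directly, and __HAVE_ARCH_MEMCPY_MCSAFE advertises the x86-64 machine-check-recovering implementation. On other architectures a generic fallback degrades to a plain copy that always reports success; a sketch of that fallback (quoted from memory from include/linux/string.h of this era, so treat the exact form as an assumption):

#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
static inline __must_check int memcpy_mcsafe(void *dst, const void *src,
		size_t cnt)
{
	memcpy(dst, src, cnt);
	return 0;	/* no machine-check recovery on this arch */
}
#endif
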
diff --git a/block/Kconfig b/block/Kconfig
index 89cd28f8d051..a8ad7e77db28 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -6,6 +6,7 @@ menuconfig BLOCK
 	default y
 	select SBITMAP
 	select SRCU
+	select DAX
 	help
 	  Provide block layer support for the kernel.
 
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 0171a2faad68..ff07b9143ca4 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -16,7 +16,6 @@
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
-#include <linux/dax.h>
 #include <linux/blktrace_api.h>
 
 #include "partitions/check.h"
@@ -630,24 +629,12 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
 	return 0;
 }
 
-static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
-{
-	struct address_space *mapping = bdev->bd_inode->i_mapping;
-
-	return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)),
-				 NULL);
-}
-
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 {
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
 	struct page *page;
 
-	/* don't populate page cache for dax capable devices */
-	if (IS_DAX(bdev->bd_inode))
-		page = read_dax_sector(bdev, n);
-	else
-		page = read_pagecache_sector(bdev, n);
-
+	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
 	if (!IS_ERR(page)) {
 		if (PageError(page))
 			goto fail;
diff --git a/drivers/Makefile b/drivers/Makefile
index 8f8bdc9e3d29..903b19199b69 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -71,7 +71,7 @@ obj-$(CONFIG_PARPORT)		+= parport/
 obj-$(CONFIG_NVM)		+= lightnvm/
 obj-y				+= base/ block/ misc/ mfd/ nfc/
 obj-$(CONFIG_LIBNVDIMM)		+= nvdimm/
-obj-$(CONFIG_DEV_DAX)		+= dax/
+obj-$(CONFIG_DAX)		+= dax/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
 obj-$(CONFIG_NUBUS)		+= nubus/
 obj-y				+= macintosh/
diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig
index dd0d53c52552..6d3351452ea2 100644
--- a/drivers/acpi/nfit/Kconfig
+++ b/drivers/acpi/nfit/Kconfig
@@ -12,15 +12,3 @@ config ACPI_NFIT
 
 	  To compile this driver as a module, choose M here:
 	  the module will be called nfit.
-
-config ACPI_NFIT_DEBUG
-	bool "NFIT DSM debug"
-	depends on ACPI_NFIT
-	depends on DYNAMIC_DEBUG
-	default n
-	help
-	  Enabling this option causes the nfit driver to dump the
-	  input and output buffers of _DSM operations on the ACPI0012
-	  device and its children.  This can be very verbose, so leave
-	  it disabled unless you are debugging a hardware / firmware
-	  issue.
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index c8ea9d698cd0..656acb5d7166 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -49,7 +49,16 @@ MODULE_PARM_DESC(scrub_overflow_abort,
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
-		"Limit commands to the publicly specified set\n");
+		"Limit commands to the publicly specified set");
+
+static unsigned long override_dsm_mask;
+module_param(override_dsm_mask, ulong, S_IRUGO);
+MODULE_PARM_DESC(override_dsm_mask, "Bitmask of allowed NVDIMM DSM functions");
+
+static int default_dsm_family = -1;
+module_param(default_dsm_family, int, S_IRUGO);
+MODULE_PARM_DESC(default_dsm_family,
+		"Try this DSM type first when identifying NVDIMM family");
 
 LIST_HEAD(acpi_descs);
 DEFINE_MUTEX(acpi_desc_lock);
@@ -175,14 +184,29 @@ static int xlat_bus_status(void *buf, unsigned int cmd, u32 status)
 	return 0;
 }
 
+static int xlat_nvdimm_status(void *buf, unsigned int cmd, u32 status)
+{
+	switch (cmd) {
+	case ND_CMD_GET_CONFIG_SIZE:
+		if (status >> 16 & ND_CONFIG_LOCKED)
+			return -EACCES;
+		break;
+	default:
+		break;
+	}
+
+	/* all other non-zero status results in an error */
+	if (status)
+		return -EIO;
+	return 0;
+}
+
 static int xlat_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd,
 		u32 status)
 {
 	if (!nvdimm)
 		return xlat_bus_status(buf, cmd, status);
-	if (status)
-		return -EIO;
-	return 0;
+	return xlat_nvdimm_status(buf, cmd, status);
 }
 
 int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
@@ -259,14 +283,11 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		in_buf.buffer.length = call_pkg->nd_size_in;
 	}
 
-	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
-		dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
-				__func__, dimm_name, cmd, func,
-				in_buf.buffer.length);
-		print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
+	dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
+			__func__, dimm_name, cmd, func, in_buf.buffer.length);
+	print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
-	}
 
 	out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
 	if (!out_obj) {
@@ -298,13 +319,11 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		goto out;
 	}
 
-	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
-		dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
-				dimm_name, cmd_name, out_obj->buffer.length);
-		print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
-				4, out_obj->buffer.pointer, min_t(u32, 128,
-					out_obj->buffer.length), true);
-	}
+	dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name,
+			cmd_name, out_obj->buffer.length);
+	print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
+			out_obj->buffer.pointer,
+			min_t(u32, 128, out_obj->buffer.length), true);
 
 	for (i = 0, offset = 0; i < desc->out_num; i++) {
 		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
@@ -448,9 +467,9 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_memdev->list);
 	memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
 	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
+	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
 			__func__, memdev->device_handle, memdev->range_index,
-			memdev->region_index);
+			memdev->region_index, memdev->flags);
 	return true;
 }
 
@@ -729,28 +748,38 @@ static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
 	}
 }
 
-static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
+static int __nfit_mem_init(struct acpi_nfit_desc *acpi_desc,
 		struct acpi_nfit_system_address *spa)
 {
 	struct nfit_mem *nfit_mem, *found;
 	struct nfit_memdev *nfit_memdev;
-	int type = nfit_spa_type(spa);
+	int type = spa ? nfit_spa_type(spa) : 0;
 
 	switch (type) {
 	case NFIT_SPA_DCR:
 	case NFIT_SPA_PM:
 		break;
 	default:
-		return 0;
+		if (spa)
+			return 0;
 	}
 
+	/*
+	 * This loop runs in two modes, when a dimm is mapped the loop
+	 * adds memdev associations to an existing dimm, or creates a
+	 * dimm. In the unmapped dimm case this loop sweeps for memdev
+	 * instances with an invalid / zero range_index and adds those
+	 * dimms without spa associations.
+	 */
 	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
 		struct nfit_flush *nfit_flush;
 		struct nfit_dcr *nfit_dcr;
 		u32 device_handle;
 		u16 dcr;
 
-		if (nfit_memdev->memdev->range_index != spa->range_index)
+		if (spa && nfit_memdev->memdev->range_index != spa->range_index)
+			continue;
+		if (!spa && nfit_memdev->memdev->range_index)
 			continue;
 		found = NULL;
 		dcr = nfit_memdev->memdev->region_index;
@@ -835,14 +864,15 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
 				break;
 			}
 			nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
-		} else {
+		} else if (type == NFIT_SPA_PM) {
 			/*
 			 * A single dimm may belong to multiple SPA-PM
 			 * ranges, record at least one in addition to
 			 * any SPA-DCR range.
 			 */
 			nfit_mem->memdev_pmem = nfit_memdev->memdev;
-		}
+		} else
+			nfit_mem->memdev_dcr = nfit_memdev->memdev;
 	}
 
 	return 0;
@@ -866,6 +896,8 @@ static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
 static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
+	int rc;
+
 
 	/*
 	 * For each SPA-DCR or SPA-PMEM address range find its
@@ -876,13 +908,20 @@ static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
 	 * BDWs are optional.
 	 */
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		int rc;
-
-		rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
+		rc = __nfit_mem_init(acpi_desc, nfit_spa->spa);
 		if (rc)
 			return rc;
 	}
 
+	/*
+	 * If a DIMM has failed to be mapped into SPA there will be no
+	 * SPA entries above. Find and register all the unmapped DIMMs
+	 * for reporting and recovery purposes.
+	 */
+	rc = __nfit_mem_init(acpi_desc, NULL);
+	if (rc)
+		return rc;
+
 	list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
 
 	return 0;
@@ -1237,12 +1276,14 @@ static ssize_t flags_show(struct device *dev,
 {
 	u16 flags = to_nfit_memdev(dev)->flags;
 
-	return sprintf(buf, "%s%s%s%s%s\n",
+	return sprintf(buf, "%s%s%s%s%s%s%s\n",
 		flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
 		flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
 		flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
 		flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
-		flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
+		flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "",
+		flags & ACPI_NFIT_MEM_MAP_FAILED ? "map_fail " : "",
+		flags & ACPI_NFIT_MEM_HEALTH_ENABLED ? "smart_notify " : "");
 }
 static DEVICE_ATTR_RO(flags);
 
@@ -1290,8 +1331,16 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct nvdimm *nvdimm = to_nvdimm(dev);
 
-	if (!to_nfit_dcr(dev))
+	if (!to_nfit_dcr(dev)) {
+		/* Without a dcr only the memdev attributes can be surfaced */
+		if (a == &dev_attr_handle.attr || a == &dev_attr_phys_id.attr
+				|| a == &dev_attr_flags.attr
+				|| a == &dev_attr_family.attr
+				|| a == &dev_attr_dsm_mask.attr)
+			return a->mode;
 		return 0;
+	}
+
 	if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
 		return 0;
 	return a->mode;
@@ -1368,6 +1417,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	unsigned long dsm_mask;
 	const u8 *uuid;
 	int i;
+	int family = -1;
 
 	/* nfit test assumes 1:1 relationship between commands and dsms */
 	nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
@@ -1398,11 +1448,14 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	 */
 	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
 		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
-			break;
+			if (family < 0 || i == default_dsm_family)
+				family = i;
 
 	/* limit the supported commands to those that are publicly documented */
-	nfit_mem->family = i;
-	if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
+	nfit_mem->family = family;
+	if (override_dsm_mask && !disable_vendor_specific)
+		dsm_mask = override_dsm_mask;
+	else if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
 		dsm_mask = 0x3fe;
 		if (disable_vendor_specific)
 			dsm_mask &= ~(1 << ND_CMD_VENDOR);
@@ -1462,6 +1515,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
 		struct acpi_nfit_flush_address *flush;
 		unsigned long flags = 0, cmd_mask;
+		struct nfit_memdev *nfit_memdev;
 		u32 device_handle;
 		u16 mem_flags;
 
@@ -1473,11 +1527,22 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 		}
 
 		if (nfit_mem->bdw && nfit_mem->memdev_pmem)
-			flags |= NDD_ALIASING;
+			set_bit(NDD_ALIASING, &flags);
+
+		/* collate flags across all memdevs for this dimm */
+		list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+			struct acpi_nfit_memory_map *dimm_memdev;
+
+			dimm_memdev = __to_nfit_memdev(nfit_mem);
+			if (dimm_memdev->device_handle
+					!= nfit_memdev->memdev->device_handle)
+				continue;
+			dimm_memdev->flags |= nfit_memdev->memdev->flags;
+		}
 
 		mem_flags = __to_nfit_memdev(nfit_mem)->flags;
 		if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
-			flags |= NDD_UNARMED;
+			set_bit(NDD_UNARMED, &flags);
 
 		rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
 		if (rc)
@@ -1507,12 +1572,13 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 		if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
 			continue;
 
-		dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
+		dev_info(acpi_desc->dev, "%s flags:%s%s%s%s%s\n",
 			nvdimm_name(nvdimm),
 			mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
 			mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
 			mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
-			mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
+			mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "",
+			mem_flags & ACPI_NFIT_MEM_MAP_FAILED ? " map_fail" : "");
 
 	}
 
@@ -1783,8 +1849,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
 			mmio_flush_range((void __force *)
 				mmio->addr.aperture + offset, c);
 
-			memcpy_from_pmem(iobuf + copied,
-					mmio->addr.aperture + offset, c);
+			memcpy(iobuf + copied, mmio->addr.aperture + offset, c);
 		}
 
 		copied += c;
@@ -2525,6 +2590,7 @@ static void acpi_nfit_scrub(struct work_struct *work)
 			acpi_nfit_register_region(acpi_desc, nfit_spa);
 		}
 	}
+	acpi_desc->init_complete = 1;
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
 		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
@@ -2547,7 +2613,8 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 				return rc;
 	}
 
-	queue_work(nfit_wq, &acpi_desc->work);
+	if (!acpi_desc->cancel)
+		queue_work(nfit_wq, &acpi_desc->work);
 	return 0;
 }
 
@@ -2593,32 +2660,11 @@ static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
 	return 0;
 }
 
-static void acpi_nfit_destruct(void *data)
+static void acpi_nfit_unregister(void *data)
 {
 	struct acpi_nfit_desc *acpi_desc = data;
-	struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
-
-	/*
-	 * Destruct under acpi_desc_lock so that nfit_handle_mce does not
-	 * race teardown
-	 */
-	mutex_lock(&acpi_desc_lock);
-	acpi_desc->cancel = 1;
-	/*
-	 * Bounce the nvdimm bus lock to make sure any in-flight
-	 * acpi_nfit_ars_rescan() submissions have had a chance to
-	 * either submit or see ->cancel set.
-	 */
-	device_lock(bus_dev);
-	device_unlock(bus_dev);
 
-	flush_workqueue(nfit_wq);
-	if (acpi_desc->scrub_count_state)
-		sysfs_put(acpi_desc->scrub_count_state);
 	nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-	acpi_desc->nvdimm_bus = NULL;
-	list_del(&acpi_desc->list);
-	mutex_unlock(&acpi_desc_lock);
 }
 
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
@@ -2636,7 +2682,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 	if (!acpi_desc->nvdimm_bus)
 		return -ENOMEM;
 
-	rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
+	rc = devm_add_action_or_reset(dev, acpi_nfit_unregister,
 			acpi_desc);
 	if (rc)
 		return rc;
@@ -2728,6 +2774,13 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 	device_lock(dev);
 	device_unlock(dev);
 
+	/* bounce the init_mutex to make init_complete valid */
+	mutex_lock(&acpi_desc->init_mutex);
+	if (acpi_desc->cancel || acpi_desc->init_complete) {
+		mutex_unlock(&acpi_desc->init_mutex);
+		return 0;
+	}
+
 	/*
 	 * Scrub work could take 10s of seconds, userspace may give up so we
 	 * need to be interruptible while waiting.
@@ -2735,6 +2788,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 	INIT_WORK_ONSTACK(&flush.work, flush_probe);
 	COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
 	queue_work(nfit_wq, &flush.work);
+	mutex_unlock(&acpi_desc->init_mutex);
 
 	rc = wait_for_completion_interruptible(&flush.cmp);
 	cancel_work_sync(&flush.work);
@@ -2771,10 +2825,12 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
 	if (work_busy(&acpi_desc->work))
 		return -EBUSY;
 
-	if (acpi_desc->cancel)
+	mutex_lock(&acpi_desc->init_mutex);
+	if (acpi_desc->cancel) {
+		mutex_unlock(&acpi_desc->init_mutex);
 		return 0;
+	}
 
-	mutex_lock(&acpi_desc->init_mutex);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
 		struct acpi_nfit_system_address *spa = nfit_spa->spa;
 
@@ -2818,6 +2874,40 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
 
+static void acpi_nfit_put_table(void *table)
+{
+	acpi_put_table(table);
+}
+
+void acpi_nfit_shutdown(void *data)
+{
+	struct acpi_nfit_desc *acpi_desc = data;
+	struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+
+	/*
+	 * Destruct under acpi_desc_lock so that nfit_handle_mce does not
+	 * race teardown
+	 */
+	mutex_lock(&acpi_desc_lock);
+	list_del(&acpi_desc->list);
+	mutex_unlock(&acpi_desc_lock);
+
+	mutex_lock(&acpi_desc->init_mutex);
+	acpi_desc->cancel = 1;
+	mutex_unlock(&acpi_desc->init_mutex);
+
+	/*
+	 * Bounce the nvdimm bus lock to make sure any in-flight
+	 * acpi_nfit_ars_rescan() submissions have had a chance to
+	 * either submit or see ->cancel set.
+	 */
+	device_lock(bus_dev);
+	device_unlock(bus_dev);
+
+	flush_workqueue(nfit_wq);
+}
+EXPORT_SYMBOL_GPL(acpi_nfit_shutdown);
+
 static int acpi_nfit_add(struct acpi_device *adev)
 {
 	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -2834,6 +2924,10 @@ static int acpi_nfit_add(struct acpi_device *adev)
 		dev_dbg(dev, "failed to find NFIT at startup\n");
 		return 0;
 	}
+
+	rc = devm_add_action_or_reset(dev, acpi_nfit_put_table, tbl);
+	if (rc)
+		return rc;
 	sz = tbl->length;
 
 	acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
@@ -2861,12 +2955,15 @@ static int acpi_nfit_add(struct acpi_device *adev)
 	rc = acpi_nfit_init(acpi_desc, (void *) tbl
 			+ sizeof(struct acpi_table_nfit),
 			sz - sizeof(struct acpi_table_nfit));
-	return rc;
+
+	if (rc)
+		return rc;
+	return devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc);
 }
 
 static int acpi_nfit_remove(struct acpi_device *adev)
 {
-	/* see acpi_nfit_destruct */
+	/* see acpi_nfit_unregister */
 	return 0;
 }
 
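
Note: the probe path above relies on devm release actions running in reverse order of registration: acpi_nfit_put_table() is registered before acpi_nfit_init()/acpi_nfit_shutdown(), so the NFIT table stays valid until bus teardown has finished. A minimal illustration with hypothetical names:

#include <linux/device.h>

static void release_table(void *tbl) { /* runs second, i.e. last */ }
static void release_bus(void *bus)   { /* runs first on teardown */ }

static int example_probe(struct device *dev, void *tbl, void *bus)
{
	int rc;

	rc = devm_add_action_or_reset(dev, release_table, tbl);
	if (rc)
		return rc;
	return devm_add_action_or_reset(dev, release_bus, bus);
}
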
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index fc29c2e9832e..58fb7d68e04a 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -37,7 +37,7 @@
 
 #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
 	| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
-	| ACPI_NFIT_MEM_NOT_ARMED)
+	| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
 
 enum nfit_uuids {
 	/* for simplicity alias the uuid index with the family id */
@@ -163,6 +163,7 @@ struct acpi_nfit_desc {
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
+	unsigned int init_complete:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -238,6 +239,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 
 const u8 *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
+void acpi_nfit_shutdown(void *data);
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
 void __acpi_nvdimm_notify(struct device *dev, u32 event);
 int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index d545abbd5378..8ddc98279c8f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -323,6 +323,7 @@ config BLK_DEV_SX8
 
 config BLK_DEV_RAM
 	tristate "RAM block device support"
+	select DAX if BLK_DEV_RAM_DAX
 	---help---
 	  Saying Y here will allow you to use a portion of your RAM memory as
 	  a block device, so that you can make file systems on it, read and
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 4ec84d504780..57b574f2f66a 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -21,6 +21,7 @@
 #include <linux/slab.h>
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 #include <linux/pfn_t.h>
+#include <linux/dax.h>
 #endif
 
 #include <linux/uaccess.h>
@@ -41,6 +42,9 @@ struct brd_device {
 
 	struct request_queue	*brd_queue;
 	struct gendisk		*brd_disk;
+#ifdef CONFIG_BLK_DEV_RAM_DAX
+	struct dax_device	*dax_dev;
+#endif
 	struct list_head	brd_list;
 
 	/*
@@ -326,30 +330,38 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 }
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
-static long brd_direct_access(struct block_device *bdev, sector_t sector,
-			void **kaddr, pfn_t *pfn, long size)
+static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
-	struct brd_device *brd = bdev->bd_disk->private_data;
 	struct page *page;
 
 	if (!brd)
 		return -ENODEV;
-	page = brd_insert_page(brd, sector);
+	page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
 	if (!page)
 		return -ENOSPC;
 	*kaddr = page_address(page);
 	*pfn = page_to_pfn_t(page);
 
-	return PAGE_SIZE;
+	return 1;
 }
-#else
-#define brd_direct_access NULL
+
+static long brd_dax_direct_access(struct dax_device *dax_dev,
+		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct brd_device *brd = dax_get_private(dax_dev);
+
+	return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn);
+}
+
+static const struct dax_operations brd_dax_ops = {
+	.direct_access = brd_dax_direct_access,
+};
 #endif
 
 static const struct block_device_operations brd_fops = {
 	.owner =		THIS_MODULE,
 	.rw_page =		brd_rw_page,
-	.direct_access =	brd_direct_access,
 };
 
 /*
@@ -415,9 +427,6 @@ static struct brd_device *brd_alloc(int i)
 	 * is harmless)
 	 */
 	blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
-#ifdef CONFIG_BLK_DEV_RAM_DAX
-	queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
-#endif
 	disk = brd->brd_disk = alloc_disk(max_part);
 	if (!disk)
 		goto out_free_queue;
@@ -430,8 +439,21 @@ static struct brd_device *brd_alloc(int i)
 	sprintf(disk->disk_name, "ram%d", i);
 	set_capacity(disk, rd_size * 2);
 
+#ifdef CONFIG_BLK_DEV_RAM_DAX
+	queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
+	brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops);
+	if (!brd->dax_dev)
+		goto out_free_inode;
+#endif
+
+
 	return brd;
 
+#ifdef CONFIG_BLK_DEV_RAM_DAX
+out_free_inode:
+	kill_dax(brd->dax_dev);
+	put_dax(brd->dax_dev);
+#endif
 out_free_queue:
 	blk_cleanup_queue(brd->brd_queue);
 out_free_dev:
@@ -471,6 +493,10 @@ out:
 static void brd_del_one(struct brd_device *brd)
 {
 	list_del(&brd->brd_list);
+#ifdef CONFIG_BLK_DEV_RAM_DAX
+	kill_dax(brd->dax_dev);
+	put_dax(brd->dax_dev);
+#endif
 	del_gendisk(brd->brd_disk);
 	brd_free(brd);
 }
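
Note: brd and axonram now share the same dax_device lifecycle: alloc_dax() at attach time, then kill_dax() plus put_dax() at detach. kill_dax() fences new direct_access() callers (dax_read_lock() users drain over an SRCU grace period) and put_dax() drops the final reference. A sketch with hypothetical "foo" names:

#include <linux/dax.h>

struct foo {
	struct dax_device *dax_dev;
};

extern const struct dax_operations foo_dax_ops;	/* hypothetical */

static int foo_attach(struct foo *foo, const char *host)
{
	foo->dax_dev = alloc_dax(foo, host, &foo_dax_ops);
	if (!foo->dax_dev)
		return -ENOMEM;
	return 0;
}

static void foo_detach(struct foo *foo)
{
	kill_dax(foo->dax_dev);	/* fence new dax_direct_access() users */
	put_dax(foo->dax_dev);	/* drop the final reference */
}
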
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index 9e95bf94eb13..b7053eafd88e 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -1,8 +1,13 @@
-menuconfig DEV_DAX
+menuconfig DAX
 	tristate "DAX: direct access to differentiated memory"
+	select SRCU
 	default m if NVDIMM_DAX
+
+if DAX
+
+config DEV_DAX
+	tristate "Device DAX: direct access mapping device"
 	depends on TRANSPARENT_HUGEPAGE
-	select SRCU
 	help
 	  Support raw access to differentiated (persistence, bandwidth,
 	  latency...) memory via an mmap(2) capable character
@@ -11,7 +16,6 @@ menuconfig DEV_DAX
 	  baseline memory pool.  Mappings of a /dev/daxX.Y device impose
 	  restrictions that make the mapping behavior deterministic.
 
-if DEV_DAX
 
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
index 27c54e38478a..dc7422530462 100644
--- a/drivers/dax/Makefile
+++ b/drivers/dax/Makefile
@@ -1,4 +1,7 @@
-obj-$(CONFIG_DEV_DAX) += dax.o
+obj-$(CONFIG_DAX) += dax.o
+obj-$(CONFIG_DEV_DAX) += device_dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
+dax-y := super.o
 dax_pmem-y := pmem.o
+device_dax-y := device.o
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
new file mode 100644
index 000000000000..b6fc4f04636d
--- /dev/null
+++ b/drivers/dax/dax-private.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __DAX_PRIVATE_H__
+#define __DAX_PRIVATE_H__
+
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+/**
+ * struct dax_region - mapping infrastructure for dax devices
+ * @id: kernel-wide unique region for a memory range
+ * @base: linear address corresponding to @res
+ * @kref: to pin while other agents have a need to do lookups
+ * @dev: parent device backing this region
+ * @align: allocation and mapping alignment for child dax devices
+ * @res: physical address range of the region
+ * @pfn_flags: identify whether the pfns are paged back or not
+ */
+struct dax_region {
+	int id;
+	struct ida ida;
+	void *base;
+	struct kref kref;
+	struct device *dev;
+	unsigned int align;
+	struct resource res;
+	unsigned long pfn_flags;
+};
+
+/**
+ * struct dev_dax - instance data for a subdivision of a dax region
+ * @region - parent region
+ * @dax_dev - core dax functionality
+ * @dev - device core
+ * @id - child id in the region
+ * @num_resources - number of physical address extents in this device
+ * @res - array of physical address ranges
+ */
+struct dev_dax {
+	struct dax_region *region;
+	struct dax_device *dax_dev;
+	struct device dev;
+	int id;
+	int num_resources;
+	struct resource res[0];
+};
+#endif
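
Note: because struct dev_dax embeds its struct device, a bare device pointer converts back with container_of(); device.c in this series defines an equivalent accessor (the exact name here is an assumption):

static inline struct dev_dax *to_dev_dax(struct device *dev)
{
	return container_of(dev, struct dev_dax, dev);
}
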
diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h
index ddd829ab58c0..f9e5feea742c 100644
--- a/drivers/dax/dax.h
+++ b/drivers/dax/dax.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -12,14 +12,7 @@
  */
 #ifndef __DAX_H__
 #define __DAX_H__
-struct device;
-struct dax_dev;
-struct resource;
-struct dax_region;
-void dax_region_put(struct dax_region *dax_region);
-struct dax_region *alloc_dax_region(struct device *parent,
-		int region_id, struct resource *res, unsigned int align,
-		void *addr, unsigned long flags);
-struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
-		struct resource *res, int count);
+struct dax_device;
+struct dax_device *inode_dax(struct inode *inode);
+struct inode *dax_inode(struct dax_device *dax_dev);
 #endif /* __DAX_H__ */
diff --git a/drivers/dax/device-dax.h b/drivers/dax/device-dax.h
new file mode 100644
index 000000000000..fdcd9769ffde
--- /dev/null
+++ b/drivers/dax/device-dax.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __DEVICE_DAX_H__
+#define __DEVICE_DAX_H__
+struct device;
+struct dev_dax;
+struct resource;
+struct dax_region;
+void dax_region_put(struct dax_region *dax_region);
+struct dax_region *alloc_dax_region(struct device *parent,
+		int region_id, struct resource *res, unsigned int align,
+		void *addr, unsigned long flags);
+struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
+		struct resource *res, int count);
+#endif /* __DEVICE_DAX_H__ */
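
Note: device-dax.h now carries the device-driver-facing API while dax.h shrinks to the core inode interfaces. A sketch of the registration flow these declarations imply, modeled on drivers/dax/pmem.c; the region id, alignment, and flags are illustrative:

#include <linux/err.h>
#include <linux/sizes.h>
#include <linux/pfn_t.h>
#include "device-dax.h"

static int example_region_probe(struct device *parent, struct resource *res)
{
	struct dax_region *region;
	struct dev_dax *dev_dax;

	region = alloc_dax_region(parent, 0, res, SZ_2M, NULL, PFN_DEV);
	if (!region)
		return -ENOMEM;
	dev_dax = devm_create_dev_dax(region, res, 1);
	/* drop our reference; dev_dax holds its own while alive */
	dax_region_put(region);
	return PTR_ERR_OR_ZERO(dev_dax);
}
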
diff --git a/drivers/dax/dax.c b/drivers/dax/device.c
index 19795eb35579..006e657dfcb9 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/device.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -13,100 +13,38 @@
 #include <linux/pagemap.h>
 #include <linux/module.h>
 #include <linux/device.h>
-#include <linux/magic.h>
-#include <linux/mount.h>
 #include <linux/pfn_t.h>
-#include <linux/hash.h>
 #include <linux/cdev.h>
 #include <linux/slab.h>
 #include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include "dax-private.h"
 #include "dax.h"
 
-static dev_t dax_devt;
-DEFINE_STATIC_SRCU(dax_srcu);
 static struct class *dax_class;
-static DEFINE_IDA(dax_minor_ida);
-static int nr_dax = CONFIG_NR_DEV_DAX;
-module_param(nr_dax, int, S_IRUGO);
-static struct vfsmount *dax_mnt;
-static struct kmem_cache *dax_cache __read_mostly;
-static struct super_block *dax_superblock __read_mostly;
-MODULE_PARM_DESC(nr_dax, "max number of device-dax instances");
-
-/**
- * struct dax_region - mapping infrastructure for dax devices
- * @id: kernel-wide unique region for a memory range
- * @base: linear address corresponding to @res
- * @kref: to pin while other agents have a need to do lookups
- * @dev: parent device backing this region
- * @align: allocation and mapping alignment for child dax devices
- * @res: physical address range of the region
- * @pfn_flags: identify whether the pfns are paged back or not
- */
-struct dax_region {
-	int id;
-	struct ida ida;
-	void *base;
-	struct kref kref;
-	struct device *dev;
-	unsigned int align;
-	struct resource res;
-	unsigned long pfn_flags;
-};
 
-/**
- * struct dax_dev - subdivision of a dax region
- * @region - parent region
- * @dev - device backing the character device
- * @cdev - core chardev data
- * @alive - !alive + srcu grace period == no new mappings can be established
- * @id - child id in the region
- * @num_resources - number of physical address extents in this device
- * @res - array of physical address ranges
+/*
+ * Rely on the fact that drvdata is set before the attributes are
+ * registered, and that the attributes are unregistered before drvdata
+ * is cleared to assume that drvdata is always valid.
  */
-struct dax_dev {
-	struct dax_region *region;
-	struct inode *inode;
-	struct device dev;
-	struct cdev cdev;
-	bool alive;
-	int id;
-	int num_resources;
-	struct resource res[0];
-};
-
 static ssize_t id_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct dax_region *dax_region;
-	ssize_t rc = -ENXIO;
-
-	device_lock(dev);
-	dax_region = dev_get_drvdata(dev);
-	if (dax_region)
-		rc = sprintf(buf, "%d\n", dax_region->id);
-	device_unlock(dev);
+	struct dax_region *dax_region = dev_get_drvdata(dev);
 
-	return rc;
+	return sprintf(buf, "%d\n", dax_region->id);
 }
 static DEVICE_ATTR_RO(id);
 
 static ssize_t region_size_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct dax_region *dax_region;
-	ssize_t rc = -ENXIO;
-
-	device_lock(dev);
-	dax_region = dev_get_drvdata(dev);
-	if (dax_region)
-		rc = sprintf(buf, "%llu\n", (unsigned long long)
-				resource_size(&dax_region->res));
-	device_unlock(dev);
+	struct dax_region *dax_region = dev_get_drvdata(dev);
 
-	return rc;
+	return sprintf(buf, "%llu\n", (unsigned long long)
+			resource_size(&dax_region->res));
 }
 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
 		region_size_show, NULL);
@@ -114,16 +52,9 @@ static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, | |||
114 | static ssize_t align_show(struct device *dev, | 52 | static ssize_t align_show(struct device *dev, |
115 | struct device_attribute *attr, char *buf) | 53 | struct device_attribute *attr, char *buf) |
116 | { | 54 | { |
117 | struct dax_region *dax_region; | 55 | struct dax_region *dax_region = dev_get_drvdata(dev); |
118 | ssize_t rc = -ENXIO; | ||
119 | |||
120 | device_lock(dev); | ||
121 | dax_region = dev_get_drvdata(dev); | ||
122 | if (dax_region) | ||
123 | rc = sprintf(buf, "%u\n", dax_region->align); | ||
124 | device_unlock(dev); | ||
125 | 56 | ||
126 | return rc; | 57 | return sprintf(buf, "%u\n", dax_region->align); |
127 | } | 58 | } |
128 | static DEVICE_ATTR_RO(align); | 59 | static DEVICE_ATTR_RO(align); |
129 | 60 | ||
@@ -144,117 +75,6 @@ static const struct attribute_group *dax_region_attribute_groups[] = { | |||
144 | NULL, | 75 | NULL, |
145 | }; | 76 | }; |
146 | 77 | ||
147 | static struct inode *dax_alloc_inode(struct super_block *sb) | ||
148 | { | ||
149 | return kmem_cache_alloc(dax_cache, GFP_KERNEL); | ||
150 | } | ||
151 | |||
152 | static void dax_i_callback(struct rcu_head *head) | ||
153 | { | ||
154 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
155 | |||
156 | kmem_cache_free(dax_cache, inode); | ||
157 | } | ||
158 | |||
159 | static void dax_destroy_inode(struct inode *inode) | ||
160 | { | ||
161 | call_rcu(&inode->i_rcu, dax_i_callback); | ||
162 | } | ||
163 | |||
164 | static const struct super_operations dax_sops = { | ||
165 | .statfs = simple_statfs, | ||
166 | .alloc_inode = dax_alloc_inode, | ||
167 | .destroy_inode = dax_destroy_inode, | ||
168 | .drop_inode = generic_delete_inode, | ||
169 | }; | ||
170 | |||
171 | static struct dentry *dax_mount(struct file_system_type *fs_type, | ||
172 | int flags, const char *dev_name, void *data) | ||
173 | { | ||
174 | return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC); | ||
175 | } | ||
176 | |||
177 | static struct file_system_type dax_type = { | ||
178 | .name = "dax", | ||
179 | .mount = dax_mount, | ||
180 | .kill_sb = kill_anon_super, | ||
181 | }; | ||
182 | |||
183 | static int dax_test(struct inode *inode, void *data) | ||
184 | { | ||
185 | return inode->i_cdev == data; | ||
186 | } | ||
187 | |||
188 | static int dax_set(struct inode *inode, void *data) | ||
189 | { | ||
190 | inode->i_cdev = data; | ||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | static struct inode *dax_inode_get(struct cdev *cdev, dev_t devt) | ||
195 | { | ||
196 | struct inode *inode; | ||
197 | |||
198 | inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31), | ||
199 | dax_test, dax_set, cdev); | ||
200 | |||
201 | if (!inode) | ||
202 | return NULL; | ||
203 | |||
204 | if (inode->i_state & I_NEW) { | ||
205 | inode->i_mode = S_IFCHR; | ||
206 | inode->i_flags = S_DAX; | ||
207 | inode->i_rdev = devt; | ||
208 | mapping_set_gfp_mask(&inode->i_data, GFP_USER); | ||
209 | unlock_new_inode(inode); | ||
210 | } | ||
211 | return inode; | ||
212 | } | ||
213 | |||
214 | static void init_once(void *inode) | ||
215 | { | ||
216 | inode_init_once(inode); | ||
217 | } | ||
218 | |||
219 | static int dax_inode_init(void) | ||
220 | { | ||
221 | int rc; | ||
222 | |||
223 | dax_cache = kmem_cache_create("dax_cache", sizeof(struct inode), 0, | ||
224 | (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | ||
225 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), | ||
226 | init_once); | ||
227 | if (!dax_cache) | ||
228 | return -ENOMEM; | ||
229 | |||
230 | rc = register_filesystem(&dax_type); | ||
231 | if (rc) | ||
232 | goto err_register_fs; | ||
233 | |||
234 | dax_mnt = kern_mount(&dax_type); | ||
235 | if (IS_ERR(dax_mnt)) { | ||
236 | rc = PTR_ERR(dax_mnt); | ||
237 | goto err_mount; | ||
238 | } | ||
239 | dax_superblock = dax_mnt->mnt_sb; | ||
240 | |||
241 | return 0; | ||
242 | |||
243 | err_mount: | ||
244 | unregister_filesystem(&dax_type); | ||
245 | err_register_fs: | ||
246 | kmem_cache_destroy(dax_cache); | ||
247 | |||
248 | return rc; | ||
249 | } | ||
250 | |||
251 | static void dax_inode_exit(void) | ||
252 | { | ||
253 | kern_unmount(dax_mnt); | ||
254 | unregister_filesystem(&dax_type); | ||
255 | kmem_cache_destroy(dax_cache); | ||
256 | } | ||
257 | |||
258 | static void dax_region_free(struct kref *kref) | 78 | static void dax_region_free(struct kref *kref) |
259 | { | 79 | { |
260 | struct dax_region *dax_region; | 80 | struct dax_region *dax_region; |
@@ -323,47 +143,47 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, | |||
323 | } | 143 | } |
324 | EXPORT_SYMBOL_GPL(alloc_dax_region); | 144 | EXPORT_SYMBOL_GPL(alloc_dax_region); |
325 | 145 | ||
326 | static struct dax_dev *to_dax_dev(struct device *dev) | 146 | static struct dev_dax *to_dev_dax(struct device *dev) |
327 | { | 147 | { |
328 | return container_of(dev, struct dax_dev, dev); | 148 | return container_of(dev, struct dev_dax, dev); |
329 | } | 149 | } |
330 | 150 | ||
331 | static ssize_t size_show(struct device *dev, | 151 | static ssize_t size_show(struct device *dev, |
332 | struct device_attribute *attr, char *buf) | 152 | struct device_attribute *attr, char *buf) |
333 | { | 153 | { |
334 | struct dax_dev *dax_dev = to_dax_dev(dev); | 154 | struct dev_dax *dev_dax = to_dev_dax(dev); |
335 | unsigned long long size = 0; | 155 | unsigned long long size = 0; |
336 | int i; | 156 | int i; |
337 | 157 | ||
338 | for (i = 0; i < dax_dev->num_resources; i++) | 158 | for (i = 0; i < dev_dax->num_resources; i++) |
339 | size += resource_size(&dax_dev->res[i]); | 159 | size += resource_size(&dev_dax->res[i]); |
340 | 160 | ||
341 | return sprintf(buf, "%llu\n", size); | 161 | return sprintf(buf, "%llu\n", size); |
342 | } | 162 | } |
343 | static DEVICE_ATTR_RO(size); | 163 | static DEVICE_ATTR_RO(size); |
344 | 164 | ||
345 | static struct attribute *dax_device_attributes[] = { | 165 | static struct attribute *dev_dax_attributes[] = { |
346 | &dev_attr_size.attr, | 166 | &dev_attr_size.attr, |
347 | NULL, | 167 | NULL, |
348 | }; | 168 | }; |
349 | 169 | ||
350 | static const struct attribute_group dax_device_attribute_group = { | 170 | static const struct attribute_group dev_dax_attribute_group = { |
351 | .attrs = dax_device_attributes, | 171 | .attrs = dev_dax_attributes, |
352 | }; | 172 | }; |
353 | 173 | ||
354 | static const struct attribute_group *dax_attribute_groups[] = { | 174 | static const struct attribute_group *dax_attribute_groups[] = { |
355 | &dax_device_attribute_group, | 175 | &dev_dax_attribute_group, |
356 | NULL, | 176 | NULL, |
357 | }; | 177 | }; |
358 | 178 | ||
359 | static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma, | 179 | static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, |
360 | const char *func) | 180 | const char *func) |
361 | { | 181 | { |
362 | struct dax_region *dax_region = dax_dev->region; | 182 | struct dax_region *dax_region = dev_dax->region; |
363 | struct device *dev = &dax_dev->dev; | 183 | struct device *dev = &dev_dax->dev; |
364 | unsigned long mask; | 184 | unsigned long mask; |
365 | 185 | ||
366 | if (!dax_dev->alive) | 186 | if (!dax_alive(dev_dax->dax_dev)) |
367 | return -ENXIO; | 187 | return -ENXIO; |
368 | 188 | ||
369 | /* prevent private mappings from being established */ | 189 | /* prevent private mappings from being established */ |
@@ -397,23 +217,24 @@ static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma, | |||
397 | return 0; | 217 | return 0; |
398 | } | 218 | } |
399 | 219 | ||
400 | static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff, | 220 | /* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */ |
221 | __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, | ||
401 | unsigned long size) | 222 | unsigned long size) |
402 | { | 223 | { |
403 | struct resource *res; | 224 | struct resource *res; |
404 | phys_addr_t phys; | 225 | phys_addr_t phys; |
405 | int i; | 226 | int i; |
406 | 227 | ||
407 | for (i = 0; i < dax_dev->num_resources; i++) { | 228 | for (i = 0; i < dev_dax->num_resources; i++) { |
408 | res = &dax_dev->res[i]; | 229 | res = &dev_dax->res[i]; |
409 | phys = pgoff * PAGE_SIZE + res->start; | 230 | phys = pgoff * PAGE_SIZE + res->start; |
410 | if (phys >= res->start && phys <= res->end) | 231 | if (phys >= res->start && phys <= res->end) |
411 | break; | 232 | break; |
412 | pgoff -= PHYS_PFN(resource_size(res)); | 233 | pgoff -= PHYS_PFN(resource_size(res)); |
413 | } | 234 | } |
414 | 235 | ||
415 | if (i < dax_dev->num_resources) { | 236 | if (i < dev_dax->num_resources) { |
416 | res = &dax_dev->res[i]; | 237 | res = &dev_dax->res[i]; |
417 | if (phys + size - 1 <= res->end) | 238 | if (phys + size - 1 <= res->end) |
418 | return phys; | 239 | return phys; |
419 | } | 240 | } |
@@ -421,28 +242,29 @@ static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff, | |||
421 | return -1; | 242 | return -1; |
422 | } | 243 | } |
423 | 244 | ||
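The resource walk in dax_pgoff_to_phys() above debits each skipped extent's page count from pgoff before trying the next one. A worked example, with invented extent addresses:

/*
 * Illustration only: two hypothetical extents of 4 pages each,
 *   res[0] = 0x1000000-0x1003fff, res[1] = 0x2000000-0x2003fff.
 * dax_pgoff_to_phys(dev_dax, 5, PAGE_SIZE) proceeds as:
 *   i == 0: phys = 5 * 4096 + 0x1000000 = 0x1005000, past res[0].end,
 *           so pgoff -= PHYS_PFN(0x4000), leaving pgoff == 1;
 *   i == 1: phys = 1 * 4096 + 0x2000000 = 0x2001000, in range, and
 *           phys + 4096 - 1 <= res[1].end, so 0x2001000 is returned.
 */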
424 | static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | 245 | static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) |
425 | { | 246 | { |
426 | struct device *dev = &dax_dev->dev; | 247 | struct device *dev = &dev_dax->dev; |
427 | struct dax_region *dax_region; | 248 | struct dax_region *dax_region; |
428 | int rc = VM_FAULT_SIGBUS; | 249 | int rc = VM_FAULT_SIGBUS; |
429 | phys_addr_t phys; | 250 | phys_addr_t phys; |
430 | pfn_t pfn; | 251 | pfn_t pfn; |
431 | unsigned int fault_size = PAGE_SIZE; | 252 | unsigned int fault_size = PAGE_SIZE; |
432 | 253 | ||
433 | if (check_vma(dax_dev, vmf->vma, __func__)) | 254 | if (check_vma(dev_dax, vmf->vma, __func__)) |
434 | return VM_FAULT_SIGBUS; | 255 | return VM_FAULT_SIGBUS; |
435 | 256 | ||
436 | dax_region = dax_dev->region; | 257 | dax_region = dev_dax->region; |
437 | if (dax_region->align > PAGE_SIZE) { | 258 | if (dax_region->align > PAGE_SIZE) { |
438 | dev_dbg(dev, "%s: alignment > fault size\n", __func__); | 259 | dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n", |
260 | __func__, dax_region->align, fault_size); | ||
439 | return VM_FAULT_SIGBUS; | 261 | return VM_FAULT_SIGBUS; |
440 | } | 262 | } |
441 | 263 | ||
442 | if (fault_size != dax_region->align) | 264 | if (fault_size != dax_region->align) |
443 | return VM_FAULT_SIGBUS; | 265 | return VM_FAULT_SIGBUS; |
444 | 266 | ||
445 | phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); | 267 | phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE); |
446 | if (phys == -1) { | 268 | if (phys == -1) { |
447 | dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, | 269 | dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, |
448 | vmf->pgoff); | 270 | vmf->pgoff); |
@@ -461,28 +283,29 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | |||
461 | return VM_FAULT_NOPAGE; | 283 | return VM_FAULT_NOPAGE; |
462 | } | 284 | } |
463 | 285 | ||
464 | static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | 286 | static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) |
465 | { | 287 | { |
466 | unsigned long pmd_addr = vmf->address & PMD_MASK; | 288 | unsigned long pmd_addr = vmf->address & PMD_MASK; |
467 | struct device *dev = &dax_dev->dev; | 289 | struct device *dev = &dev_dax->dev; |
468 | struct dax_region *dax_region; | 290 | struct dax_region *dax_region; |
469 | phys_addr_t phys; | 291 | phys_addr_t phys; |
470 | pgoff_t pgoff; | 292 | pgoff_t pgoff; |
471 | pfn_t pfn; | 293 | pfn_t pfn; |
472 | unsigned int fault_size = PMD_SIZE; | 294 | unsigned int fault_size = PMD_SIZE; |
473 | 295 | ||
474 | if (check_vma(dax_dev, vmf->vma, __func__)) | 296 | if (check_vma(dev_dax, vmf->vma, __func__)) |
475 | return VM_FAULT_SIGBUS; | 297 | return VM_FAULT_SIGBUS; |
476 | 298 | ||
477 | dax_region = dax_dev->region; | 299 | dax_region = dev_dax->region; |
478 | if (dax_region->align > PMD_SIZE) { | 300 | if (dax_region->align > PMD_SIZE) { |
479 | dev_dbg(dev, "%s: alignment > fault size\n", __func__); | 301 | dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n", |
302 | __func__, dax_region->align, fault_size); | ||
480 | return VM_FAULT_SIGBUS; | 303 | return VM_FAULT_SIGBUS; |
481 | } | 304 | } |
482 | 305 | ||
483 | /* dax pmd mappings require pfn_t_devmap() */ | 306 | /* dax pmd mappings require pfn_t_devmap() */ |
484 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { | 307 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { |
485 | dev_dbg(dev, "%s: alignment > fault size\n", __func__); | 308 | dev_dbg(dev, "%s: region lacks devmap flags\n", __func__); |
486 | return VM_FAULT_SIGBUS; | 309 | return VM_FAULT_SIGBUS; |
487 | } | 310 | } |
488 | 311 | ||
@@ -497,7 +320,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | |||
497 | return VM_FAULT_SIGBUS; | 320 | return VM_FAULT_SIGBUS; |
498 | 321 | ||
499 | pgoff = linear_page_index(vmf->vma, pmd_addr); | 322 | pgoff = linear_page_index(vmf->vma, pmd_addr); |
500 | phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); | 323 | phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE); |
501 | if (phys == -1) { | 324 | if (phys == -1) { |
502 | dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, | 325 | dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, |
503 | pgoff); | 326 | pgoff); |
@@ -511,10 +334,10 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | |||
511 | } | 334 | } |
512 | 335 | ||
513 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD | 336 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD |
514 | static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | 337 | static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) |
515 | { | 338 | { |
516 | unsigned long pud_addr = vmf->address & PUD_MASK; | 339 | unsigned long pud_addr = vmf->address & PUD_MASK; |
517 | struct device *dev = &dax_dev->dev; | 340 | struct device *dev = &dev_dax->dev; |
518 | struct dax_region *dax_region; | 341 | struct dax_region *dax_region; |
519 | phys_addr_t phys; | 342 | phys_addr_t phys; |
520 | pgoff_t pgoff; | 343 | pgoff_t pgoff; |
@@ -522,18 +345,19 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | |||
522 | unsigned int fault_size = PUD_SIZE; | 345 | unsigned int fault_size = PUD_SIZE; |
523 | 346 | ||
524 | 347 | ||
525 | if (check_vma(dax_dev, vmf->vma, __func__)) | 348 | if (check_vma(dev_dax, vmf->vma, __func__)) |
526 | return VM_FAULT_SIGBUS; | 349 | return VM_FAULT_SIGBUS; |
527 | 350 | ||
528 | dax_region = dax_dev->region; | 351 | dax_region = dev_dax->region; |
529 | if (dax_region->align > PUD_SIZE) { | 352 | if (dax_region->align > PUD_SIZE) { |
530 | dev_dbg(dev, "%s: alignment > fault size\n", __func__); | 353 | dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n", |
354 | __func__, dax_region->align, fault_size); | ||
531 | return VM_FAULT_SIGBUS; | 355 | return VM_FAULT_SIGBUS; |
532 | } | 356 | } |
533 | 357 | ||
534 | /* dax pud mappings require pfn_t_devmap() */ | 358 | /* dax pud mappings require pfn_t_devmap() */ |
535 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { | 359 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { |
536 | dev_dbg(dev, "%s: alignment > fault size\n", __func__); | 360 | dev_dbg(dev, "%s: region lacks devmap flags\n", __func__); |
537 | return VM_FAULT_SIGBUS; | 361 | return VM_FAULT_SIGBUS; |
538 | } | 362 | } |
539 | 363 | ||
@@ -548,7 +372,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | |||
548 | return VM_FAULT_SIGBUS; | 372 | return VM_FAULT_SIGBUS; |
549 | 373 | ||
550 | pgoff = linear_page_index(vmf->vma, pud_addr); | 374 | pgoff = linear_page_index(vmf->vma, pud_addr); |
551 | phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE); | 375 | phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE); |
552 | if (phys == -1) { | 376 | if (phys == -1) { |
553 | dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, | 377 | dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, |
554 | pgoff); | 378 | pgoff); |
@@ -561,65 +385,71 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | |||
561 | vmf->flags & FAULT_FLAG_WRITE); | 385 | vmf->flags & FAULT_FLAG_WRITE); |
562 | } | 386 | } |
563 | #else | 387 | #else |
564 | static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) | 388 | static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) |
565 | { | 389 | { |
566 | return VM_FAULT_FALLBACK; | 390 | return VM_FAULT_FALLBACK; |
567 | } | 391 | } |
568 | #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ | 392 | #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
569 | 393 | ||
570 | static int dax_dev_huge_fault(struct vm_fault *vmf, | 394 | static int dev_dax_huge_fault(struct vm_fault *vmf, |
571 | enum page_entry_size pe_size) | 395 | enum page_entry_size pe_size) |
572 | { | 396 | { |
573 | int rc, id; | 397 | int rc, id; |
574 | struct file *filp = vmf->vma->vm_file; | 398 | struct file *filp = vmf->vma->vm_file; |
575 | struct dax_dev *dax_dev = filp->private_data; | 399 | struct dev_dax *dev_dax = filp->private_data; |
576 | 400 | ||
577 | dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, | 401 | dev_dbg(&dev_dax->dev, "%s: %s: %s (%#lx - %#lx) size = %d\n", __func__, |
578 | current->comm, (vmf->flags & FAULT_FLAG_WRITE) | 402 | current->comm, (vmf->flags & FAULT_FLAG_WRITE) |
579 | ? "write" : "read", | 403 | ? "write" : "read", |
580 | vmf->vma->vm_start, vmf->vma->vm_end); | 404 | vmf->vma->vm_start, vmf->vma->vm_end, pe_size); |
581 | 405 | ||
582 | id = srcu_read_lock(&dax_srcu); | 406 | id = dax_read_lock(); |
583 | switch (pe_size) { | 407 | switch (pe_size) { |
584 | case PE_SIZE_PTE: | 408 | case PE_SIZE_PTE: |
585 | rc = __dax_dev_pte_fault(dax_dev, vmf); | 409 | rc = __dev_dax_pte_fault(dev_dax, vmf); |
586 | break; | 410 | break; |
587 | case PE_SIZE_PMD: | 411 | case PE_SIZE_PMD: |
588 | rc = __dax_dev_pmd_fault(dax_dev, vmf); | 412 | rc = __dev_dax_pmd_fault(dev_dax, vmf); |
589 | break; | 413 | break; |
590 | case PE_SIZE_PUD: | 414 | case PE_SIZE_PUD: |
591 | rc = __dax_dev_pud_fault(dax_dev, vmf); | 415 | rc = __dev_dax_pud_fault(dev_dax, vmf); |
592 | break; | 416 | break; |
593 | default: | 417 | default: |
594 | return VM_FAULT_FALLBACK; | 418 | rc = VM_FAULT_SIGBUS; |
595 | } | 419 | } |
596 | srcu_read_unlock(&dax_srcu, id); | 420 | dax_read_unlock(id); |
597 | 421 | ||
598 | return rc; | 422 | return rc; |
599 | } | 423 | } |
600 | 424 | ||
601 | static int dax_dev_fault(struct vm_fault *vmf) | 425 | static int dev_dax_fault(struct vm_fault *vmf) |
602 | { | 426 | { |
603 | return dax_dev_huge_fault(vmf, PE_SIZE_PTE); | 427 | return dev_dax_huge_fault(vmf, PE_SIZE_PTE); |
604 | } | 428 | } |
605 | 429 | ||
606 | static const struct vm_operations_struct dax_dev_vm_ops = { | 430 | static const struct vm_operations_struct dax_vm_ops = { |
607 | .fault = dax_dev_fault, | 431 | .fault = dev_dax_fault, |
608 | .huge_fault = dax_dev_huge_fault, | 432 | .huge_fault = dev_dax_huge_fault, |
609 | }; | 433 | }; |
610 | 434 | ||
611 | static int dax_mmap(struct file *filp, struct vm_area_struct *vma) | 435 | static int dax_mmap(struct file *filp, struct vm_area_struct *vma) |
612 | { | 436 | { |
613 | struct dax_dev *dax_dev = filp->private_data; | 437 | struct dev_dax *dev_dax = filp->private_data; |
614 | int rc; | 438 | int rc, id; |
615 | 439 | ||
616 | dev_dbg(&dax_dev->dev, "%s\n", __func__); | 440 | dev_dbg(&dev_dax->dev, "%s\n", __func__); |
617 | 441 | ||
618 | rc = check_vma(dax_dev, vma, __func__); | 442 | /* |
443 | * We lock to check dax_dev liveness and will re-check at | ||
444 | * fault time. | ||
445 | */ | ||
446 | id = dax_read_lock(); | ||
447 | rc = check_vma(dev_dax, vma, __func__); | ||
448 | dax_read_unlock(id); | ||
619 | if (rc) | 449 | if (rc) |
620 | return rc; | 450 | return rc; |
621 | 451 | ||
622 | vma->vm_ops = &dax_dev_vm_ops; | 452 | vma->vm_ops = &dax_vm_ops; |
623 | vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; | 453 | vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; |
624 | return 0; | 454 | return 0; |
625 | } | 455 | } |
@@ -630,13 +460,13 @@ static unsigned long dax_get_unmapped_area(struct file *filp, | |||
630 | unsigned long flags) | 460 | unsigned long flags) |
631 | { | 461 | { |
632 | unsigned long off, off_end, off_align, len_align, addr_align, align; | 462 | unsigned long off, off_end, off_align, len_align, addr_align, align; |
633 | struct dax_dev *dax_dev = filp ? filp->private_data : NULL; | 463 | struct dev_dax *dev_dax = filp ? filp->private_data : NULL; |
634 | struct dax_region *dax_region; | 464 | struct dax_region *dax_region; |
635 | 465 | ||
636 | if (!dax_dev || addr) | 466 | if (!dev_dax || addr) |
637 | goto out; | 467 | goto out; |
638 | 468 | ||
639 | dax_region = dax_dev->region; | 469 | dax_region = dev_dax->region; |
640 | align = dax_region->align; | 470 | align = dax_region->align; |
641 | off = pgoff << PAGE_SHIFT; | 471 | off = pgoff << PAGE_SHIFT; |
642 | off_end = off + len; | 472 | off_end = off + len; |
@@ -661,14 +491,15 @@ static unsigned long dax_get_unmapped_area(struct file *filp, | |||
661 | 491 | ||
662 | static int dax_open(struct inode *inode, struct file *filp) | 492 | static int dax_open(struct inode *inode, struct file *filp) |
663 | { | 493 | { |
664 | struct dax_dev *dax_dev; | 494 | struct dax_device *dax_dev = inode_dax(inode); |
495 | struct inode *__dax_inode = dax_inode(dax_dev); | ||
496 | struct dev_dax *dev_dax = dax_get_private(dax_dev); | ||
665 | 497 | ||
666 | dax_dev = container_of(inode->i_cdev, struct dax_dev, cdev); | 498 | dev_dbg(&dev_dax->dev, "%s\n", __func__); |
667 | dev_dbg(&dax_dev->dev, "%s\n", __func__); | 499 | inode->i_mapping = __dax_inode->i_mapping; |
668 | inode->i_mapping = dax_dev->inode->i_mapping; | 500 | inode->i_mapping->host = __dax_inode; |
669 | inode->i_mapping->host = dax_dev->inode; | ||
670 | filp->f_mapping = inode->i_mapping; | 501 | filp->f_mapping = inode->i_mapping; |
671 | filp->private_data = dax_dev; | 502 | filp->private_data = dev_dax; |
672 | inode->i_flags = S_DAX; | 503 | inode->i_flags = S_DAX; |
673 | 504 | ||
674 | return 0; | 505 | return 0; |
@@ -676,9 +507,9 @@ static int dax_open(struct inode *inode, struct file *filp) | |||
676 | 507 | ||
677 | static int dax_release(struct inode *inode, struct file *filp) | 508 | static int dax_release(struct inode *inode, struct file *filp) |
678 | { | 509 | { |
679 | struct dax_dev *dax_dev = filp->private_data; | 510 | struct dev_dax *dev_dax = filp->private_data; |
680 | 511 | ||
681 | dev_dbg(&dax_dev->dev, "%s\n", __func__); | 512 | dev_dbg(&dev_dax->dev, "%s\n", __func__); |
682 | return 0; | 513 | return 0; |
683 | } | 514 | } |
684 | 515 | ||
@@ -691,55 +522,54 @@ static const struct file_operations dax_fops = { | |||
691 | .mmap = dax_mmap, | 522 | .mmap = dax_mmap, |
692 | }; | 523 | }; |
693 | 524 | ||
694 | static void dax_dev_release(struct device *dev) | 525 | static void dev_dax_release(struct device *dev) |
695 | { | 526 | { |
696 | struct dax_dev *dax_dev = to_dax_dev(dev); | 527 | struct dev_dax *dev_dax = to_dev_dax(dev); |
697 | struct dax_region *dax_region = dax_dev->region; | 528 | struct dax_region *dax_region = dev_dax->region; |
529 | struct dax_device *dax_dev = dev_dax->dax_dev; | ||
698 | 530 | ||
699 | ida_simple_remove(&dax_region->ida, dax_dev->id); | 531 | ida_simple_remove(&dax_region->ida, dev_dax->id); |
700 | ida_simple_remove(&dax_minor_ida, MINOR(dev->devt)); | ||
701 | dax_region_put(dax_region); | 532 | dax_region_put(dax_region); |
702 | iput(dax_dev->inode); | 533 | put_dax(dax_dev); |
703 | kfree(dax_dev); | 534 | kfree(dev_dax); |
704 | } | 535 | } |
705 | 536 | ||
706 | static void kill_dax_dev(struct dax_dev *dax_dev) | 537 | static void kill_dev_dax(struct dev_dax *dev_dax) |
707 | { | 538 | { |
708 | /* | 539 | struct dax_device *dax_dev = dev_dax->dax_dev; |
709 | * Note, rcu is not protecting the liveness of dax_dev, rcu is | 540 | struct inode *inode = dax_inode(dax_dev); |
710 | * ensuring that any fault handlers that might have seen | 541 | |
711 | * dax_dev->alive == true, have completed. Any fault handlers | 542 | kill_dax(dax_dev); |
712 | * that start after synchronize_srcu() has started will abort | 543 | unmap_mapping_range(inode->i_mapping, 0, 0, 1); |
713 | * upon seeing dax_dev->alive == false. | ||
714 | */ | ||
715 | dax_dev->alive = false; | ||
716 | synchronize_srcu(&dax_srcu); | ||
717 | unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); | ||
718 | } | 544 | } |
719 | 545 | ||
720 | static void unregister_dax_dev(void *dev) | 546 | static void unregister_dev_dax(void *dev) |
721 | { | 547 | { |
722 | struct dax_dev *dax_dev = to_dax_dev(dev); | 548 | struct dev_dax *dev_dax = to_dev_dax(dev); |
549 | struct dax_device *dax_dev = dev_dax->dax_dev; | ||
550 | struct inode *inode = dax_inode(dax_dev); | ||
551 | struct cdev *cdev = inode->i_cdev; | ||
723 | 552 | ||
724 | dev_dbg(dev, "%s\n", __func__); | 553 | dev_dbg(dev, "%s\n", __func__); |
725 | 554 | ||
726 | kill_dax_dev(dax_dev); | 555 | kill_dev_dax(dev_dax); |
727 | cdev_device_del(&dax_dev->cdev, dev); | 556 | cdev_device_del(cdev, dev); |
728 | put_device(dev); | 557 | put_device(dev); |
729 | } | 558 | } |
730 | 559 | ||
731 | struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region, | 560 | struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, |
732 | struct resource *res, int count) | 561 | struct resource *res, int count) |
733 | { | 562 | { |
734 | struct device *parent = dax_region->dev; | 563 | struct device *parent = dax_region->dev; |
735 | struct dax_dev *dax_dev; | 564 | struct dax_device *dax_dev; |
736 | int rc = 0, minor, i; | 565 | struct dev_dax *dev_dax; |
566 | struct inode *inode; | ||
737 | struct device *dev; | 567 | struct device *dev; |
738 | struct cdev *cdev; | 568 | struct cdev *cdev; |
739 | dev_t dev_t; | 569 | int rc = 0, i; |
740 | 570 | ||
741 | dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL); | 571 | dev_dax = kzalloc(sizeof(*dev_dax) + sizeof(*res) * count, GFP_KERNEL); |
742 | if (!dax_dev) | 572 | if (!dev_dax) |
743 | return ERR_PTR(-ENOMEM); | 573 | return ERR_PTR(-ENOMEM); |
744 | 574 | ||
745 | for (i = 0; i < count; i++) { | 575 | for (i = 0; i < count; i++) { |
@@ -749,110 +579,79 @@ struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region, | |||
749 | rc = -EINVAL; | 579 | rc = -EINVAL; |
750 | break; | 580 | break; |
751 | } | 581 | } |
752 | dax_dev->res[i].start = res[i].start; | 582 | dev_dax->res[i].start = res[i].start; |
753 | dax_dev->res[i].end = res[i].end; | 583 | dev_dax->res[i].end = res[i].end; |
754 | } | 584 | } |
755 | 585 | ||
756 | if (i < count) | 586 | if (i < count) |
757 | goto err_id; | 587 | goto err_id; |
758 | 588 | ||
759 | dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); | 589 | dev_dax->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); |
760 | if (dax_dev->id < 0) { | 590 | if (dev_dax->id < 0) { |
761 | rc = dax_dev->id; | 591 | rc = dev_dax->id; |
762 | goto err_id; | 592 | goto err_id; |
763 | } | 593 | } |
764 | 594 | ||
765 | minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL); | 595 | /* |
766 | if (minor < 0) { | 596 | * No 'host' or dax_operations since there is no access to this |
767 | rc = minor; | 597 | * device outside of mmap of the resulting character device. |
768 | goto err_minor; | 598 | */ |
769 | } | 599 | dax_dev = alloc_dax(dev_dax, NULL, NULL); |
770 | 600 | if (!dax_dev) | |
771 | dev_t = MKDEV(MAJOR(dax_devt), minor); | 601 | goto err_dax; |
772 | dev = &dax_dev->dev; | ||
773 | dax_dev->inode = dax_inode_get(&dax_dev->cdev, dev_t); | ||
774 | if (!dax_dev->inode) { | ||
775 | rc = -ENOMEM; | ||
776 | goto err_inode; | ||
777 | } | ||
778 | 602 | ||
779 | /* from here on we're committed to teardown via dax_dev_release() */ | 603 | /* from here on we're committed to teardown via dax_dev_release() */ |
604 | dev = &dev_dax->dev; | ||
780 | device_initialize(dev); | 605 | device_initialize(dev); |
781 | 606 | ||
782 | cdev = &dax_dev->cdev; | 607 | inode = dax_inode(dax_dev); |
608 | cdev = inode->i_cdev; | ||
783 | cdev_init(cdev, &dax_fops); | 609 | cdev_init(cdev, &dax_fops); |
784 | cdev->owner = parent->driver->owner; | 610 | cdev->owner = parent->driver->owner; |
785 | 611 | ||
786 | dax_dev->num_resources = count; | 612 | dev_dax->num_resources = count; |
787 | dax_dev->alive = true; | 613 | dev_dax->dax_dev = dax_dev; |
788 | dax_dev->region = dax_region; | 614 | dev_dax->region = dax_region; |
789 | kref_get(&dax_region->kref); | 615 | kref_get(&dax_region->kref); |
790 | 616 | ||
791 | dev->devt = dev_t; | 617 | dev->devt = inode->i_rdev; |
792 | dev->class = dax_class; | 618 | dev->class = dax_class; |
793 | dev->parent = parent; | 619 | dev->parent = parent; |
794 | dev->groups = dax_attribute_groups; | 620 | dev->groups = dax_attribute_groups; |
795 | dev->release = dax_dev_release; | 621 | dev->release = dev_dax_release; |
796 | dev_set_name(dev, "dax%d.%d", dax_region->id, dax_dev->id); | 622 | dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); |
797 | 623 | ||
798 | rc = cdev_device_add(cdev, dev); | 624 | rc = cdev_device_add(cdev, dev); |
799 | if (rc) { | 625 | if (rc) { |
800 | kill_dax_dev(dax_dev); | 626 | kill_dev_dax(dev_dax); |
801 | put_device(dev); | 627 | put_device(dev); |
802 | return ERR_PTR(rc); | 628 | return ERR_PTR(rc); |
803 | } | 629 | } |
804 | 630 | ||
805 | rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev); | 631 | rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev); |
806 | if (rc) | 632 | if (rc) |
807 | return ERR_PTR(rc); | 633 | return ERR_PTR(rc); |
808 | 634 | ||
809 | return dax_dev; | 635 | return dev_dax; |
810 | 636 | ||
811 | err_inode: | 637 | err_dax: |
812 | ida_simple_remove(&dax_minor_ida, minor); | 638 | ida_simple_remove(&dax_region->ida, dev_dax->id); |
813 | err_minor: | ||
814 | ida_simple_remove(&dax_region->ida, dax_dev->id); | ||
815 | err_id: | 639 | err_id: |
816 | kfree(dax_dev); | 640 | kfree(dev_dax); |
817 | 641 | ||
818 | return ERR_PTR(rc); | 642 | return ERR_PTR(rc); |
819 | } | 643 | } |
820 | EXPORT_SYMBOL_GPL(devm_create_dax_dev); | 644 | EXPORT_SYMBOL_GPL(devm_create_dev_dax); |
821 | 645 | ||
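One subtlety in the error handling of devm_create_dev_dax() above: rc is still 0 when alloc_dax() fails, so the err_dax path returns ERR_PTR(0), i.e. NULL, which PTR_ERR_OR_ZERO() callers such as dax_pmem_probe() below would mistake for success. A corrective sketch, not part of this patch:

	dax_dev = alloc_dax(dev_dax, NULL, NULL);
	if (!dax_dev) {
		rc = -ENOMEM;	/* keep err_dax from returning ERR_PTR(0) */
		goto err_dax;
	}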
822 | static int __init dax_init(void) | 646 | static int __init dax_init(void) |
823 | { | 647 | { |
824 | int rc; | ||
825 | |||
826 | rc = dax_inode_init(); | ||
827 | if (rc) | ||
828 | return rc; | ||
829 | |||
830 | nr_dax = max(nr_dax, 256); | ||
831 | rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax"); | ||
832 | if (rc) | ||
833 | goto err_chrdev; | ||
834 | |||
835 | dax_class = class_create(THIS_MODULE, "dax"); | 648 | dax_class = class_create(THIS_MODULE, "dax"); |
836 | if (IS_ERR(dax_class)) { | 649 | return PTR_ERR_OR_ZERO(dax_class); |
837 | rc = PTR_ERR(dax_class); | ||
838 | goto err_class; | ||
839 | } | ||
840 | |||
841 | return 0; | ||
842 | |||
843 | err_class: | ||
844 | unregister_chrdev_region(dax_devt, nr_dax); | ||
845 | err_chrdev: | ||
846 | dax_inode_exit(); | ||
847 | return rc; | ||
848 | } | 650 | } |
849 | 651 | ||
850 | static void __exit dax_exit(void) | 652 | static void __exit dax_exit(void) |
851 | { | 653 | { |
852 | class_destroy(dax_class); | 654 | class_destroy(dax_class); |
853 | unregister_chrdev_region(dax_devt, nr_dax); | ||
854 | ida_destroy(&dax_minor_ida); | ||
855 | dax_inode_exit(); | ||
856 | } | 655 | } |
857 | 656 | ||
858 | MODULE_AUTHOR("Intel Corporation"); | 657 | MODULE_AUTHOR("Intel Corporation"); |
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index cb0d742fa23f..9f2a0b4fd801 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/pfn_t.h> | 16 | #include <linux/pfn_t.h> |
17 | #include "../nvdimm/pfn.h" | 17 | #include "../nvdimm/pfn.h" |
18 | #include "../nvdimm/nd.h" | 18 | #include "../nvdimm/nd.h" |
19 | #include "dax.h" | 19 | #include "device-dax.h" |
20 | 20 | ||
21 | struct dax_pmem { | 21 | struct dax_pmem { |
22 | struct device *dev; | 22 | struct device *dev; |
@@ -61,8 +61,8 @@ static int dax_pmem_probe(struct device *dev) | |||
61 | int rc; | 61 | int rc; |
62 | void *addr; | 62 | void *addr; |
63 | struct resource res; | 63 | struct resource res; |
64 | struct dax_dev *dax_dev; | ||
65 | struct nd_pfn_sb *pfn_sb; | 64 | struct nd_pfn_sb *pfn_sb; |
65 | struct dev_dax *dev_dax; | ||
66 | struct dax_pmem *dax_pmem; | 66 | struct dax_pmem *dax_pmem; |
67 | struct nd_region *nd_region; | 67 | struct nd_region *nd_region; |
68 | struct nd_namespace_io *nsio; | 68 | struct nd_namespace_io *nsio; |
@@ -130,12 +130,12 @@ static int dax_pmem_probe(struct device *dev) | |||
130 | return -ENOMEM; | 130 | return -ENOMEM; |
131 | 131 | ||
132 | /* TODO: support for subdividing a dax region... */ | 132 | /* TODO: support for subdividing a dax region... */ |
133 | dax_dev = devm_create_dax_dev(dax_region, &res, 1); | 133 | dev_dax = devm_create_dev_dax(dax_region, &res, 1); |
134 | 134 | ||
135 | /* child dax_dev instances now own the lifetime of the dax_region */ | 135 | /* child dev_dax instances now own the lifetime of the dax_region */ |
136 | dax_region_put(dax_region); | 136 | dax_region_put(dax_region); |
137 | 137 | ||
138 | return PTR_ERR_OR_ZERO(dax_dev); | 138 | return PTR_ERR_OR_ZERO(dev_dax); |
139 | } | 139 | } |
140 | 140 | ||
141 | static struct nd_device_driver dax_pmem_driver = { | 141 | static struct nd_device_driver dax_pmem_driver = { |
diff --git a/drivers/dax/super.c b/drivers/dax/super.c new file mode 100644 index 000000000000..465dcd7317d5 --- /dev/null +++ b/drivers/dax/super.c | |||
@@ -0,0 +1,425 @@ | |||
1 | /* | ||
2 | * Copyright(c) 2017 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | */ | ||
13 | #include <linux/pagemap.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/mount.h> | ||
16 | #include <linux/magic.h> | ||
17 | #include <linux/cdev.h> | ||
18 | #include <linux/hash.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <linux/dax.h> | ||
21 | #include <linux/fs.h> | ||
22 | |||
23 | static int nr_dax = CONFIG_NR_DEV_DAX; | ||
24 | module_param(nr_dax, int, S_IRUGO); | ||
25 | MODULE_PARM_DESC(nr_dax, "max number of dax device instances"); | ||
26 | |||
27 | static dev_t dax_devt; | ||
28 | DEFINE_STATIC_SRCU(dax_srcu); | ||
29 | static struct vfsmount *dax_mnt; | ||
30 | static DEFINE_IDA(dax_minor_ida); | ||
31 | static struct kmem_cache *dax_cache __read_mostly; | ||
32 | static struct super_block *dax_superblock __read_mostly; | ||
33 | |||
34 | #define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head)) | ||
35 | static struct hlist_head dax_host_list[DAX_HASH_SIZE]; | ||
36 | static DEFINE_SPINLOCK(dax_host_lock); | ||
37 | |||
38 | int dax_read_lock(void) | ||
39 | { | ||
40 | return srcu_read_lock(&dax_srcu); | ||
41 | } | ||
42 | EXPORT_SYMBOL_GPL(dax_read_lock); | ||
43 | |||
44 | void dax_read_unlock(int id) | ||
45 | { | ||
46 | srcu_read_unlock(&dax_srcu, id); | ||
47 | } | ||
48 | EXPORT_SYMBOL_GPL(dax_read_unlock); | ||
49 | |||
50 | /** | ||
51 | * struct dax_device - anchor object for dax services | ||
52 | * @inode: core vfs | ||
53 | * @cdev: optional character interface for "device dax" | ||
54 | * @host: optional name for lookups where the device path is not available | ||
55 | * @private: dax driver private data | ||
56 | * @alive: !alive + rcu grace period == no new operations / mappings | ||
57 | */ | ||
58 | struct dax_device { | ||
59 | struct hlist_node list; | ||
60 | struct inode inode; | ||
61 | struct cdev cdev; | ||
62 | const char *host; | ||
63 | void *private; | ||
64 | bool alive; | ||
65 | const struct dax_operations *ops; | ||
66 | }; | ||
67 | |||
68 | /** | ||
69 | * dax_direct_access() - translate a device pgoff to an absolute pfn | ||
70 | * @dax_dev: a dax_device instance representing the logical memory range | ||
71 | * @pgoff: offset in pages from the start of the device to translate | ||
72 | * @nr_pages: number of consecutive pages caller can handle relative to @pfn | ||
73 | * @kaddr: output parameter that returns a virtual address mapping of pfn | ||
74 | * @pfn: output parameter that returns an absolute pfn translation of @pgoff | ||
75 | * | ||
76 | * Return: negative errno if an error occurs, otherwise the number of | ||
77 | * pages accessible at the device-relative @pgoff. | ||
78 | */ | ||
79 | long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, | ||
80 | void **kaddr, pfn_t *pfn) | ||
81 | { | ||
82 | long avail; | ||
83 | |||
84 | /* | ||
85 | * The device driver is allowed to sleep in order to make the | ||
86 | * memory directly accessible. | ||
87 | */ | ||
88 | might_sleep(); | ||
89 | |||
90 | if (!dax_dev) | ||
91 | return -EOPNOTSUPP; | ||
92 | |||
93 | if (!dax_alive(dax_dev)) | ||
94 | return -ENXIO; | ||
95 | |||
96 | if (nr_pages < 0) | ||
97 | return nr_pages; | ||
98 | |||
99 | avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages, | ||
100 | kaddr, pfn); | ||
101 | if (!avail) | ||
102 | return -ERANGE; | ||
103 | return min(avail, nr_pages); | ||
104 | } | ||
105 | EXPORT_SYMBOL_GPL(dax_direct_access); | ||
106 | |||
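A hypothetical caller, to make the contract concrete (peek_first_page() is invented for illustration; the lock/alive discipline mirrors dev_dax_huge_fault() earlier in this patch, and is required because dax_alive() asserts the srcu lock is held):

static int peek_first_page(struct dax_device *dax_dev)
{
	void *kaddr;
	pfn_t pfn;
	long avail;
	int id;

	id = dax_read_lock();	/* dax_alive() below requires this */
	avail = dax_direct_access(dax_dev, 0, 1, &kaddr, &pfn);
	dax_read_unlock(id);
	if (avail < 0)
		return avail;	/* -EOPNOTSUPP, -ENXIO or -ERANGE */
	/* at least one page is now addressable at *kaddr */
	return 0;
}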
107 | bool dax_alive(struct dax_device *dax_dev) | ||
108 | { | ||
109 | lockdep_assert_held(&dax_srcu); | ||
110 | return dax_dev->alive; | ||
111 | } | ||
112 | EXPORT_SYMBOL_GPL(dax_alive); | ||
113 | |||
114 | static int dax_host_hash(const char *host) | ||
115 | { | ||
116 | return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; | ||
117 | } | ||
118 | |||
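For scale, illustrative arithmetic assuming a 64-bit build with 4 KiB pages:

/*
 * DAX_HASH_SIZE = PAGE_SIZE / sizeof(struct hlist_head)
 *               = 4096 / 8 = 512 buckets, i.e. one page of list heads.
 * hashlen_string() hashes the host name with "DAX" as the salt, and
 * the modulo reduces the result to one of those buckets.
 */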
119 | /* | ||
120 | * Note, rcu is not protecting the liveness of dax_dev; rcu is ensuring | ||
121 | * that any fault handlers or operations that might have seen | ||
122 | * dax_alive() have completed. Any operations that start after | ||
123 | * synchronize_srcu() has run will abort upon seeing !dax_alive(). | ||
124 | */ | ||
125 | void kill_dax(struct dax_device *dax_dev) | ||
126 | { | ||
127 | if (!dax_dev) | ||
128 | return; | ||
129 | |||
130 | dax_dev->alive = false; | ||
131 | |||
132 | synchronize_srcu(&dax_srcu); | ||
133 | |||
134 | spin_lock(&dax_host_lock); | ||
135 | hlist_del_init(&dax_dev->list); | ||
136 | spin_unlock(&dax_host_lock); | ||
137 | |||
138 | dax_dev->private = NULL; | ||
139 | } | ||
140 | EXPORT_SYMBOL_GPL(kill_dax); | ||
141 | |||
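The read side that kill_dax() synchronizes against looks roughly like the following sketch (do_if_alive() is an invented stand-in; the real readers in this patch are dev_dax_huge_fault() and dax_mmap()):

static int do_if_alive(struct dax_device *dax_dev)
{
	int id, rc = -ENXIO;

	id = dax_read_lock();
	if (dax_alive(dax_dev))
		rc = 0;	/* kill_dax() cannot finish until we unlock */
	dax_read_unlock(id);
	return rc;
}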
142 | static struct inode *dax_alloc_inode(struct super_block *sb) | ||
143 | { | ||
144 | struct dax_device *dax_dev; | ||
145 | |||
146 | dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL); | ||
147 | return &dax_dev->inode; | ||
148 | } | ||
149 | |||
150 | static struct dax_device *to_dax_dev(struct inode *inode) | ||
151 | { | ||
152 | return container_of(inode, struct dax_device, inode); | ||
153 | } | ||
154 | |||
155 | static void dax_i_callback(struct rcu_head *head) | ||
156 | { | ||
157 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
158 | struct dax_device *dax_dev = to_dax_dev(inode); | ||
159 | |||
160 | kfree(dax_dev->host); | ||
161 | dax_dev->host = NULL; | ||
162 | ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev)); | ||
163 | kmem_cache_free(dax_cache, dax_dev); | ||
164 | } | ||
165 | |||
166 | static void dax_destroy_inode(struct inode *inode) | ||
167 | { | ||
168 | struct dax_device *dax_dev = to_dax_dev(inode); | ||
169 | |||
170 | WARN_ONCE(dax_dev->alive, | ||
171 | "kill_dax() must be called before final iput()\n"); | ||
172 | call_rcu(&inode->i_rcu, dax_i_callback); | ||
173 | } | ||
174 | |||
175 | static const struct super_operations dax_sops = { | ||
176 | .statfs = simple_statfs, | ||
177 | .alloc_inode = dax_alloc_inode, | ||
178 | .destroy_inode = dax_destroy_inode, | ||
179 | .drop_inode = generic_delete_inode, | ||
180 | }; | ||
181 | |||
182 | static struct dentry *dax_mount(struct file_system_type *fs_type, | ||
183 | int flags, const char *dev_name, void *data) | ||
184 | { | ||
185 | return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC); | ||
186 | } | ||
187 | |||
188 | static struct file_system_type dax_fs_type = { | ||
189 | .name = "dax", | ||
190 | .mount = dax_mount, | ||
191 | .kill_sb = kill_anon_super, | ||
192 | }; | ||
193 | |||
194 | static int dax_test(struct inode *inode, void *data) | ||
195 | { | ||
196 | dev_t devt = *(dev_t *) data; | ||
197 | |||
198 | return inode->i_rdev == devt; | ||
199 | } | ||
200 | |||
201 | static int dax_set(struct inode *inode, void *data) | ||
202 | { | ||
203 | dev_t devt = *(dev_t *) data; | ||
204 | |||
205 | inode->i_rdev = devt; | ||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | static struct dax_device *dax_dev_get(dev_t devt) | ||
210 | { | ||
211 | struct dax_device *dax_dev; | ||
212 | struct inode *inode; | ||
213 | |||
214 | inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31), | ||
215 | dax_test, dax_set, &devt); | ||
216 | |||
217 | if (!inode) | ||
218 | return NULL; | ||
219 | |||
220 | dax_dev = to_dax_dev(inode); | ||
221 | if (inode->i_state & I_NEW) { | ||
222 | dax_dev->alive = true; | ||
223 | inode->i_cdev = &dax_dev->cdev; | ||
224 | inode->i_mode = S_IFCHR; | ||
225 | inode->i_flags = S_DAX; | ||
226 | mapping_set_gfp_mask(&inode->i_data, GFP_USER); | ||
227 | unlock_new_inode(inode); | ||
228 | } | ||
229 | |||
230 | return dax_dev; | ||
231 | } | ||
232 | |||
233 | static void dax_add_host(struct dax_device *dax_dev, const char *host) | ||
234 | { | ||
235 | int hash; | ||
236 | |||
237 | /* | ||
238 | * Unconditionally init dax_dev since it's coming from a | ||
239 | * non-zeroed slab cache | ||
240 | */ | ||
241 | INIT_HLIST_NODE(&dax_dev->list); | ||
242 | dax_dev->host = host; | ||
243 | if (!host) | ||
244 | return; | ||
245 | |||
246 | hash = dax_host_hash(host); | ||
247 | spin_lock(&dax_host_lock); | ||
248 | hlist_add_head(&dax_dev->list, &dax_host_list[hash]); | ||
249 | spin_unlock(&dax_host_lock); | ||
250 | } | ||
251 | |||
252 | struct dax_device *alloc_dax(void *private, const char *__host, | ||
253 | const struct dax_operations *ops) | ||
254 | { | ||
255 | struct dax_device *dax_dev; | ||
256 | const char *host; | ||
257 | dev_t devt; | ||
258 | int minor; | ||
259 | |||
260 | host = kstrdup(__host, GFP_KERNEL); | ||
261 | if (__host && !host) | ||
262 | return NULL; | ||
263 | |||
264 | minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL); | ||
265 | if (minor < 0) | ||
266 | goto err_minor; | ||
267 | |||
268 | devt = MKDEV(MAJOR(dax_devt), minor); | ||
269 | dax_dev = dax_dev_get(devt); | ||
270 | if (!dax_dev) | ||
271 | goto err_dev; | ||
272 | |||
273 | dax_add_host(dax_dev, host); | ||
274 | dax_dev->ops = ops; | ||
275 | dax_dev->private = private; | ||
276 | return dax_dev; | ||
277 | |||
278 | err_dev: | ||
279 | ida_simple_remove(&dax_minor_ida, minor); | ||
280 | err_minor: | ||
281 | kfree(host); | ||
282 | return NULL; | ||
283 | } | ||
284 | EXPORT_SYMBOL_GPL(alloc_dax); | ||
285 | |||
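Device-dax passes NULL ops because its consumers never call dax_direct_access(); a block-backed provider would register a method. A minimal hypothetical provider follows (all my_* names and fields are invented; only alloc_dax(), dax_get_private() and the direct_access signature come from this patch):

struct my_dev {
	void *virt;		/* kernel mapping of the memory */
	phys_addr_t phys;	/* physical base of the same range */
	resource_size_t size;	/* total size, page aligned */
};

static long my_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
		long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct my_dev *my = dax_get_private(dax_dev);

	*kaddr = my->virt + pgoff * PAGE_SIZE;
	*pfn = phys_to_pfn_t(my->phys + pgoff * PAGE_SIZE, PFN_DEV);
	return (my->size / PAGE_SIZE) - pgoff;	/* pages left past pgoff */
}

static const struct dax_operations my_dax_ops = {
	.direct_access = my_dax_direct_access,
};

Teardown would then be kill_dax() followed by put_dax(), in that order, so in-flight readers drain before the backing inode can go away.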
286 | void put_dax(struct dax_device *dax_dev) | ||
287 | { | ||
288 | if (!dax_dev) | ||
289 | return; | ||
290 | iput(&dax_dev->inode); | ||
291 | } | ||
292 | EXPORT_SYMBOL_GPL(put_dax); | ||
293 | |||
294 | /** | ||
295 | * dax_get_by_host() - temporary lookup mechanism for filesystem-dax | ||
296 | * @host: alternate name for the device registered by a dax driver | ||
297 | */ | ||
298 | struct dax_device *dax_get_by_host(const char *host) | ||
299 | { | ||
300 | struct dax_device *dax_dev, *found = NULL; | ||
301 | int hash, id; | ||
302 | |||
303 | if (!host) | ||
304 | return NULL; | ||
305 | |||
306 | hash = dax_host_hash(host); | ||
307 | |||
308 | id = dax_read_lock(); | ||
309 | spin_lock(&dax_host_lock); | ||
310 | hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) { | ||
311 | if (!dax_alive(dax_dev) | ||
312 | || strcmp(host, dax_dev->host) != 0) | ||
313 | continue; | ||
314 | |||
315 | if (igrab(&dax_dev->inode)) | ||
316 | found = dax_dev; | ||
317 | break; | ||
318 | } | ||
319 | spin_unlock(&dax_host_lock); | ||
320 | dax_read_unlock(id); | ||
321 | |||
322 | return found; | ||
323 | } | ||
324 | EXPORT_SYMBOL_GPL(dax_get_by_host); | ||
325 | |||
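The reference igrab() takes here must be dropped with put_dax(); device-mapper does exactly that in the dm.c hunk near the end of this patch. A hypothetical probe-time check:

static bool bdev_backed_by_dax(struct block_device *bdev)
{
	struct dax_device *dax_dev;

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev)
		return false;	/* no dax_device, or it is being torn down */
	put_dax(dax_dev);	/* drop the lookup reference */
	return true;
}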
326 | /** | ||
327 | * inode_dax() - convert a public inode into its dax_device | ||
328 | * @inode: An inode with i_cdev pointing to a dax_dev | ||
329 | * | ||
330 | * Note this is not equivalent to to_dax_dev(), which is for private | ||
331 | * internal use where we know the inode filesystem type == dax_fs_type. | ||
332 | */ | ||
333 | struct dax_device *inode_dax(struct inode *inode) | ||
334 | { | ||
335 | struct cdev *cdev = inode->i_cdev; | ||
336 | |||
337 | return container_of(cdev, struct dax_device, cdev); | ||
338 | } | ||
339 | EXPORT_SYMBOL_GPL(inode_dax); | ||
340 | |||
341 | struct inode *dax_inode(struct dax_device *dax_dev) | ||
342 | { | ||
343 | return &dax_dev->inode; | ||
344 | } | ||
345 | EXPORT_SYMBOL_GPL(dax_inode); | ||
346 | |||
347 | void *dax_get_private(struct dax_device *dax_dev) | ||
348 | { | ||
349 | return dax_dev->private; | ||
350 | } | ||
351 | EXPORT_SYMBOL_GPL(dax_get_private); | ||
352 | |||
353 | static void init_once(void *_dax_dev) | ||
354 | { | ||
355 | struct dax_device *dax_dev = _dax_dev; | ||
356 | struct inode *inode = &dax_dev->inode; | ||
357 | |||
358 | inode_init_once(inode); | ||
359 | } | ||
360 | |||
361 | static int __dax_fs_init(void) | ||
362 | { | ||
363 | int rc; | ||
364 | |||
365 | dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0, | ||
366 | (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | ||
367 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), | ||
368 | init_once); | ||
369 | if (!dax_cache) | ||
370 | return -ENOMEM; | ||
371 | |||
372 | rc = register_filesystem(&dax_fs_type); | ||
373 | if (rc) | ||
374 | goto err_register_fs; | ||
375 | |||
376 | dax_mnt = kern_mount(&dax_fs_type); | ||
377 | if (IS_ERR(dax_mnt)) { | ||
378 | rc = PTR_ERR(dax_mnt); | ||
379 | goto err_mount; | ||
380 | } | ||
381 | dax_superblock = dax_mnt->mnt_sb; | ||
382 | |||
383 | return 0; | ||
384 | |||
385 | err_mount: | ||
386 | unregister_filesystem(&dax_fs_type); | ||
387 | err_register_fs: | ||
388 | kmem_cache_destroy(dax_cache); | ||
389 | |||
390 | return rc; | ||
391 | } | ||
392 | |||
393 | static void __dax_fs_exit(void) | ||
394 | { | ||
395 | kern_unmount(dax_mnt); | ||
396 | unregister_filesystem(&dax_fs_type); | ||
397 | kmem_cache_destroy(dax_cache); | ||
398 | } | ||
399 | |||
400 | static int __init dax_fs_init(void) | ||
401 | { | ||
402 | int rc; | ||
403 | |||
404 | rc = __dax_fs_init(); | ||
405 | if (rc) | ||
406 | return rc; | ||
407 | |||
408 | nr_dax = max(nr_dax, 256); | ||
409 | rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax"); | ||
410 | if (rc) | ||
411 | __dax_fs_exit(); | ||
412 | return rc; | ||
413 | } | ||
414 | |||
415 | static void __exit dax_fs_exit(void) | ||
416 | { | ||
417 | unregister_chrdev_region(dax_devt, nr_dax); | ||
418 | ida_destroy(&dax_minor_ida); | ||
419 | __dax_fs_exit(); | ||
420 | } | ||
421 | |||
422 | MODULE_AUTHOR("Intel Corporation"); | ||
423 | MODULE_LICENSE("GPL v2"); | ||
424 | subsys_initcall(dax_fs_init); | ||
425 | module_exit(dax_fs_exit); | ||
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 7468a22f9d10..349ff8813401 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN | |||
200 | config BLK_DEV_DM | 200 | config BLK_DEV_DM |
201 | tristate "Device mapper support" | 201 | tristate "Device mapper support" |
202 | select BLK_DEV_DM_BUILTIN | 202 | select BLK_DEV_DM_BUILTIN |
203 | select DAX | ||
203 | ---help--- | 204 | ---help--- |
204 | Device-mapper is a low level volume manager. It works by allowing | 205 | Device-mapper is a low level volume manager. It works by allowing |
205 | people to specify mappings for ranges of logical sectors. Various | 206 | people to specify mappings for ranges of logical sectors. Various |
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 97db4d11c05a..52ca8d059e82 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h | |||
@@ -58,6 +58,7 @@ struct mapped_device { | |||
58 | struct target_type *immutable_target_type; | 58 | struct target_type *immutable_target_type; |
59 | 59 | ||
60 | struct gendisk *disk; | 60 | struct gendisk *disk; |
61 | struct dax_device *dax_dev; | ||
61 | char name[16]; | 62 | char name[16]; |
62 | 63 | ||
63 | void *interface_ptr; | 64 | void *interface_ptr; |
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index a5120961632a..7d42a9d9f406 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | #include <linux/blkdev.h> | 10 | #include <linux/blkdev.h> |
11 | #include <linux/bio.h> | 11 | #include <linux/bio.h> |
12 | #include <linux/dax.h> | ||
12 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
13 | #include <linux/device-mapper.h> | 14 | #include <linux/device-mapper.h> |
14 | 15 | ||
@@ -142,22 +143,20 @@ static int linear_iterate_devices(struct dm_target *ti, | |||
142 | return fn(ti, lc->dev, lc->start, ti->len, data); | 143 | return fn(ti, lc->dev, lc->start, ti->len, data); |
143 | } | 144 | } |
144 | 145 | ||
145 | static long linear_direct_access(struct dm_target *ti, sector_t sector, | 146 | static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, |
146 | void **kaddr, pfn_t *pfn, long size) | 147 | long nr_pages, void **kaddr, pfn_t *pfn) |
147 | { | 148 | { |
149 | long ret; | ||
148 | struct linear_c *lc = ti->private; | 150 | struct linear_c *lc = ti->private; |
149 | struct block_device *bdev = lc->dev->bdev; | 151 | struct block_device *bdev = lc->dev->bdev; |
150 | struct blk_dax_ctl dax = { | 152 | struct dax_device *dax_dev = lc->dev->dax_dev; |
151 | .sector = linear_map_sector(ti, sector), | 153 | sector_t dev_sector, sector = pgoff * PAGE_SECTORS; |
152 | .size = size, | 154 | |
153 | }; | 155 | dev_sector = linear_map_sector(ti, sector); |
154 | long ret; | 156 | ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff); |
155 | 157 | if (ret) | |
156 | ret = bdev_direct_access(bdev, &dax); | 158 | return ret; |
157 | *kaddr = dax.addr; | 159 | return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); |
158 | *pfn = dax.pfn; | ||
159 | |||
160 | return ret; | ||
161 | } | 160 | } |
162 | 161 | ||
163 | static struct target_type linear_target = { | 162 | static struct target_type linear_target = { |
@@ -171,7 +170,7 @@ static struct target_type linear_target = { | |||
171 | .status = linear_status, | 170 | .status = linear_status, |
172 | .prepare_ioctl = linear_prepare_ioctl, | 171 | .prepare_ioctl = linear_prepare_ioctl, |
173 | .iterate_devices = linear_iterate_devices, | 172 | .iterate_devices = linear_iterate_devices, |
174 | .direct_access = linear_direct_access, | 173 | .direct_access = linear_dax_direct_access, |
175 | }; | 174 | }; |
176 | 175 | ||
177 | int __init dm_linear_init(void) | 176 | int __init dm_linear_init(void) |
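Both this target and dm-stripe below lean on bdev_dax_pgoff() (added elsewhere in this series) to turn a partition-relative sector into a page offset on the whole-disk dax_device. Its assumed shape, reconstructed for illustration only:

/* Illustrative reconstruction; not code added by this hunk. */
static int example_bdev_dax_pgoff(struct block_device *bdev, sector_t sector,
		size_t size, pgoff_t *pgoff)
{
	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;

	if (pgoff)
		*pgoff = PHYS_PFN(phys_off);
	/* dax maps whole pages; reject misaligned offsets and sizes */
	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
		return -EINVAL;
	return 0;
}

For example, a partition starting at sector 2048 with dev_sector 8 gives phys_off = 2056 * 512 = 1052672 = 257 * 4096, hence pgoff 257.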
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c65feeada864..e152d9817c81 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -2302,8 +2302,8 @@ static int origin_map(struct dm_target *ti, struct bio *bio) | |||
2302 | return do_origin(o->dev, bio); | 2302 | return do_origin(o->dev, bio); |
2303 | } | 2303 | } |
2304 | 2304 | ||
2305 | static long origin_direct_access(struct dm_target *ti, sector_t sector, | 2305 | static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, |
2306 | void **kaddr, pfn_t *pfn, long size) | 2306 | long nr_pages, void **kaddr, pfn_t *pfn) |
2307 | { | 2307 | { |
2308 | DMWARN("device does not support dax."); | 2308 | DMWARN("device does not support dax."); |
2309 | return -EIO; | 2309 | return -EIO; |
@@ -2368,7 +2368,7 @@ static struct target_type origin_target = { | |||
2368 | .postsuspend = origin_postsuspend, | 2368 | .postsuspend = origin_postsuspend, |
2369 | .status = origin_status, | 2369 | .status = origin_status, |
2370 | .iterate_devices = origin_iterate_devices, | 2370 | .iterate_devices = origin_iterate_devices, |
2371 | .direct_access = origin_direct_access, | 2371 | .direct_access = origin_dax_direct_access, |
2372 | }; | 2372 | }; |
2373 | 2373 | ||
2374 | static struct target_type snapshot_target = { | 2374 | static struct target_type snapshot_target = { |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4b50ae115c6d..75152482f3ad 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/blkdev.h> | 12 | #include <linux/blkdev.h> |
13 | #include <linux/bio.h> | 13 | #include <linux/bio.h> |
14 | #include <linux/dax.h> | ||
14 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
15 | #include <linux/log2.h> | 16 | #include <linux/log2.h> |
16 | 17 | ||
@@ -310,27 +311,25 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) | |||
310 | return DM_MAPIO_REMAPPED; | 311 | return DM_MAPIO_REMAPPED; |
311 | } | 312 | } |
312 | 313 | ||
313 | static long stripe_direct_access(struct dm_target *ti, sector_t sector, | 314 | static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, |
314 | void **kaddr, pfn_t *pfn, long size) | 315 | long nr_pages, void **kaddr, pfn_t *pfn) |
315 | { | 316 | { |
317 | sector_t dev_sector, sector = pgoff * PAGE_SECTORS; | ||
316 | struct stripe_c *sc = ti->private; | 318 | struct stripe_c *sc = ti->private; |
317 | uint32_t stripe; | 319 | struct dax_device *dax_dev; |
318 | struct block_device *bdev; | 320 | struct block_device *bdev; |
319 | struct blk_dax_ctl dax = { | 321 | uint32_t stripe; |
320 | .size = size, | ||
321 | }; | ||
322 | long ret; | 322 | long ret; |
323 | 323 | ||
324 | stripe_map_sector(sc, sector, &stripe, &dax.sector); | 324 | stripe_map_sector(sc, sector, &stripe, &dev_sector); |
325 | 325 | dev_sector += sc->stripe[stripe].physical_start; | |
326 | dax.sector += sc->stripe[stripe].physical_start; | 326 | dax_dev = sc->stripe[stripe].dev->dax_dev; |
327 | bdev = sc->stripe[stripe].dev->bdev; | 327 | bdev = sc->stripe[stripe].dev->bdev; |
328 | 328 | ||
329 | ret = bdev_direct_access(bdev, &dax); | 329 | ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff); |
330 | *kaddr = dax.addr; | 330 | if (ret) |
331 | *pfn = dax.pfn; | 331 | return ret; |
332 | 332 | return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); | |
333 | return ret; | ||
334 | } | 333 | } |
335 | 334 | ||
336 | /* | 335 | /* |
@@ -451,7 +450,7 @@ static struct target_type stripe_target = { | |||
451 | .status = stripe_status, | 450 | .status = stripe_status, |
452 | .iterate_devices = stripe_iterate_devices, | 451 | .iterate_devices = stripe_iterate_devices, |
453 | .io_hints = stripe_io_hints, | 452 | .io_hints = stripe_io_hints, |
454 | .direct_access = stripe_direct_access, | 453 | .direct_access = stripe_dax_direct_access, |
455 | }; | 454 | }; |
456 | 455 | ||
457 | int __init dm_stripe_init(void) | 456 | int __init dm_stripe_init(void) |
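The dm-stripe conversion above is representative of the whole series: ->direct_access now takes a page offset (pgoff) and a page count rather than a sector and a byte size. In sketch form, the translation a pass-through target performs looks like the following — the example_target type and its dev/start fields are hypothetical; the committed dm-linear and dm-stripe code is the authoritative version:

    #include <linux/device-mapper.h>
    #include <linux/dax.h>

    #ifndef PAGE_SECTORS
    #define PAGE_SECTORS (PAGE_SIZE / 512)	/* pages -> 512-byte sectors */
    #endif

    struct example_target {		/* hypothetical target state */
    	struct dm_dev *dev;		/* underlying device */
    	sector_t start;			/* sector offset into it */
    };

    static long example_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
    		long nr_pages, void **kaddr, pfn_t *pfn)
    {
    	struct example_target *et = ti->private;
    	sector_t sector = pgoff * PAGE_SECTORS + et->start;
    	long ret;

    	/* re-derive a pgoff relative to the underlying block device */
    	ret = bdev_dax_pgoff(et->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff);
    	if (ret)
    		return ret;
    	/* returns the number of pages contiguously mappable at *kaddr */
    	return dax_direct_access(et->dev->dax_dev, pgoff, nr_pages, kaddr, pfn);
    }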
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 6264ff00dcf0..b242b750542f 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c | |||
@@ -142,8 +142,8 @@ static void io_err_release_clone_rq(struct request *clone) | |||
142 | { | 142 | { |
143 | } | 143 | } |
144 | 144 | ||
145 | static long io_err_direct_access(struct dm_target *ti, sector_t sector, | 145 | static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, |
146 | void **kaddr, pfn_t *pfn, long size) | 146 | long nr_pages, void **kaddr, pfn_t *pfn) |
147 | { | 147 | { |
148 | return -EIO; | 148 | return -EIO; |
149 | } | 149 | } |
@@ -157,7 +157,7 @@ static struct target_type error_target = { | |||
157 | .map = io_err_map, | 157 | .map = io_err_map, |
158 | .clone_and_map_rq = io_err_clone_and_map_rq, | 158 | .clone_and_map_rq = io_err_clone_and_map_rq, |
159 | .release_clone_rq = io_err_release_clone_rq, | 159 | .release_clone_rq = io_err_release_clone_rq, |
160 | .direct_access = io_err_direct_access, | 160 | .direct_access = io_err_dax_direct_access, |
161 | }; | 161 | }; |
162 | 162 | ||
163 | int __init dm_target_init(void) | 163 | int __init dm_target_init(void) |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 268edf402bbb..6ef9500226c0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/blkpg.h> | 16 | #include <linux/blkpg.h> |
17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
18 | #include <linux/mempool.h> | 18 | #include <linux/mempool.h> |
19 | #include <linux/dax.h> | ||
19 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
20 | #include <linux/idr.h> | 21 | #include <linux/idr.h> |
21 | #include <linux/hdreg.h> | 22 | #include <linux/hdreg.h> |
@@ -629,6 +630,7 @@ static int open_table_device(struct table_device *td, dev_t dev, | |||
629 | } | 630 | } |
630 | 631 | ||
631 | td->dm_dev.bdev = bdev; | 632 | td->dm_dev.bdev = bdev; |
633 | td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); | ||
632 | return 0; | 634 | return 0; |
633 | } | 635 | } |
634 | 636 | ||
@@ -642,7 +644,9 @@ static void close_table_device(struct table_device *td, struct mapped_device *md | |||
642 | 644 | ||
643 | bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); | 645 | bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); |
644 | blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); | 646 | blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); |
647 | put_dax(td->dm_dev.dax_dev); | ||
645 | td->dm_dev.bdev = NULL; | 648 | td->dm_dev.bdev = NULL; |
649 | td->dm_dev.dax_dev = NULL; | ||
646 | } | 650 | } |
647 | 651 | ||
648 | static struct table_device *find_table_device(struct list_head *l, dev_t dev, | 652 | static struct table_device *find_table_device(struct list_head *l, dev_t dev, |
@@ -920,31 +924,49 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) | |||
920 | } | 924 | } |
921 | EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); | 925 | EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); |
922 | 926 | ||
923 | static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, | 927 | static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, |
924 | void **kaddr, pfn_t *pfn, long size) | 928 | sector_t sector, int *srcu_idx) |
925 | { | 929 | { |
926 | struct mapped_device *md = bdev->bd_disk->private_data; | ||
927 | struct dm_table *map; | 930 | struct dm_table *map; |
928 | struct dm_target *ti; | 931 | struct dm_target *ti; |
929 | int srcu_idx; | ||
930 | long len, ret = -EIO; | ||
931 | 932 | ||
932 | map = dm_get_live_table(md, &srcu_idx); | 933 | map = dm_get_live_table(md, srcu_idx); |
933 | if (!map) | 934 | if (!map) |
934 | goto out; | 935 | return NULL; |
935 | 936 | ||
936 | ti = dm_table_find_target(map, sector); | 937 | ti = dm_table_find_target(map, sector); |
937 | if (!dm_target_is_valid(ti)) | 938 | if (!dm_target_is_valid(ti)) |
938 | goto out; | 939 | return NULL; |
940 | |||
941 | return ti; | ||
942 | } | ||
943 | |||
944 | static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, | ||
945 | long nr_pages, void **kaddr, pfn_t *pfn) | ||
946 | { | ||
947 | struct mapped_device *md = dax_get_private(dax_dev); | ||
948 | sector_t sector = pgoff * PAGE_SECTORS; | ||
949 | struct dm_target *ti; | ||
950 | long len, ret = -EIO; | ||
951 | int srcu_idx; | ||
939 | 952 | ||
940 | len = max_io_len(sector, ti) << SECTOR_SHIFT; | 953 | ti = dm_dax_get_live_target(md, sector, &srcu_idx); |
941 | size = min(len, size); | ||
942 | 954 | ||
955 | if (!ti) | ||
956 | goto out; | ||
957 | if (!ti->type->direct_access) | ||
958 | goto out; | ||
959 | len = max_io_len(sector, ti) / PAGE_SECTORS; | ||
960 | if (len < 1) | ||
961 | goto out; | ||
962 | nr_pages = min(len, nr_pages); | ||
943 | if (ti->type->direct_access) | 963 | if (ti->type->direct_access) |
944 | ret = ti->type->direct_access(ti, sector, kaddr, pfn, size); | 964 | ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn); |
945 | out: | 965 | |
966 | out: | ||
946 | dm_put_live_table(md, srcu_idx); | 967 | dm_put_live_table(md, srcu_idx); |
947 | return min(ret, size); | 968 | |
969 | return ret; | ||
948 | } | 970 | } |
949 | 971 | ||
950 | /* | 972 | /* |
@@ -1471,6 +1493,7 @@ static int next_free_minor(int *minor) | |||
1471 | } | 1493 | } |
1472 | 1494 | ||
1473 | static const struct block_device_operations dm_blk_dops; | 1495 | static const struct block_device_operations dm_blk_dops; |
1496 | static const struct dax_operations dm_dax_ops; | ||
1474 | 1497 | ||
1475 | static void dm_wq_work(struct work_struct *work); | 1498 | static void dm_wq_work(struct work_struct *work); |
1476 | 1499 | ||
@@ -1517,6 +1540,12 @@ static void cleanup_mapped_device(struct mapped_device *md) | |||
1517 | if (md->bs) | 1540 | if (md->bs) |
1518 | bioset_free(md->bs); | 1541 | bioset_free(md->bs); |
1519 | 1542 | ||
1543 | if (md->dax_dev) { | ||
1544 | kill_dax(md->dax_dev); | ||
1545 | put_dax(md->dax_dev); | ||
1546 | md->dax_dev = NULL; | ||
1547 | } | ||
1548 | |||
1520 | if (md->disk) { | 1549 | if (md->disk) { |
1521 | spin_lock(&_minor_lock); | 1550 | spin_lock(&_minor_lock); |
1522 | md->disk->private_data = NULL; | 1551 | md->disk->private_data = NULL; |
@@ -1544,6 +1573,7 @@ static void cleanup_mapped_device(struct mapped_device *md) | |||
1544 | static struct mapped_device *alloc_dev(int minor) | 1573 | static struct mapped_device *alloc_dev(int minor) |
1545 | { | 1574 | { |
1546 | int r, numa_node_id = dm_get_numa_node(); | 1575 | int r, numa_node_id = dm_get_numa_node(); |
1576 | struct dax_device *dax_dev; | ||
1547 | struct mapped_device *md; | 1577 | struct mapped_device *md; |
1548 | void *old_md; | 1578 | void *old_md; |
1549 | 1579 | ||
@@ -1608,6 +1638,12 @@ static struct mapped_device *alloc_dev(int minor) | |||
1608 | md->disk->queue = md->queue; | 1638 | md->disk->queue = md->queue; |
1609 | md->disk->private_data = md; | 1639 | md->disk->private_data = md; |
1610 | sprintf(md->disk->disk_name, "dm-%d", minor); | 1640 | sprintf(md->disk->disk_name, "dm-%d", minor); |
1641 | |||
1642 | dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops); | ||
1643 | if (!dax_dev) | ||
1644 | goto bad; | ||
1645 | md->dax_dev = dax_dev; | ||
1646 | |||
1611 | add_disk(md->disk); | 1647 | add_disk(md->disk); |
1612 | format_dev_t(md->name, MKDEV(_major, minor)); | 1648 | format_dev_t(md->name, MKDEV(_major, minor)); |
1613 | 1649 | ||
@@ -2816,12 +2852,15 @@ static const struct block_device_operations dm_blk_dops = { | |||
2816 | .open = dm_blk_open, | 2852 | .open = dm_blk_open, |
2817 | .release = dm_blk_close, | 2853 | .release = dm_blk_close, |
2818 | .ioctl = dm_blk_ioctl, | 2854 | .ioctl = dm_blk_ioctl, |
2819 | .direct_access = dm_blk_direct_access, | ||
2820 | .getgeo = dm_blk_getgeo, | 2855 | .getgeo = dm_blk_getgeo, |
2821 | .pr_ops = &dm_pr_ops, | 2856 | .pr_ops = &dm_pr_ops, |
2822 | .owner = THIS_MODULE | 2857 | .owner = THIS_MODULE |
2823 | }; | 2858 | }; |
2824 | 2859 | ||
2860 | static const struct dax_operations dm_dax_ops = { | ||
2861 | .direct_access = dm_dax_direct_access, | ||
2862 | }; | ||
2863 | |||
2825 | /* | 2864 | /* |
2826 | * module hooks | 2865 | * module hooks |
2827 | */ | 2866 | */ |
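Taken together, the dm.c hunks follow the host-driver pattern this series establishes: allocate a dax_device next to the gendisk, key it by disk name so dax_get_by_host() can find it, and pair kill_dax()/put_dax() on teardown. A minimal sketch under those assumptions — the foo names are hypothetical:

    #include <linux/dax.h>

    struct foo_device {			/* hypothetical driver state */
    	struct gendisk *disk;
    	struct dax_device *dax_dev;
    };

    static const struct dax_operations foo_dax_ops = {
    	.direct_access	= foo_dax_direct_access,
    };

    static int foo_attach_dax(struct foo_device *foo)
    {
    	/* the host-name argument keys later dax_get_by_host() lookups */
    	foo->dax_dev = alloc_dax(foo, foo->disk->disk_name, &foo_dax_ops);
    	if (!foo->dax_dev)
    		return -ENOMEM;
    	return 0;
    }

    static void foo_release_dax(struct foo_device *foo)
    {
    	kill_dax(foo->dax_dev);		/* fail any further lookups / operations */
    	put_dax(foo->dax_dev);		/* drop the allocation reference */
    }

The private pointer handed to alloc_dax() comes back via dax_get_private(dax_dev), which is how dm_dax_direct_access() above recovers its mapped_device.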
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 59e750183b7f..5bdd499b5f4f 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig | |||
@@ -20,6 +20,7 @@ if LIBNVDIMM | |||
20 | config BLK_DEV_PMEM | 20 | config BLK_DEV_PMEM |
21 | tristate "PMEM: Persistent memory block device support" | 21 | tristate "PMEM: Persistent memory block device support" |
22 | default LIBNVDIMM | 22 | default LIBNVDIMM |
23 | select DAX | ||
23 | select ND_BTT if BTT | 24 | select ND_BTT if BTT |
24 | select ND_PFN if NVDIMM_PFN | 25 | select ND_PFN if NVDIMM_PFN |
25 | help | 26 | help |
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 97dd2925ed6e..4b76af2b8715 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c | |||
@@ -314,7 +314,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) | |||
314 | if (rc < 0) { | 314 | if (rc < 0) { |
315 | struct nd_btt *nd_btt = to_nd_btt(btt_dev); | 315 | struct nd_btt *nd_btt = to_nd_btt(btt_dev); |
316 | 316 | ||
317 | __nd_detach_ndns(btt_dev, &nd_btt->ndns); | 317 | nd_detach_ndns(btt_dev, &nd_btt->ndns); |
318 | put_device(btt_dev); | 318 | put_device(btt_dev); |
319 | } | 319 | } |
320 | 320 | ||
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 351bac8f6503..e9361bffe5ee 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/nd.h> | 27 | #include <linux/nd.h> |
28 | #include "nd-core.h" | 28 | #include "nd-core.h" |
29 | #include "nd.h" | 29 | #include "nd.h" |
30 | #include "pfn.h" | ||
30 | 31 | ||
31 | int nvdimm_major; | 32 | int nvdimm_major; |
32 | static int nvdimm_bus_major; | 33 | static int nvdimm_bus_major; |
@@ -171,6 +172,57 @@ void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) | |||
171 | } | 172 | } |
172 | EXPORT_SYMBOL_GPL(nvdimm_region_notify); | 173 | EXPORT_SYMBOL_GPL(nvdimm_region_notify); |
173 | 174 | ||
175 | struct clear_badblocks_context { | ||
176 | resource_size_t phys, cleared; | ||
177 | }; | ||
178 | |||
179 | static int nvdimm_clear_badblocks_region(struct device *dev, void *data) | ||
180 | { | ||
181 | struct clear_badblocks_context *ctx = data; | ||
182 | struct nd_region *nd_region; | ||
183 | resource_size_t ndr_end; | ||
184 | sector_t sector; | ||
185 | |||
186 | /* make sure device is a region */ | ||
187 | if (!is_nd_pmem(dev)) | ||
188 | return 0; | ||
189 | |||
190 | nd_region = to_nd_region(dev); | ||
191 | ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1; | ||
192 | |||
193 | /* make sure we are in the region */ | ||
194 | if (ctx->phys < nd_region->ndr_start | ||
195 | || (ctx->phys + ctx->cleared) > ndr_end) | ||
196 | return 0; | ||
197 | |||
198 | sector = (ctx->phys - nd_region->ndr_start) / 512; | ||
199 | badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512); | ||
200 | |||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | static void nvdimm_clear_badblocks_regions(struct nvdimm_bus *nvdimm_bus, | ||
205 | phys_addr_t phys, u64 cleared) | ||
206 | { | ||
207 | struct clear_badblocks_context ctx = { | ||
208 | .phys = phys, | ||
209 | .cleared = cleared, | ||
210 | }; | ||
211 | |||
212 | device_for_each_child(&nvdimm_bus->dev, &ctx, | ||
213 | nvdimm_clear_badblocks_region); | ||
214 | } | ||
215 | |||
216 | static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus, | ||
217 | phys_addr_t phys, u64 cleared) | ||
218 | { | ||
219 | if (cleared > 0) | ||
220 | nvdimm_forget_poison(nvdimm_bus, phys, cleared); | ||
221 | |||
222 | if (cleared > 0 && cleared / 512) | ||
223 | nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared); | ||
224 | } | ||
225 | |||
174 | long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, | 226 | long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, |
175 | unsigned int len) | 227 | unsigned int len) |
176 | { | 228 | { |
@@ -218,7 +270,8 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, | |||
218 | if (cmd_rc < 0) | 270 | if (cmd_rc < 0) |
219 | return cmd_rc; | 271 | return cmd_rc; |
220 | 272 | ||
221 | nvdimm_clear_from_poison_list(nvdimm_bus, phys, len); | 273 | nvdimm_account_cleared_poison(nvdimm_bus, phys, clear_err.cleared); |
274 | |||
222 | return clear_err.cleared; | 275 | return clear_err.cleared; |
223 | } | 276 | } |
224 | EXPORT_SYMBOL_GPL(nvdimm_clear_poison); | 277 | EXPORT_SYMBOL_GPL(nvdimm_clear_poison); |
@@ -286,6 +339,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, | |||
286 | init_waitqueue_head(&nvdimm_bus->probe_wait); | 339 | init_waitqueue_head(&nvdimm_bus->probe_wait); |
287 | nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); | 340 | nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); |
288 | mutex_init(&nvdimm_bus->reconfig_mutex); | 341 | mutex_init(&nvdimm_bus->reconfig_mutex); |
342 | spin_lock_init(&nvdimm_bus->poison_lock); | ||
289 | if (nvdimm_bus->id < 0) { | 343 | if (nvdimm_bus->id < 0) { |
290 | kfree(nvdimm_bus); | 344 | kfree(nvdimm_bus); |
291 | return NULL; | 345 | return NULL; |
@@ -354,9 +408,9 @@ static int nd_bus_remove(struct device *dev) | |||
354 | nd_synchronize(); | 408 | nd_synchronize(); |
355 | device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); | 409 | device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); |
356 | 410 | ||
357 | nvdimm_bus_lock(&nvdimm_bus->dev); | 411 | spin_lock(&nvdimm_bus->poison_lock); |
358 | free_poison_list(&nvdimm_bus->poison_list); | 412 | free_poison_list(&nvdimm_bus->poison_list); |
359 | nvdimm_bus_unlock(&nvdimm_bus->dev); | 413 | spin_unlock(&nvdimm_bus->poison_lock); |
360 | 414 | ||
361 | nvdimm_bus_destroy_ndctl(nvdimm_bus); | 415 | nvdimm_bus_destroy_ndctl(nvdimm_bus); |
362 | 416 | ||
@@ -769,16 +823,55 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) | |||
769 | } while (true); | 823 | } while (true); |
770 | } | 824 | } |
771 | 825 | ||
772 | static int pmem_active(struct device *dev, void *data) | 826 | static int nd_pmem_forget_poison_check(struct device *dev, void *data) |
773 | { | 827 | { |
774 | if (is_nd_pmem(dev) && dev->driver) | 828 | struct nd_cmd_clear_error *clear_err = |
829 | (struct nd_cmd_clear_error *)data; | ||
830 | struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; | ||
831 | struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL; | ||
832 | struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL; | ||
833 | struct nd_namespace_common *ndns = NULL; | ||
834 | struct nd_namespace_io *nsio; | ||
835 | resource_size_t offset = 0, end_trunc = 0, start, end, pstart, pend; | ||
836 | |||
837 | if (nd_dax || !dev->driver) | ||
838 | return 0; | ||
839 | |||
840 | start = clear_err->address; | ||
841 | end = clear_err->address + clear_err->cleared - 1; | ||
842 | |||
843 | if (nd_btt || nd_pfn || nd_dax) { | ||
844 | if (nd_btt) | ||
845 | ndns = nd_btt->ndns; | ||
846 | else if (nd_pfn) | ||
847 | ndns = nd_pfn->ndns; | ||
848 | else if (nd_dax) | ||
849 | ndns = nd_dax->nd_pfn.ndns; | ||
850 | |||
851 | if (!ndns) | ||
852 | return 0; | ||
853 | } else | ||
854 | ndns = to_ndns(dev); | ||
855 | |||
856 | nsio = to_nd_namespace_io(&ndns->dev); | ||
857 | pstart = nsio->res.start + offset; | ||
858 | pend = nsio->res.end - end_trunc; | ||
859 | |||
860 | if ((pstart >= start) && (pend <= end)) | ||
775 | return -EBUSY; | 861 | return -EBUSY; |
862 | |||
776 | return 0; | 863 | return 0; |
864 | |||
865 | } | ||
866 | |||
867 | static int nd_ns_forget_poison_check(struct device *dev, void *data) | ||
868 | { | ||
869 | return device_for_each_child(dev, data, nd_pmem_forget_poison_check); | ||
777 | } | 870 | } |
778 | 871 | ||
779 | /* set_config requires an idle interleave set */ | 872 | /* set_config requires an idle interleave set */ |
780 | static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, | 873 | static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, |
781 | struct nvdimm *nvdimm, unsigned int cmd) | 874 | struct nvdimm *nvdimm, unsigned int cmd, void *data) |
782 | { | 875 | { |
783 | struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; | 876 | struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; |
784 | 877 | ||
@@ -792,8 +885,8 @@ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, | |||
792 | 885 | ||
793 | /* require clear error to go through the pmem driver */ | 886 | /* require clear error to go through the pmem driver */ |
794 | if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR) | 887 | if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR) |
795 | return device_for_each_child(&nvdimm_bus->dev, NULL, | 888 | return device_for_each_child(&nvdimm_bus->dev, data, |
796 | pmem_active); | 889 | nd_ns_forget_poison_check); |
797 | 890 | ||
798 | if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) | 891 | if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) |
799 | return 0; | 892 | return 0; |
@@ -820,7 +913,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, | |||
820 | const char *cmd_name, *dimm_name; | 913 | const char *cmd_name, *dimm_name; |
821 | unsigned long cmd_mask; | 914 | unsigned long cmd_mask; |
822 | void *buf; | 915 | void *buf; |
823 | int rc, i; | 916 | int rc, i, cmd_rc; |
824 | 917 | ||
825 | if (nvdimm) { | 918 | if (nvdimm) { |
826 | desc = nd_cmd_dimm_desc(cmd); | 919 | desc = nd_cmd_dimm_desc(cmd); |
@@ -927,13 +1020,20 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, | |||
927 | } | 1020 | } |
928 | 1021 | ||
929 | nvdimm_bus_lock(&nvdimm_bus->dev); | 1022 | nvdimm_bus_lock(&nvdimm_bus->dev); |
930 | rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd); | 1023 | rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd, buf); |
931 | if (rc) | 1024 | if (rc) |
932 | goto out_unlock; | 1025 | goto out_unlock; |
933 | 1026 | ||
934 | rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL); | 1027 | rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, &cmd_rc); |
935 | if (rc < 0) | 1028 | if (rc < 0) |
936 | goto out_unlock; | 1029 | goto out_unlock; |
1030 | |||
1031 | if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR && cmd_rc >= 0) { | ||
1032 | struct nd_cmd_clear_error *clear_err = buf; | ||
1033 | |||
1034 | nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address, | ||
1035 | clear_err->cleared); | ||
1036 | } | ||
937 | nvdimm_bus_unlock(&nvdimm_bus->dev); | 1037 | nvdimm_bus_unlock(&nvdimm_bus->dev); |
938 | 1038 | ||
939 | if (copy_to_user(p, buf, buf_len)) | 1039 | if (copy_to_user(p, buf, buf_len)) |
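The badblocks accounting added above leans on the device_for_each_child() idiom: the callback visits every child of the bus, filters by type, and returns 0 to continue the walk (a non-zero return aborts it). A hedged sketch of the same shape, with hypothetical names:

    struct example_ctx {
    	resource_size_t phys;	/* absolute address of the cleared range */
    	u64 cleared;		/* bytes actually cleared */
    };

    static int example_clear_region(struct device *dev, void *data)
    {
    	struct example_ctx *ctx = data;
    	struct nd_region *nd_region;

    	if (!is_nd_pmem(dev))		/* only pmem regions track badblocks */
    		return 0;
    	nd_region = to_nd_region(dev);
    	if (ctx->phys < nd_region->ndr_start ||
    			ctx->phys >= nd_region->ndr_start + nd_region->ndr_size)
    		return 0;		/* range lives in some other region */
    	/* badblocks are kept in 512-byte sectors relative to region start */
    	badblocks_clear(&nd_region->bb,
    			(ctx->phys - nd_region->ndr_start) / 512,
    			ctx->cleared / 512);
    	return 0;			/* keep walking */
    }

Invoked as device_for_each_child(&nvdimm_bus->dev, &ctx, example_clear_region), mirroring nvdimm_clear_badblocks_regions() above.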
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index ca6d572c48fc..93d128da1c92 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c | |||
@@ -21,8 +21,13 @@ | |||
21 | void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns) | 21 | void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns) |
22 | { | 22 | { |
23 | struct nd_namespace_common *ndns = *_ndns; | 23 | struct nd_namespace_common *ndns = *_ndns; |
24 | struct nvdimm_bus *nvdimm_bus; | ||
24 | 25 | ||
25 | lockdep_assert_held(&ndns->dev.mutex); | 26 | if (!ndns) |
27 | return; | ||
28 | |||
29 | nvdimm_bus = walk_to_nvdimm_bus(&ndns->dev); | ||
30 | lockdep_assert_held(&nvdimm_bus->reconfig_mutex); | ||
26 | dev_WARN_ONCE(dev, ndns->claim != dev, "%s: invalid claim\n", __func__); | 31 | dev_WARN_ONCE(dev, ndns->claim != dev, "%s: invalid claim\n", __func__); |
27 | ndns->claim = NULL; | 32 | ndns->claim = NULL; |
28 | *_ndns = NULL; | 33 | *_ndns = NULL; |
@@ -37,18 +42,20 @@ void nd_detach_ndns(struct device *dev, | |||
37 | if (!ndns) | 42 | if (!ndns) |
38 | return; | 43 | return; |
39 | get_device(&ndns->dev); | 44 | get_device(&ndns->dev); |
40 | device_lock(&ndns->dev); | 45 | nvdimm_bus_lock(&ndns->dev); |
41 | __nd_detach_ndns(dev, _ndns); | 46 | __nd_detach_ndns(dev, _ndns); |
42 | device_unlock(&ndns->dev); | 47 | nvdimm_bus_unlock(&ndns->dev); |
43 | put_device(&ndns->dev); | 48 | put_device(&ndns->dev); |
44 | } | 49 | } |
45 | 50 | ||
46 | bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, | 51 | bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, |
47 | struct nd_namespace_common **_ndns) | 52 | struct nd_namespace_common **_ndns) |
48 | { | 53 | { |
54 | struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&attach->dev); | ||
55 | |||
49 | if (attach->claim) | 56 | if (attach->claim) |
50 | return false; | 57 | return false; |
51 | lockdep_assert_held(&attach->dev.mutex); | 58 | lockdep_assert_held(&nvdimm_bus->reconfig_mutex); |
52 | dev_WARN_ONCE(dev, *_ndns, "%s: invalid claim\n", __func__); | 59 | dev_WARN_ONCE(dev, *_ndns, "%s: invalid claim\n", __func__); |
53 | attach->claim = dev; | 60 | attach->claim = dev; |
54 | *_ndns = attach; | 61 | *_ndns = attach; |
@@ -61,9 +68,9 @@ bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, | |||
61 | { | 68 | { |
62 | bool claimed; | 69 | bool claimed; |
63 | 70 | ||
64 | device_lock(&attach->dev); | 71 | nvdimm_bus_lock(&attach->dev); |
65 | claimed = __nd_attach_ndns(dev, attach, _ndns); | 72 | claimed = __nd_attach_ndns(dev, attach, _ndns); |
66 | device_unlock(&attach->dev); | 73 | nvdimm_bus_unlock(&attach->dev); |
67 | return claimed; | 74 | return claimed; |
68 | } | 75 | } |
69 | 76 | ||
@@ -114,7 +121,7 @@ static void nd_detach_and_reset(struct device *dev, | |||
114 | struct nd_namespace_common **_ndns) | 121 | struct nd_namespace_common **_ndns) |
115 | { | 122 | { |
116 | /* detach the namespace and destroy / reset the device */ | 123 | /* detach the namespace and destroy / reset the device */ |
117 | nd_detach_ndns(dev, _ndns); | 124 | __nd_detach_ndns(dev, _ndns); |
118 | if (is_idle(dev, *_ndns)) { | 125 | if (is_idle(dev, *_ndns)) { |
119 | nd_device_unregister(dev, ND_ASYNC); | 126 | nd_device_unregister(dev, ND_ASYNC); |
120 | } else if (is_nd_btt(dev)) { | 127 | } else if (is_nd_btt(dev)) { |
@@ -184,7 +191,7 @@ ssize_t nd_namespace_store(struct device *dev, | |||
184 | } | 191 | } |
185 | 192 | ||
186 | WARN_ON_ONCE(!is_nvdimm_bus_locked(dev)); | 193 | WARN_ON_ONCE(!is_nvdimm_bus_locked(dev)); |
187 | if (!nd_attach_ndns(dev, ndns, _ndns)) { | 194 | if (!__nd_attach_ndns(dev, ndns, _ndns)) { |
188 | dev_dbg(dev, "%s already claimed\n", | 195 | dev_dbg(dev, "%s already claimed\n", |
189 | dev_name(&ndns->dev)); | 196 | dev_name(&ndns->dev)); |
190 | len = -EBUSY; | 197 | len = -EBUSY; |
@@ -239,22 +246,24 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, | |||
239 | if (rw == READ) { | 246 | if (rw == READ) { |
240 | if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) | 247 | if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) |
241 | return -EIO; | 248 | return -EIO; |
242 | return memcpy_from_pmem(buf, nsio->addr + offset, size); | 249 | return memcpy_mcsafe(buf, nsio->addr + offset, size); |
243 | } | 250 | } |
244 | 251 | ||
245 | if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { | 252 | if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { |
246 | /* | 253 | /* |
247 | * FIXME: nsio_rw_bytes() may be called from atomic | 254 | * FIXME: nsio_rw_bytes() may be called from atomic |
248 | * context in the btt case and nvdimm_clear_poison() | 255 | * context in the btt case and the ACPI DSM path for |
249 | * takes a sleeping lock. Until the locking can be | 256 | * clearing the error takes sleeping locks and allocates |
250 | * reworked this capability requires that the namespace | 257 | * memory. An explicit error clearing path, and support |
251 | * is not claimed by btt. | 258 | * for tracking badblocks in BTT metadata are needed to |
259 | * work around this collision. | ||
252 | */ | 260 | */ |
253 | if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) | 261 | if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) |
254 | && (!ndns->claim || !is_nd_btt(ndns->claim))) { | 262 | && (!ndns->claim || !is_nd_btt(ndns->claim))) { |
255 | long cleared; | 263 | long cleared; |
256 | 264 | ||
257 | cleared = nvdimm_clear_poison(&ndns->dev, offset, size); | 265 | cleared = nvdimm_clear_poison(&ndns->dev, |
266 | nsio->res.start + offset, size); | ||
258 | if (cleared < size) | 267 | if (cleared < size) |
259 | rc = -EIO; | 268 | rc = -EIO; |
260 | if (cleared > 0 && cleared / 512) { | 269 | if (cleared > 0 && cleared / 512) { |
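The claim.c hunks change which lock protects ndns->claim: the per-namespace device_lock gives way to the bus-wide reconfig_mutex, and the __-prefixed helpers now document that requirement with lockdep rather than comments. A minimal sketch of the convention, with hypothetical names:

    static void __example_set_claim(struct nd_namespace_common *ndns,
    		struct device *claimer)
    {
    	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&ndns->dev);

    	/* splats at runtime if a caller forgets the bus lock */
    	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
    	ndns->claim = claimer;
    }

    static void example_set_claim(struct nd_namespace_common *ndns,
    		struct device *claimer)
    {
    	nvdimm_bus_lock(&ndns->dev);	/* acquires reconfig_mutex */
    	__example_set_claim(ndns, claimer);
    	nvdimm_bus_unlock(&ndns->dev);
    }

This is presumably why nd_detach_and_reset() and nd_namespace_store() switch to the __-variants above: they already run under the bus lock.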
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 9303cfeb8bee..2dee908e4bae 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c | |||
@@ -518,6 +518,15 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region, | |||
518 | } | 518 | } |
519 | EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); | 519 | EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); |
520 | 520 | ||
521 | static void append_poison_entry(struct nvdimm_bus *nvdimm_bus, | ||
522 | struct nd_poison *pl, u64 addr, u64 length) | ||
523 | { | ||
524 | lockdep_assert_held(&nvdimm_bus->poison_lock); | ||
525 | pl->start = addr; | ||
526 | pl->length = length; | ||
527 | list_add_tail(&pl->list, &nvdimm_bus->poison_list); | ||
528 | } | ||
529 | |||
521 | static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length, | 530 | static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length, |
522 | gfp_t flags) | 531 | gfp_t flags) |
523 | { | 532 | { |
@@ -527,19 +536,24 @@ static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length, | |||
527 | if (!pl) | 536 | if (!pl) |
528 | return -ENOMEM; | 537 | return -ENOMEM; |
529 | 538 | ||
530 | pl->start = addr; | 539 | append_poison_entry(nvdimm_bus, pl, addr, length); |
531 | pl->length = length; | ||
532 | list_add_tail(&pl->list, &nvdimm_bus->poison_list); | ||
533 | |||
534 | return 0; | 540 | return 0; |
535 | } | 541 | } |
536 | 542 | ||
537 | static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) | 543 | static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) |
538 | { | 544 | { |
539 | struct nd_poison *pl; | 545 | struct nd_poison *pl, *pl_new; |
540 | 546 | ||
541 | if (list_empty(&nvdimm_bus->poison_list)) | 547 | spin_unlock(&nvdimm_bus->poison_lock); |
542 | return add_poison(nvdimm_bus, addr, length, GFP_KERNEL); | 548 | pl_new = kzalloc(sizeof(*pl_new), GFP_KERNEL); |
549 | spin_lock(&nvdimm_bus->poison_lock); | ||
550 | |||
551 | if (list_empty(&nvdimm_bus->poison_list)) { | ||
552 | if (!pl_new) | ||
553 | return -ENOMEM; | ||
554 | append_poison_entry(nvdimm_bus, pl_new, addr, length); | ||
555 | return 0; | ||
556 | } | ||
543 | 557 | ||
544 | /* | 558 | /* |
545 | * There is a chance this is a duplicate, check for those first. | 559 | * There is a chance this is a duplicate, check for those first. |
@@ -551,6 +565,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) | |||
551 | /* If length has changed, update this list entry */ | 565 | /* If length has changed, update this list entry */ |
552 | if (pl->length != length) | 566 | if (pl->length != length) |
553 | pl->length = length; | 567 | pl->length = length; |
568 | kfree(pl_new); | ||
554 | return 0; | 569 | return 0; |
555 | } | 570 | } |
556 | 571 | ||
@@ -559,29 +574,33 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) | |||
559 | * as any overlapping ranges will get resolved when the list is consumed | 574 | * as any overlapping ranges will get resolved when the list is consumed |
560 | * and converted to badblocks | 575 | * and converted to badblocks |
561 | */ | 576 | */ |
562 | return add_poison(nvdimm_bus, addr, length, GFP_KERNEL); | 577 | if (!pl_new) |
578 | return -ENOMEM; | ||
579 | append_poison_entry(nvdimm_bus, pl_new, addr, length); | ||
580 | |||
581 | return 0; | ||
563 | } | 582 | } |
564 | 583 | ||
565 | int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) | 584 | int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) |
566 | { | 585 | { |
567 | int rc; | 586 | int rc; |
568 | 587 | ||
569 | nvdimm_bus_lock(&nvdimm_bus->dev); | 588 | spin_lock(&nvdimm_bus->poison_lock); |
570 | rc = bus_add_poison(nvdimm_bus, addr, length); | 589 | rc = bus_add_poison(nvdimm_bus, addr, length); |
571 | nvdimm_bus_unlock(&nvdimm_bus->dev); | 590 | spin_unlock(&nvdimm_bus->poison_lock); |
572 | 591 | ||
573 | return rc; | 592 | return rc; |
574 | } | 593 | } |
575 | EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); | 594 | EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); |
576 | 595 | ||
577 | void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus, | 596 | void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start, |
578 | phys_addr_t start, unsigned int len) | 597 | unsigned int len) |
579 | { | 598 | { |
580 | struct list_head *poison_list = &nvdimm_bus->poison_list; | 599 | struct list_head *poison_list = &nvdimm_bus->poison_list; |
581 | u64 clr_end = start + len - 1; | 600 | u64 clr_end = start + len - 1; |
582 | struct nd_poison *pl, *next; | 601 | struct nd_poison *pl, *next; |
583 | 602 | ||
584 | nvdimm_bus_lock(&nvdimm_bus->dev); | 603 | spin_lock(&nvdimm_bus->poison_lock); |
585 | WARN_ON_ONCE(list_empty(poison_list)); | 604 | WARN_ON_ONCE(list_empty(poison_list)); |
586 | 605 | ||
587 | /* | 606 | /* |
@@ -628,15 +647,15 @@ void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus, | |||
628 | u64 new_len = pl_end - new_start + 1; | 647 | u64 new_len = pl_end - new_start + 1; |
629 | 648 | ||
630 | /* Add new entry covering the right half */ | 649 | /* Add new entry covering the right half */ |
631 | add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO); | 650 | add_poison(nvdimm_bus, new_start, new_len, GFP_NOWAIT); |
632 | /* Adjust this entry to cover the left half */ | 651 | /* Adjust this entry to cover the left half */ |
633 | pl->length = start - pl->start; | 652 | pl->length = start - pl->start; |
634 | continue; | 653 | continue; |
635 | } | 654 | } |
636 | } | 655 | } |
637 | nvdimm_bus_unlock(&nvdimm_bus->dev); | 656 | spin_unlock(&nvdimm_bus->poison_lock); |
638 | } | 657 | } |
639 | EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list); | 658 | EXPORT_SYMBOL_GPL(nvdimm_forget_poison); |
640 | 659 | ||
641 | #ifdef CONFIG_BLK_DEV_INTEGRITY | 660 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
642 | int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) | 661 | int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) |
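bus_add_poison() above can no longer sleep while the new poison_lock spinlock is held, so it adopts the drop-allocate-retake pattern: release the lock, perform the GFP_KERNEL allocation, re-take the lock, and only then decide whether the new node is needed (freeing it on the duplicate path). A condensed sketch of the shape — illustrative, not the committed code:

    static int example_add_locked(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 len)
    {
    	struct nd_poison *pl_new;

    	/* caller holds poison_lock; a sleeping allocation is illegal here */
    	spin_unlock(&nvdimm_bus->poison_lock);
    	pl_new = kzalloc(sizeof(*pl_new), GFP_KERNEL);
    	spin_lock(&nvdimm_bus->poison_lock);

    	/* the list may have changed while unlocked: re-check duplicates here */

    	if (!pl_new)
    		return -ENOMEM;
    	pl_new->start = addr;
    	pl_new->length = len;
    	list_add_tail(&pl_new->list, &nvdimm_bus->poison_list);
    	return 0;
    }

The nvdimm_forget_poison() path cannot drop the lock mid-scan, which is why its add_poison() call moves from GFP_NOIO to GFP_NOWAIT above.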
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 45fa82cae87c..c1b6556aea6e 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c | |||
@@ -124,7 +124,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) | |||
124 | dev_dbg(dev, "%s: dax: %s\n", __func__, | 124 | dev_dbg(dev, "%s: dax: %s\n", __func__, |
125 | rc == 0 ? dev_name(dax_dev) : "<none>"); | 125 | rc == 0 ? dev_name(dax_dev) : "<none>"); |
126 | if (rc < 0) { | 126 | if (rc < 0) { |
127 | __nd_detach_ndns(dax_dev, &nd_pfn->ndns); | 127 | nd_detach_ndns(dax_dev, &nd_pfn->ndns); |
128 | put_device(dax_dev); | 128 | put_device(dax_dev); |
129 | } else | 129 | } else |
130 | __nd_device_register(dax_dev); | 130 | __nd_device_register(dax_dev); |
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index ee0b412827bf..e0f0e3ce1a32 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c | |||
@@ -49,6 +49,8 @@ static int nvdimm_probe(struct device *dev) | |||
49 | kref_init(&ndd->kref); | 49 | kref_init(&ndd->kref); |
50 | 50 | ||
51 | rc = nvdimm_init_nsarea(ndd); | 51 | rc = nvdimm_init_nsarea(ndd); |
52 | if (rc == -EACCES) | ||
53 | nvdimm_set_locked(dev); | ||
52 | if (rc) | 54 | if (rc) |
53 | goto err; | 55 | goto err; |
54 | 56 | ||
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 8b721321be5b..fac1e9fbd11d 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c | |||
@@ -34,7 +34,7 @@ int nvdimm_check_config_data(struct device *dev) | |||
34 | 34 | ||
35 | if (!nvdimm->cmd_mask || | 35 | if (!nvdimm->cmd_mask || |
36 | !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) { | 36 | !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) { |
37 | if (nvdimm->flags & NDD_ALIASING) | 37 | if (test_bit(NDD_ALIASING, &nvdimm->flags)) |
38 | return -ENXIO; | 38 | return -ENXIO; |
39 | else | 39 | else |
40 | return -ENOTTY; | 40 | return -ENOTTY; |
@@ -67,6 +67,7 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) | |||
67 | struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); | 67 | struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); |
68 | struct nvdimm_bus_descriptor *nd_desc; | 68 | struct nvdimm_bus_descriptor *nd_desc; |
69 | int rc = validate_dimm(ndd); | 69 | int rc = validate_dimm(ndd); |
70 | int cmd_rc = 0; | ||
70 | 71 | ||
71 | if (rc) | 72 | if (rc) |
72 | return rc; | 73 | return rc; |
@@ -76,8 +77,11 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) | |||
76 | 77 | ||
77 | memset(cmd, 0, sizeof(*cmd)); | 78 | memset(cmd, 0, sizeof(*cmd)); |
78 | nd_desc = nvdimm_bus->nd_desc; | 79 | nd_desc = nvdimm_bus->nd_desc; |
79 | return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), | 80 | rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), |
80 | ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL); | 81 | ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), &cmd_rc); |
82 | if (rc < 0) | ||
83 | return rc; | ||
84 | return cmd_rc; | ||
81 | } | 85 | } |
82 | 86 | ||
83 | int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) | 87 | int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) |
@@ -188,7 +192,14 @@ void nvdimm_set_aliasing(struct device *dev) | |||
188 | { | 192 | { |
189 | struct nvdimm *nvdimm = to_nvdimm(dev); | 193 | struct nvdimm *nvdimm = to_nvdimm(dev); |
190 | 194 | ||
191 | nvdimm->flags |= NDD_ALIASING; | 195 | set_bit(NDD_ALIASING, &nvdimm->flags); |
196 | } | ||
197 | |||
198 | void nvdimm_set_locked(struct device *dev) | ||
199 | { | ||
200 | struct nvdimm *nvdimm = to_nvdimm(dev); | ||
201 | |||
202 | set_bit(NDD_LOCKED, &nvdimm->flags); | ||
192 | } | 203 | } |
193 | 204 | ||
194 | static void nvdimm_release(struct device *dev) | 205 | static void nvdimm_release(struct device *dev) |
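nvdimm_init_nsarea() now separates the two failure planes of the ndctl() call: the return value is the transport status, while the command's own status arrives through the new cmd_rc out-parameter, and dimm.c maps a cmd_rc of -EACCES onto the new NDD_LOCKED flag. A sketch of the calling convention this assumes:

    static int example_get_config_size(struct nvdimm_bus_descriptor *nd_desc,
    		struct nvdimm *nvdimm, struct nd_cmd_get_config_size *cmd)
    {
    	int cmd_rc = 0;
    	int rc;

    	rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
    			cmd, sizeof(*cmd), &cmd_rc);
    	if (rc < 0)		/* transport / marshaling failure */
    		return rc;
    	return cmd_rc;		/* firmware verdict, e.g. -EACCES when locked */
    }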
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 1b481a5fb966..2f9dfbd2dbec 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c | |||
@@ -2236,14 +2236,21 @@ static int init_active_labels(struct nd_region *nd_region) | |||
2236 | int count, j; | 2236 | int count, j; |
2237 | 2237 | ||
2238 | /* | 2238 | /* |
2239 | * If the dimm is disabled then prevent the region from | 2239 | * If the dimm is disabled then we may need to prevent |
2240 | * being activated if it aliases DPA. | 2240 | * the region from being activated. |
2241 | */ | 2241 | */ |
2242 | if (!ndd) { | 2242 | if (!ndd) { |
2243 | if ((nvdimm->flags & NDD_ALIASING) == 0) | 2243 | if (test_bit(NDD_LOCKED, &nvdimm->flags)) |
2244 | /* fail, label data may be unreadable */; | ||
2245 | else if (test_bit(NDD_ALIASING, &nvdimm->flags)) | ||
2246 | /* fail, labels needed to disambiguate dpa */; | ||
2247 | else | ||
2244 | return 0; | 2248 | return 0; |
2245 | dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n", | 2249 | |
2246 | dev_name(&nd_mapping->nvdimm->dev)); | 2250 | dev_err(&nd_region->dev, "%s: is %s, failing probe\n", |
2251 | dev_name(&nd_mapping->nvdimm->dev), | ||
2252 | test_bit(NDD_LOCKED, &nvdimm->flags) | ||
2253 | ? "locked" : "disabled"); | ||
2247 | return -ENXIO; | 2254 | return -ENXIO; |
2248 | } | 2255 | } |
2249 | nd_mapping->ndd = ndd; | 2256 | nd_mapping->ndd = ndd; |
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 8623e57c2ce3..4c4bd209e725 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h | |||
@@ -32,6 +32,7 @@ struct nvdimm_bus { | |||
32 | struct list_head poison_list; | 32 | struct list_head poison_list; |
33 | struct list_head mapping_list; | 33 | struct list_head mapping_list; |
34 | struct mutex reconfig_mutex; | 34 | struct mutex reconfig_mutex; |
35 | spinlock_t poison_lock; | ||
35 | }; | 36 | }; |
36 | 37 | ||
37 | struct nvdimm { | 38 | struct nvdimm { |
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 2a99c83aa19f..77d032192bf7 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h | |||
@@ -154,6 +154,7 @@ struct nd_region { | |||
154 | u64 ndr_start; | 154 | u64 ndr_start; |
155 | int id, num_lanes, ro, numa_node; | 155 | int id, num_lanes, ro, numa_node; |
156 | void *provider_data; | 156 | void *provider_data; |
157 | struct badblocks bb; | ||
157 | struct nd_interleave_set *nd_set; | 158 | struct nd_interleave_set *nd_set; |
158 | struct nd_percpu_lane __percpu *lane; | 159 | struct nd_percpu_lane __percpu *lane; |
159 | struct nd_mapping mapping[0]; | 160 | struct nd_mapping mapping[0]; |
@@ -239,6 +240,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, | |||
239 | long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, | 240 | long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, |
240 | unsigned int len); | 241 | unsigned int len); |
241 | void nvdimm_set_aliasing(struct device *dev); | 242 | void nvdimm_set_aliasing(struct device *dev); |
243 | void nvdimm_set_locked(struct device *dev); | ||
242 | struct nd_btt *to_nd_btt(struct device *dev); | 244 | struct nd_btt *to_nd_btt(struct device *dev); |
243 | 245 | ||
244 | struct nd_gen_sb { | 246 | struct nd_gen_sb { |
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 6c033c9a2f06..335c8175410b 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c | |||
@@ -484,7 +484,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) | |||
484 | dev_dbg(dev, "%s: pfn: %s\n", __func__, | 484 | dev_dbg(dev, "%s: pfn: %s\n", __func__, |
485 | rc == 0 ? dev_name(pfn_dev) : "<none>"); | 485 | rc == 0 ? dev_name(pfn_dev) : "<none>"); |
486 | if (rc < 0) { | 486 | if (rc < 0) { |
487 | __nd_detach_ndns(pfn_dev, &nd_pfn->ndns); | 487 | nd_detach_ndns(pfn_dev, &nd_pfn->ndns); |
488 | put_device(pfn_dev); | 488 | put_device(pfn_dev); |
489 | } else | 489 | } else |
490 | __nd_device_register(pfn_dev); | 490 | __nd_device_register(pfn_dev); |
@@ -538,7 +538,8 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, | |||
538 | nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); | 538 | nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); |
539 | altmap = NULL; | 539 | altmap = NULL; |
540 | } else if (nd_pfn->mode == PFN_MODE_PMEM) { | 540 | } else if (nd_pfn->mode == PFN_MODE_PMEM) { |
541 | nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE; | 541 | nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) |
542 | - offset) / PAGE_SIZE); | ||
542 | if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) | 543 | if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) |
543 | dev_info(&nd_pfn->dev, | 544 | dev_info(&nd_pfn->dev, |
544 | "number of pfns truncated from %lld to %ld\n", | 545 | "number of pfns truncated from %lld to %ld\n", |
@@ -625,7 +626,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) | |||
625 | */ | 626 | */ |
626 | start += start_pad; | 627 | start += start_pad; |
627 | size = resource_size(&nsio->res); | 628 | size = resource_size(&nsio->res); |
628 | npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K; | 629 | npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) |
630 | / PAGE_SIZE); | ||
629 | if (nd_pfn->mode == PFN_MODE_PMEM) { | 631 | if (nd_pfn->mode == PFN_MODE_PMEM) { |
630 | /* | 632 | /* |
631 | * vmemmap_populate_hugepages() allocates the memmap array in | 633 | * vmemmap_populate_hugepages() allocates the memmap array in |
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index fbc640bf06b0..c544d466ea51 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/pfn_t.h> | 29 | #include <linux/pfn_t.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/pmem.h> | 31 | #include <linux/pmem.h> |
32 | #include <linux/dax.h> | ||
32 | #include <linux/nd.h> | 33 | #include <linux/nd.h> |
33 | #include "pmem.h" | 34 | #include "pmem.h" |
34 | #include "pfn.h" | 35 | #include "pfn.h" |
@@ -89,7 +90,7 @@ static int read_pmem(struct page *page, unsigned int off, | |||
89 | int rc; | 90 | int rc; |
90 | void *mem = kmap_atomic(page); | 91 | void *mem = kmap_atomic(page); |
91 | 92 | ||
92 | rc = memcpy_from_pmem(mem + off, pmem_addr, len); | 93 | rc = memcpy_mcsafe(mem + off, pmem_addr, len); |
93 | kunmap_atomic(mem); | 94 | kunmap_atomic(mem); |
94 | if (rc) | 95 | if (rc) |
95 | return -EIO; | 96 | return -EIO; |
@@ -200,13 +201,13 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, | |||
200 | } | 201 | } |
201 | 202 | ||
202 | /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ | 203 | /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ |
203 | __weak long pmem_direct_access(struct block_device *bdev, sector_t sector, | 204 | __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, |
204 | void **kaddr, pfn_t *pfn, long size) | 205 | long nr_pages, void **kaddr, pfn_t *pfn) |
205 | { | 206 | { |
206 | struct pmem_device *pmem = bdev->bd_queue->queuedata; | 207 | resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; |
207 | resource_size_t offset = sector * 512 + pmem->data_offset; | ||
208 | 208 | ||
209 | if (unlikely(is_bad_pmem(&pmem->bb, sector, size))) | 209 | if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, |
210 | PFN_PHYS(nr_pages)))) | ||
210 | return -EIO; | 211 | return -EIO; |
211 | *kaddr = pmem->virt_addr + offset; | 212 | *kaddr = pmem->virt_addr + offset; |
212 | *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); | 213 | *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); |
@@ -216,17 +217,28 @@ __weak long pmem_direct_access(struct block_device *bdev, sector_t sector, | |||
216 | * requested range. | 217 | * requested range. |
217 | */ | 218 | */ |
218 | if (unlikely(pmem->bb.count)) | 219 | if (unlikely(pmem->bb.count)) |
219 | return size; | 220 | return nr_pages; |
220 | return pmem->size - pmem->pfn_pad - offset; | 221 | return PHYS_PFN(pmem->size - pmem->pfn_pad - offset); |
221 | } | 222 | } |
222 | 223 | ||
223 | static const struct block_device_operations pmem_fops = { | 224 | static const struct block_device_operations pmem_fops = { |
224 | .owner = THIS_MODULE, | 225 | .owner = THIS_MODULE, |
225 | .rw_page = pmem_rw_page, | 226 | .rw_page = pmem_rw_page, |
226 | .direct_access = pmem_direct_access, | ||
227 | .revalidate_disk = nvdimm_revalidate_disk, | 227 | .revalidate_disk = nvdimm_revalidate_disk, |
228 | }; | 228 | }; |
229 | 229 | ||
230 | static long pmem_dax_direct_access(struct dax_device *dax_dev, | ||
231 | pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) | ||
232 | { | ||
233 | struct pmem_device *pmem = dax_get_private(dax_dev); | ||
234 | |||
235 | return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); | ||
236 | } | ||
237 | |||
238 | static const struct dax_operations pmem_dax_ops = { | ||
239 | .direct_access = pmem_dax_direct_access, | ||
240 | }; | ||
241 | |||
230 | static void pmem_release_queue(void *q) | 242 | static void pmem_release_queue(void *q) |
231 | { | 243 | { |
232 | blk_cleanup_queue(q); | 244 | blk_cleanup_queue(q); |
@@ -237,10 +249,14 @@ static void pmem_freeze_queue(void *q) | |||
237 | blk_freeze_queue_start(q); | 249 | blk_freeze_queue_start(q); |
238 | } | 250 | } |
239 | 251 | ||
240 | static void pmem_release_disk(void *disk) | 252 | static void pmem_release_disk(void *__pmem) |
241 | { | 253 | { |
242 | del_gendisk(disk); | 254 | struct pmem_device *pmem = __pmem; |
243 | put_disk(disk); | 255 | |
256 | kill_dax(pmem->dax_dev); | ||
257 | put_dax(pmem->dax_dev); | ||
258 | del_gendisk(pmem->disk); | ||
259 | put_disk(pmem->disk); | ||
244 | } | 260 | } |
245 | 261 | ||
246 | static int pmem_attach_disk(struct device *dev, | 262 | static int pmem_attach_disk(struct device *dev, |
@@ -251,6 +267,7 @@ static int pmem_attach_disk(struct device *dev, | |||
251 | struct vmem_altmap __altmap, *altmap = NULL; | 267 | struct vmem_altmap __altmap, *altmap = NULL; |
252 | struct resource *res = &nsio->res; | 268 | struct resource *res = &nsio->res; |
253 | struct nd_pfn *nd_pfn = NULL; | 269 | struct nd_pfn *nd_pfn = NULL; |
270 | struct dax_device *dax_dev; | ||
254 | int nid = dev_to_node(dev); | 271 | int nid = dev_to_node(dev); |
255 | struct nd_pfn_sb *pfn_sb; | 272 | struct nd_pfn_sb *pfn_sb; |
256 | struct pmem_device *pmem; | 273 | struct pmem_device *pmem; |
@@ -334,6 +351,7 @@ static int pmem_attach_disk(struct device *dev, | |||
334 | disk = alloc_disk_node(0, nid); | 351 | disk = alloc_disk_node(0, nid); |
335 | if (!disk) | 352 | if (!disk) |
336 | return -ENOMEM; | 353 | return -ENOMEM; |
354 | pmem->disk = disk; | ||
337 | 355 | ||
338 | disk->fops = &pmem_fops; | 356 | disk->fops = &pmem_fops; |
339 | disk->queue = q; | 357 | disk->queue = q; |
@@ -345,9 +363,16 @@ static int pmem_attach_disk(struct device *dev, | |||
345 | return -ENOMEM; | 363 | return -ENOMEM; |
346 | nvdimm_badblocks_populate(nd_region, &pmem->bb, res); | 364 | nvdimm_badblocks_populate(nd_region, &pmem->bb, res); |
347 | disk->bb = &pmem->bb; | 365 | disk->bb = &pmem->bb; |
348 | device_add_disk(dev, disk); | ||
349 | 366 | ||
350 | if (devm_add_action_or_reset(dev, pmem_release_disk, disk)) | 367 | dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops); |
368 | if (!dax_dev) { | ||
369 | put_disk(disk); | ||
370 | return -ENOMEM; | ||
371 | } | ||
372 | pmem->dax_dev = dax_dev; | ||
373 | |||
374 | device_add_disk(dev, disk); | ||
375 | if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) | ||
351 | return -ENOMEM; | 376 | return -ENOMEM; |
352 | 377 | ||
353 | revalidate_disk(disk); | 378 | revalidate_disk(disk); |
@@ -397,12 +422,12 @@ static void nd_pmem_shutdown(struct device *dev) | |||
397 | 422 | ||
398 | static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) | 423 | static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) |
399 | { | 424 | { |
400 | struct pmem_device *pmem = dev_get_drvdata(dev); | 425 | struct nd_region *nd_region; |
401 | struct nd_region *nd_region = to_region(pmem); | ||
402 | resource_size_t offset = 0, end_trunc = 0; | 426 | resource_size_t offset = 0, end_trunc = 0; |
403 | struct nd_namespace_common *ndns; | 427 | struct nd_namespace_common *ndns; |
404 | struct nd_namespace_io *nsio; | 428 | struct nd_namespace_io *nsio; |
405 | struct resource res; | 429 | struct resource res; |
430 | struct badblocks *bb; | ||
406 | 431 | ||
407 | if (event != NVDIMM_REVALIDATE_POISON) | 432 | if (event != NVDIMM_REVALIDATE_POISON) |
408 | return; | 433 | return; |
@@ -411,20 +436,33 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) | |||
411 | struct nd_btt *nd_btt = to_nd_btt(dev); | 436 | struct nd_btt *nd_btt = to_nd_btt(dev); |
412 | 437 | ||
413 | ndns = nd_btt->ndns; | 438 | ndns = nd_btt->ndns; |
414 | } else if (is_nd_pfn(dev)) { | 439 | nd_region = to_nd_region(ndns->dev.parent); |
415 | struct nd_pfn *nd_pfn = to_nd_pfn(dev); | 440 | nsio = to_nd_namespace_io(&ndns->dev); |
416 | struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; | 441 | bb = &nsio->bb; |
442 | } else { | ||
443 | struct pmem_device *pmem = dev_get_drvdata(dev); | ||
417 | 444 | ||
418 | ndns = nd_pfn->ndns; | 445 | nd_region = to_region(pmem); |
419 | offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad); | 446 | bb = &pmem->bb; |
420 | end_trunc = __le32_to_cpu(pfn_sb->end_trunc); | 447 | |
421 | } else | 448 | if (is_nd_pfn(dev)) { |
422 | ndns = to_ndns(dev); | 449 | struct nd_pfn *nd_pfn = to_nd_pfn(dev); |
450 | struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; | ||
451 | |||
452 | ndns = nd_pfn->ndns; | ||
453 | offset = pmem->data_offset + | ||
454 | __le32_to_cpu(pfn_sb->start_pad); | ||
455 | end_trunc = __le32_to_cpu(pfn_sb->end_trunc); | ||
456 | } else { | ||
457 | ndns = to_ndns(dev); | ||
458 | } | ||
459 | |||
460 | nsio = to_nd_namespace_io(&ndns->dev); | ||
461 | } | ||
423 | 462 | ||
424 | nsio = to_nd_namespace_io(&ndns->dev); | ||
425 | res.start = nsio->res.start + offset; | 463 | res.start = nsio->res.start + offset; |
426 | res.end = nsio->res.end - end_trunc; | 464 | res.end = nsio->res.end - end_trunc; |
427 | nvdimm_badblocks_populate(nd_region, &pmem->bb, &res); | 465 | nvdimm_badblocks_populate(nd_region, bb, &res); |
428 | } | 466 | } |
429 | 467 | ||
430 | MODULE_ALIAS("pmem"); | 468 | MODULE_ALIAS("pmem"); |
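On the consumer side, pmem's __pmem_direct_access() is now reached through dax_direct_access() rather than through the block layer. A hedged sketch of a caller, assuming a dax_device reference is already held (real callers also bracket the call with dax_read_lock()/dax_read_unlock()):

    static int example_read_dax(struct dax_device *dax_dev, pgoff_t pgoff,
    		void *dst, size_t len)
    {
    	long nr = DIV_ROUND_UP(len, PAGE_SIZE);
    	void *kaddr;
    	pfn_t pfn;
    	long avail;

    	/* returns pages contiguously mappable at *kaddr, or negative errno */
    	avail = dax_direct_access(dax_dev, pgoff, nr, &kaddr, &pfn);
    	if (avail < 0)
    		return avail;
    	if (PFN_PHYS(avail) < len)
    		return -ERANGE;		/* mapping is shorter than requested */
    	memcpy(dst, kaddr, len);
    	return 0;
    }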
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index b4ee4f71b4a1..7f4dbd72a90a 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h | |||
@@ -5,8 +5,6 @@ | |||
5 | #include <linux/pfn_t.h> | 5 | #include <linux/pfn_t.h> |
6 | #include <linux/fs.h> | 6 | #include <linux/fs.h> |
7 | 7 | ||
8 | long pmem_direct_access(struct block_device *bdev, sector_t sector, | ||
9 | void **kaddr, pfn_t *pfn, long size); | ||
10 | /* this definition is in its own header for tools/testing/nvdimm to consume */ | 8 | /* this definition is in its own header for tools/testing/nvdimm to consume */ |
11 | struct pmem_device { | 9 | struct pmem_device { |
12 | /* One contiguous memory region per device */ | 10 | /* One contiguous memory region per device */ |
@@ -20,5 +18,10 @@ struct pmem_device { | |||
20 | /* trim size when namespace capacity has been section aligned */ | 18 | /* trim size when namespace capacity has been section aligned */ |
21 | u32 pfn_pad; | 19 | u32 pfn_pad; |
22 | struct badblocks bb; | 20 | struct badblocks bb; |
21 | struct dax_device *dax_dev; | ||
22 | struct gendisk *disk; | ||
23 | }; | 23 | }; |
24 | |||
25 | long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, | ||
26 | long nr_pages, void **kaddr, pfn_t *pfn); | ||
24 | #endif /* __NVDIMM_PMEM_H__ */ | 27 | #endif /* __NVDIMM_PMEM_H__ */ |
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 8f241772ec0b..869a886c292e 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/device.h> | 15 | #include <linux/device.h> |
16 | #include <linux/nd.h> | 16 | #include <linux/nd.h> |
17 | #include "nd-core.h" | ||
17 | #include "nd.h" | 18 | #include "nd.h" |
18 | 19 | ||
19 | static int nd_region_probe(struct device *dev) | 20 | static int nd_region_probe(struct device *dev) |
@@ -52,6 +53,17 @@ static int nd_region_probe(struct device *dev) | |||
52 | if (rc && err && rc == err) | 53 | if (rc && err && rc == err) |
53 | return -ENODEV; | 54 | return -ENODEV; |
54 | 55 | ||
56 | if (is_nd_pmem(&nd_region->dev)) { | ||
57 | struct resource ndr_res; | ||
58 | |||
59 | if (devm_init_badblocks(dev, &nd_region->bb)) | ||
60 | return -ENODEV; | ||
61 | ndr_res.start = nd_region->ndr_start; | ||
62 | ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1; | ||
63 | nvdimm_badblocks_populate(nd_region, | ||
64 | &nd_region->bb, &ndr_res); | ||
65 | } | ||
66 | |||
55 | nd_region->btt_seed = nd_btt_create(nd_region); | 67 | nd_region->btt_seed = nd_btt_create(nd_region); |
56 | nd_region->pfn_seed = nd_pfn_create(nd_region); | 68 | nd_region->pfn_seed = nd_pfn_create(nd_region); |
57 | nd_region->dax_seed = nd_dax_create(nd_region); | 69 | nd_region->dax_seed = nd_dax_create(nd_region); |
@@ -104,6 +116,18 @@ static int child_notify(struct device *dev, void *data) | |||
104 | 116 | ||
105 | static void nd_region_notify(struct device *dev, enum nvdimm_event event) | 117 | static void nd_region_notify(struct device *dev, enum nvdimm_event event) |
106 | { | 118 | { |
119 | if (event == NVDIMM_REVALIDATE_POISON) { | ||
120 | struct nd_region *nd_region = to_nd_region(dev); | ||
121 | struct resource res; | ||
122 | |||
123 | if (is_nd_pmem(&nd_region->dev)) { | ||
124 | res.start = nd_region->ndr_start; | ||
125 | res.end = nd_region->ndr_start + | ||
126 | nd_region->ndr_size - 1; | ||
127 | nvdimm_badblocks_populate(nd_region, | ||
128 | &nd_region->bb, &res); | ||
129 | } | ||
130 | } | ||
107 | device_for_each_child(dev, &event, child_notify); | 131 | device_for_each_child(dev, &event, child_notify); |
108 | } | 132 | } |
109 | 133 | ||
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index b7cb5066d961..b550edf2571f 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c | |||
@@ -222,7 +222,7 @@ int nd_region_to_nstype(struct nd_region *nd_region) | |||
222 | struct nd_mapping *nd_mapping = &nd_region->mapping[i]; | 222 | struct nd_mapping *nd_mapping = &nd_region->mapping[i]; |
223 | struct nvdimm *nvdimm = nd_mapping->nvdimm; | 223 | struct nvdimm *nvdimm = nd_mapping->nvdimm; |
224 | 224 | ||
225 | if (nvdimm->flags & NDD_ALIASING) | 225 | if (test_bit(NDD_ALIASING, &nvdimm->flags)) |
226 | alias++; | 226 | alias++; |
227 | } | 227 | } |
228 | if (alias) | 228 | if (alias) |
@@ -255,6 +255,35 @@ static ssize_t size_show(struct device *dev, | |||
255 | } | 255 | } |
256 | static DEVICE_ATTR_RO(size); | 256 | static DEVICE_ATTR_RO(size); |
257 | 257 | ||
258 | static ssize_t deep_flush_show(struct device *dev, | ||
259 | struct device_attribute *attr, char *buf) | ||
260 | { | ||
261 | struct nd_region *nd_region = to_nd_region(dev); | ||
262 | |||
263 | /* | ||
264 | * NOTE: in the nvdimm_has_flush() error case this attribute is | ||
265 | * not visible. | ||
266 | */ | ||
267 | return sprintf(buf, "%d\n", nvdimm_has_flush(nd_region)); | ||
268 | } | ||
269 | |||
270 | static ssize_t deep_flush_store(struct device *dev, struct device_attribute *attr, | ||
271 | const char *buf, size_t len) | ||
272 | { | ||
273 | bool flush; | ||
274 | int rc = strtobool(buf, &flush); | ||
275 | struct nd_region *nd_region = to_nd_region(dev); | ||
276 | |||
277 | if (rc) | ||
278 | return rc; | ||
279 | if (!flush) | ||
280 | return -EINVAL; | ||
281 | nvdimm_flush(nd_region); | ||
282 | |||
283 | return len; | ||
284 | } | ||
285 | static DEVICE_ATTR_RW(deep_flush); | ||
286 | |||
258 | static ssize_t mappings_show(struct device *dev, | 287 | static ssize_t mappings_show(struct device *dev, |
259 | struct device_attribute *attr, char *buf) | 288 | struct device_attribute *attr, char *buf) |
260 | { | 289 | { |
@@ -448,6 +477,25 @@ static ssize_t read_only_store(struct device *dev, | |||
448 | } | 477 | } |
449 | static DEVICE_ATTR_RW(read_only); | 478 | static DEVICE_ATTR_RW(read_only); |
450 | 479 | ||
480 | static ssize_t region_badblocks_show(struct device *dev, | ||
481 | struct device_attribute *attr, char *buf) | ||
482 | { | ||
483 | struct nd_region *nd_region = to_nd_region(dev); | ||
484 | |||
485 | return badblocks_show(&nd_region->bb, buf, 0); | ||
486 | } | ||
487 | |||
488 | static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL); | ||
489 | |||
490 | static ssize_t resource_show(struct device *dev, | ||
491 | struct device_attribute *attr, char *buf) | ||
492 | { | ||
493 | struct nd_region *nd_region = to_nd_region(dev); | ||
494 | |||
495 | return sprintf(buf, "%#llx\n", nd_region->ndr_start); | ||
496 | } | ||
497 | static DEVICE_ATTR_RO(resource); | ||
498 | |||
451 | static struct attribute *nd_region_attributes[] = { | 499 | static struct attribute *nd_region_attributes[] = { |
452 | &dev_attr_size.attr, | 500 | &dev_attr_size.attr, |
453 | &dev_attr_nstype.attr, | 501 | &dev_attr_nstype.attr, |
@@ -455,11 +503,14 @@ static struct attribute *nd_region_attributes[] = { | |||
455 | &dev_attr_btt_seed.attr, | 503 | &dev_attr_btt_seed.attr, |
456 | &dev_attr_pfn_seed.attr, | 504 | &dev_attr_pfn_seed.attr, |
457 | &dev_attr_dax_seed.attr, | 505 | &dev_attr_dax_seed.attr, |
506 | &dev_attr_deep_flush.attr, | ||
458 | &dev_attr_read_only.attr, | 507 | &dev_attr_read_only.attr, |
459 | &dev_attr_set_cookie.attr, | 508 | &dev_attr_set_cookie.attr, |
460 | &dev_attr_available_size.attr, | 509 | &dev_attr_available_size.attr, |
461 | &dev_attr_namespace_seed.attr, | 510 | &dev_attr_namespace_seed.attr, |
462 | &dev_attr_init_namespaces.attr, | 511 | &dev_attr_init_namespaces.attr, |
512 | &dev_attr_badblocks.attr, | ||
513 | &dev_attr_resource.attr, | ||
463 | NULL, | 514 | NULL, |
464 | }; | 515 | }; |
465 | 516 | ||
@@ -476,6 +527,23 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) | |||
476 | if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr) | 527 | if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr) |
477 | return 0; | 528 | return 0; |
478 | 529 | ||
530 | if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) | ||
531 | return 0; | ||
532 | |||
533 | if (!is_nd_pmem(dev) && a == &dev_attr_resource.attr) | ||
534 | return 0; | ||
535 | |||
536 | if (a == &dev_attr_deep_flush.attr) { | ||
537 | int has_flush = nvdimm_has_flush(nd_region); | ||
538 | |||
539 | if (has_flush == 1) | ||
540 | return a->mode; | ||
541 | else if (has_flush == 0) | ||
542 | return 0444; | ||
543 | else | ||
544 | return 0; | ||
545 | } | ||
546 | |||
479 | if (a != &dev_attr_set_cookie.attr | 547 | if (a != &dev_attr_set_cookie.attr |
480 | && a != &dev_attr_available_size.attr) | 548 | && a != &dev_attr_available_size.attr) |
481 | return a->mode; | 549 | return a->mode; |
@@ -813,7 +881,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, | |||
813 | return NULL; | 881 | return NULL; |
814 | } | 882 | } |
815 | 883 | ||
816 | if (nvdimm->flags & NDD_UNARMED) | 884 | if (test_bit(NDD_UNARMED, &nvdimm->flags)) |
817 | ro = 1; | 885 | ro = 1; |
818 | } | 886 | } |
819 | 887 | ||
@@ -968,17 +1036,20 @@ EXPORT_SYMBOL_GPL(nvdimm_flush); | |||
968 | */ | 1036 | */ |
969 | int nvdimm_has_flush(struct nd_region *nd_region) | 1037 | int nvdimm_has_flush(struct nd_region *nd_region) |
970 | { | 1038 | { |
971 | struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); | ||
972 | int i; | 1039 | int i; |
973 | 1040 | ||
974 | /* no nvdimm == flushing capability unknown */ | 1041 | /* no nvdimm == flushing capability unknown */ |
975 | if (nd_region->ndr_mappings == 0) | 1042 | if (nd_region->ndr_mappings == 0) |
976 | return -ENXIO; | 1043 | return -ENXIO; |
977 | 1044 | ||
978 | for (i = 0; i < nd_region->ndr_mappings; i++) | 1045 | for (i = 0; i < nd_region->ndr_mappings; i++) { |
979 | /* flush hints present, flushing required */ | 1046 | struct nd_mapping *nd_mapping = &nd_region->mapping[i]; |
980 | if (ndrd_get_flush_wpq(ndrd, i, 0)) | 1047 | struct nvdimm *nvdimm = nd_mapping->nvdimm; |
1048 | |||
1049 | /* flush hints present / available */ | ||
1050 | if (nvdimm->num_flush) | ||
981 | return 1; | 1051 | return 1; |
1052 | } | ||
982 | 1053 | ||
983 | /* | 1054 | /* |
984 | * The platform defines dimm devices without hints, assume | 1055 | * The platform defines dimm devices without hints, assume |
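The nvdimm_has_flush() rework consults nvdimm->num_flush directly instead of region driver data, so the answer is available before a region driver binds. Its tri-state result feeds the region_visible() logic earlier in this file; a sketch of the consumer side, restated in one place:

    /* Sketch: how region_visible() consumes the tri-state result */
    static umode_t deep_flush_visibility(struct nd_region *nd_region,
                    umode_t mode)
    {
            int has_flush = nvdimm_has_flush(nd_region);

            if (has_flush == 1)     /* flush hints present, writes useful */
                    return mode;
            if (has_flush == 0)     /* platform persists without hints */
                    return 0444;
            return 0;               /* capability unknown, hide attribute */
    }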
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 4a3b62326183..0acb8c2f9475 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig | |||
@@ -14,6 +14,7 @@ config BLK_DEV_XPRAM | |||
14 | 14 | ||
15 | config DCSSBLK | 15 | config DCSSBLK |
16 | def_tristate m | 16 | def_tristate m |
17 | select DAX | ||
17 | prompt "DCSSBLK support" | 18 | prompt "DCSSBLK support" |
18 | depends on S390 && BLOCK | 19 | depends on S390 && BLOCK |
19 | help | 20 | help |
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 415d10a67b7a..36e5280af3e4 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
19 | #include <linux/platform_device.h> | 19 | #include <linux/platform_device.h> |
20 | #include <linux/pfn_t.h> | 20 | #include <linux/pfn_t.h> |
21 | #include <linux/dax.h> | ||
21 | #include <asm/extmem.h> | 22 | #include <asm/extmem.h> |
22 | #include <asm/io.h> | 23 | #include <asm/io.h> |
23 | 24 | ||
@@ -30,8 +31,8 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); | |||
30 | static void dcssblk_release(struct gendisk *disk, fmode_t mode); | 31 | static void dcssblk_release(struct gendisk *disk, fmode_t mode); |
31 | static blk_qc_t dcssblk_make_request(struct request_queue *q, | 32 | static blk_qc_t dcssblk_make_request(struct request_queue *q, |
32 | struct bio *bio); | 33 | struct bio *bio); |
33 | static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, | 34 | static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, |
34 | void **kaddr, pfn_t *pfn, long size); | 35 | long nr_pages, void **kaddr, pfn_t *pfn); |
35 | 36 | ||
36 | static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; | 37 | static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; |
37 | 38 | ||
@@ -40,7 +41,10 @@ static const struct block_device_operations dcssblk_devops = { | |||
40 | .owner = THIS_MODULE, | 41 | .owner = THIS_MODULE, |
41 | .open = dcssblk_open, | 42 | .open = dcssblk_open, |
42 | .release = dcssblk_release, | 43 | .release = dcssblk_release, |
43 | .direct_access = dcssblk_direct_access, | 44 | }; |
45 | |||
46 | static const struct dax_operations dcssblk_dax_ops = { | ||
47 | .direct_access = dcssblk_dax_direct_access, | ||
44 | }; | 48 | }; |
45 | 49 | ||
46 | struct dcssblk_dev_info { | 50 | struct dcssblk_dev_info { |
@@ -57,6 +61,7 @@ struct dcssblk_dev_info { | |||
57 | struct request_queue *dcssblk_queue; | 61 | struct request_queue *dcssblk_queue; |
58 | int num_of_segments; | 62 | int num_of_segments; |
59 | struct list_head seg_list; | 63 | struct list_head seg_list; |
64 | struct dax_device *dax_dev; | ||
60 | }; | 65 | }; |
61 | 66 | ||
62 | struct segment_info { | 67 | struct segment_info { |
@@ -389,6 +394,8 @@ removeseg: | |||
389 | } | 394 | } |
390 | list_del(&dev_info->lh); | 395 | list_del(&dev_info->lh); |
391 | 396 | ||
397 | kill_dax(dev_info->dax_dev); | ||
398 | put_dax(dev_info->dax_dev); | ||
392 | del_gendisk(dev_info->gd); | 399 | del_gendisk(dev_info->gd); |
393 | blk_cleanup_queue(dev_info->dcssblk_queue); | 400 | blk_cleanup_queue(dev_info->dcssblk_queue); |
394 | dev_info->gd->queue = NULL; | 401 | dev_info->gd->queue = NULL; |
@@ -654,6 +661,13 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char | |||
654 | if (rc) | 661 | if (rc) |
655 | goto put_dev; | 662 | goto put_dev; |
656 | 663 | ||
664 | dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name, | ||
665 | &dcssblk_dax_ops); | ||
666 | if (!dev_info->dax_dev) { | ||
667 | rc = -ENOMEM; | ||
668 | goto put_dev; | ||
669 | } | ||
670 | |||
657 | get_device(&dev_info->dev); | 671 | get_device(&dev_info->dev); |
658 | device_add_disk(&dev_info->dev, dev_info->gd); | 672 | device_add_disk(&dev_info->dev, dev_info->gd); |
659 | 673 | ||
@@ -752,6 +766,8 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch | |||
752 | } | 766 | } |
753 | 767 | ||
754 | list_del(&dev_info->lh); | 768 | list_del(&dev_info->lh); |
769 | kill_dax(dev_info->dax_dev); | ||
770 | put_dax(dev_info->dax_dev); | ||
755 | del_gendisk(dev_info->gd); | 771 | del_gendisk(dev_info->gd); |
756 | blk_cleanup_queue(dev_info->dcssblk_queue); | 772 | blk_cleanup_queue(dev_info->dcssblk_queue); |
757 | dev_info->gd->queue = NULL; | 773 | dev_info->gd->queue = NULL; |
@@ -883,21 +899,26 @@ fail: | |||
883 | } | 899 | } |
884 | 900 | ||
885 | static long | 901 | static long |
886 | dcssblk_direct_access (struct block_device *bdev, sector_t secnum, | 902 | __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff, |
887 | void **kaddr, pfn_t *pfn, long size) | 903 | long nr_pages, void **kaddr, pfn_t *pfn) |
888 | { | 904 | { |
889 | struct dcssblk_dev_info *dev_info; | 905 | resource_size_t offset = pgoff * PAGE_SIZE; |
890 | unsigned long offset, dev_sz; | 906 | unsigned long dev_sz; |
891 | 907 | ||
892 | dev_info = bdev->bd_disk->private_data; | ||
893 | if (!dev_info) | ||
894 | return -ENODEV; | ||
895 | dev_sz = dev_info->end - dev_info->start + 1; | 908 | dev_sz = dev_info->end - dev_info->start + 1; |
896 | offset = secnum * 512; | ||
897 | *kaddr = (void *) dev_info->start + offset; | 909 | *kaddr = (void *) dev_info->start + offset; |
898 | *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); | 910 | *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); |
899 | 911 | ||
900 | return dev_sz - offset; | 912 | return (dev_sz - offset) / PAGE_SIZE; |
913 | } | ||
914 | |||
915 | static long | ||
916 | dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, | ||
917 | long nr_pages, void **kaddr, pfn_t *pfn) | ||
918 | { | ||
919 | struct dcssblk_dev_info *dev_info = dax_get_private(dax_dev); | ||
920 | |||
921 | return __dcssblk_direct_access(dev_info, pgoff, nr_pages, kaddr, pfn); | ||
901 | } | 922 | } |
902 | 923 | ||
903 | static void | 924 | static void |
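dcssblk picks up the same teardown ordering as the other converted drivers: kill_dax() invalidates the device so new dax_direct_access() calls fail, in-flight callers drain via the dax_read_lock() protocol, and put_dax() drops the alloc_dax() reference before the gendisk goes away. The pattern, condensed into a sketch (struct my_dev and the helpers around it are hypothetical):

    /* Sketch of the alloc_dax()/kill_dax()/put_dax() lifecycle */
    static int example_attach(struct my_dev *dev)
    {
            dev->dax_dev = alloc_dax(dev, dev->gd->disk_name, &my_dax_ops);
            if (!dev->dax_dev)
                    return -ENOMEM;
            device_add_disk(dev->parent, dev->gd);
            return 0;
    }

    static void example_detach(struct my_dev *dev)
    {
            kill_dax(dev->dax_dev);  /* new dax calls now fail */
            put_dax(dev->dax_dev);   /* drop the alloc_dax() reference */
            del_gendisk(dev->gd);    /* safe: no dax users can enter */
    }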
diff --git a/fs/block_dev.c b/fs/block_dev.c index 0d435c794d76..2a305c1a2d88 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/blkpg.h> | 19 | #include <linux/blkpg.h> |
20 | #include <linux/magic.h> | 20 | #include <linux/magic.h> |
21 | #include <linux/dax.h> | ||
21 | #include <linux/buffer_head.h> | 22 | #include <linux/buffer_head.h> |
22 | #include <linux/swap.h> | 23 | #include <linux/swap.h> |
23 | #include <linux/pagevec.h> | 24 | #include <linux/pagevec.h> |
@@ -716,50 +717,18 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, | |||
716 | } | 717 | } |
717 | EXPORT_SYMBOL_GPL(bdev_write_page); | 718 | EXPORT_SYMBOL_GPL(bdev_write_page); |
718 | 719 | ||
719 | /** | 720 | int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, |
720 | * bdev_direct_access() - Get the address for directly-accessibly memory | 721 | pgoff_t *pgoff) |
721 | * @bdev: The device containing the memory | ||
722 | * @dax: control and output parameters for ->direct_access | ||
723 | * | ||
724 | * If a block device is made up of directly addressable memory, this function | ||
725 | * will tell the caller the PFN and the address of the memory. The address | ||
726 | * may be directly dereferenced within the kernel without the need to call | ||
727 | * ioremap(), kmap() or similar. The PFN is suitable for inserting into | ||
728 | * page tables. | ||
729 | * | ||
730 | * Return: negative errno if an error occurs, otherwise the number of bytes | ||
731 | * accessible at this address. | ||
732 | */ | ||
733 | long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax) | ||
734 | { | 722 | { |
735 | sector_t sector = dax->sector; | 723 | phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512; |
736 | long avail, size = dax->size; | ||
737 | const struct block_device_operations *ops = bdev->bd_disk->fops; | ||
738 | 724 | ||
739 | /* | 725 | if (pgoff) |
740 | * The device driver is allowed to sleep, in order to make the | 726 | *pgoff = PHYS_PFN(phys_off); |
741 | * memory directly accessible. | 727 | if (phys_off % PAGE_SIZE || size % PAGE_SIZE) |
742 | */ | ||
743 | might_sleep(); | ||
744 | |||
745 | if (size < 0) | ||
746 | return size; | ||
747 | if (!blk_queue_dax(bdev_get_queue(bdev)) || !ops->direct_access) | ||
748 | return -EOPNOTSUPP; | ||
749 | if ((sector + DIV_ROUND_UP(size, 512)) > | ||
750 | part_nr_sects_read(bdev->bd_part)) | ||
751 | return -ERANGE; | ||
752 | sector += get_start_sect(bdev); | ||
753 | if (sector % (PAGE_SIZE / 512)) | ||
754 | return -EINVAL; | 728 | return -EINVAL; |
755 | avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn, size); | 729 | return 0; |
756 | if (!avail) | ||
757 | return -ERANGE; | ||
758 | if (avail > 0 && avail & ~PAGE_MASK) | ||
759 | return -ENXIO; | ||
760 | return min(avail, size); | ||
761 | } | 730 | } |
762 | EXPORT_SYMBOL_GPL(bdev_direct_access); | 731 | EXPORT_SYMBOL(bdev_dax_pgoff); |
763 | 732 | ||
764 | /** | 733 | /** |
765 | * bdev_dax_supported() - Check if the device supports dax for filesystem | 734 | * bdev_dax_supported() - Check if the device supports dax for filesystem |
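bdev_dax_pgoff() collapses the old sector checks into two rules: fold the partition start into a whole-device byte offset, and reject anything not page aligned. A worked sketch of the arithmetic (sector counts are 512-byte units; the numbers are illustrative):

    static int example_pgoff(struct block_device *bdev)
    {
            pgoff_t pgoff;

            /*
             * Partition starting at sector 2048 (1 MiB), caller asking
             * for sector 8 within it:
             *   phys_off = (2048 + 8) * 512 = 1052672
             *   pgoff    = PHYS_PFN(1052672) = 257   (4K pages)
             * phys_off and size are page multiples, so this returns 0.
             */
            return bdev_dax_pgoff(bdev, 8, PAGE_SIZE, &pgoff);
    }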
@@ -773,62 +742,46 @@ EXPORT_SYMBOL_GPL(bdev_direct_access); | |||
773 | */ | 742 | */ |
774 | int bdev_dax_supported(struct super_block *sb, int blocksize) | 743 | int bdev_dax_supported(struct super_block *sb, int blocksize) |
775 | { | 744 | { |
776 | struct blk_dax_ctl dax = { | 745 | struct block_device *bdev = sb->s_bdev; |
777 | .sector = 0, | 746 | struct dax_device *dax_dev; |
778 | .size = PAGE_SIZE, | 747 | pgoff_t pgoff; |
779 | }; | 748 | int err, id; |
780 | int err; | 749 | void *kaddr; |
750 | pfn_t pfn; | ||
751 | long len; | ||
781 | 752 | ||
782 | if (blocksize != PAGE_SIZE) { | 753 | if (blocksize != PAGE_SIZE) { |
783 | vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax"); | 754 | vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax"); |
784 | return -EINVAL; | 755 | return -EINVAL; |
785 | } | 756 | } |
786 | 757 | ||
787 | err = bdev_direct_access(sb->s_bdev, &dax); | 758 | err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); |
788 | if (err < 0) { | 759 | if (err) { |
789 | switch (err) { | 760 | vfs_msg(sb, KERN_ERR, "error: unaligned partition for dax"); |
790 | case -EOPNOTSUPP: | ||
791 | vfs_msg(sb, KERN_ERR, | ||
792 | "error: device does not support dax"); | ||
793 | break; | ||
794 | case -EINVAL: | ||
795 | vfs_msg(sb, KERN_ERR, | ||
796 | "error: unaligned partition for dax"); | ||
797 | break; | ||
798 | default: | ||
799 | vfs_msg(sb, KERN_ERR, | ||
800 | "error: dax access failed (%d)", err); | ||
801 | } | ||
802 | return err; | 761 | return err; |
803 | } | 762 | } |
804 | 763 | ||
805 | return 0; | 764 | dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); |
806 | } | 765 | if (!dax_dev) { |
807 | EXPORT_SYMBOL_GPL(bdev_dax_supported); | 766 | vfs_msg(sb, KERN_ERR, "error: device does not support dax"); |
808 | 767 | return -EOPNOTSUPP; | |
809 | /** | 768 | } |
810 | * bdev_dax_capable() - Return if the raw device is capable for dax | ||
811 | * @bdev: The device for raw block device access | ||
812 | */ | ||
813 | bool bdev_dax_capable(struct block_device *bdev) | ||
814 | { | ||
815 | struct blk_dax_ctl dax = { | ||
816 | .size = PAGE_SIZE, | ||
817 | }; | ||
818 | 769 | ||
819 | if (!IS_ENABLED(CONFIG_FS_DAX)) | 770 | id = dax_read_lock(); |
820 | return false; | 771 | len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); |
772 | dax_read_unlock(id); | ||
821 | 773 | ||
822 | dax.sector = 0; | 774 | put_dax(dax_dev); |
823 | if (bdev_direct_access(bdev, &dax) < 0) | ||
824 | return false; | ||
825 | 775 | ||
826 | dax.sector = bdev->bd_part->nr_sects - (PAGE_SIZE / 512); | 776 | if (len < 1) { |
827 | if (bdev_direct_access(bdev, &dax) < 0) | 777 | vfs_msg(sb, KERN_ERR, |
828 | return false; | 778 | "error: dax access failed (%ld)", len); |
779 | return len < 0 ? len : -EIO; | ||
780 | } | ||
829 | 781 | ||
830 | return true; | 782 | return 0; |
831 | } | 783 | } |
784 | EXPORT_SYMBOL_GPL(bdev_dax_supported); | ||
832 | 785 | ||
833 | /* | 786 | /* |
834 | * pseudo-fs | 787 | * pseudo-fs |
diff --git a/fs/dax.c b/fs/dax.c --- a/fs/dax.c +++ b/fs/dax.c | |||
@@ -55,32 +55,6 @@ static int __init init_dax_wait_table(void) | |||
55 | } | 55 | } |
56 | fs_initcall(init_dax_wait_table); | 56 | fs_initcall(init_dax_wait_table); |
57 | 57 | ||
58 | static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax) | ||
59 | { | ||
60 | struct request_queue *q = bdev->bd_queue; | ||
61 | long rc = -EIO; | ||
62 | |||
63 | dax->addr = ERR_PTR(-EIO); | ||
64 | if (blk_queue_enter(q, true) != 0) | ||
65 | return rc; | ||
66 | |||
67 | rc = bdev_direct_access(bdev, dax); | ||
68 | if (rc < 0) { | ||
69 | dax->addr = ERR_PTR(rc); | ||
70 | blk_queue_exit(q); | ||
71 | return rc; | ||
72 | } | ||
73 | return rc; | ||
74 | } | ||
75 | |||
76 | static void dax_unmap_atomic(struct block_device *bdev, | ||
77 | const struct blk_dax_ctl *dax) | ||
78 | { | ||
79 | if (IS_ERR(dax->addr)) | ||
80 | return; | ||
81 | blk_queue_exit(bdev->bd_queue); | ||
82 | } | ||
83 | |||
84 | static int dax_is_pmd_entry(void *entry) | 58 | static int dax_is_pmd_entry(void *entry) |
85 | { | 59 | { |
86 | return (unsigned long)entry & RADIX_DAX_PMD; | 60 | return (unsigned long)entry & RADIX_DAX_PMD; |
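From here on, every dax_map_atomic()/dax_unmap_atomic() pair in this file is replaced by the same three-step idiom: translate the sector with bdev_dax_pgoff(), then call dax_direct_access() inside a dax_read_lock() section, which pins the dax_device against kill_dax() instead of holding the request_queue open. The skeleton the conversions below all follow (error handling trimmed):

    static long example_access(struct block_device *bdev,
                    struct dax_device *dax_dev, sector_t sector, size_t size)
    {
            pgoff_t pgoff;
            void *kaddr;
            pfn_t pfn;
            long rc;
            int id;

            rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
            if (rc)
                    return rc;

            id = dax_read_lock();   /* pins dax_dev against kill_dax() */
            rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
                            &kaddr, &pfn);
            /* kaddr and pfn are only valid until dax_read_unlock() */
            dax_read_unlock(id);
            return rc;
    }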
@@ -101,26 +75,6 @@ static int dax_is_empty_entry(void *entry) | |||
101 | return (unsigned long)entry & RADIX_DAX_EMPTY; | 75 | return (unsigned long)entry & RADIX_DAX_EMPTY; |
102 | } | 76 | } |
103 | 77 | ||
104 | struct page *read_dax_sector(struct block_device *bdev, sector_t n) | ||
105 | { | ||
106 | struct page *page = alloc_pages(GFP_KERNEL, 0); | ||
107 | struct blk_dax_ctl dax = { | ||
108 | .size = PAGE_SIZE, | ||
109 | .sector = n & ~((((int) PAGE_SIZE) / 512) - 1), | ||
110 | }; | ||
111 | long rc; | ||
112 | |||
113 | if (!page) | ||
114 | return ERR_PTR(-ENOMEM); | ||
115 | |||
116 | rc = dax_map_atomic(bdev, &dax); | ||
117 | if (rc < 0) | ||
118 | return ERR_PTR(rc); | ||
119 | memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE); | ||
120 | dax_unmap_atomic(bdev, &dax); | ||
121 | return page; | ||
122 | } | ||
123 | |||
124 | /* | 78 | /* |
125 | * DAX radix tree locking | 79 | * DAX radix tree locking |
126 | */ | 80 | */ |
@@ -582,21 +536,30 @@ static int dax_load_hole(struct address_space *mapping, void **entry, | |||
582 | return ret; | 536 | return ret; |
583 | } | 537 | } |
584 | 538 | ||
585 | static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size, | 539 | static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, |
586 | struct page *to, unsigned long vaddr) | 540 | sector_t sector, size_t size, struct page *to, |
541 | unsigned long vaddr) | ||
587 | { | 542 | { |
588 | struct blk_dax_ctl dax = { | 543 | void *vto, *kaddr; |
589 | .sector = sector, | 544 | pgoff_t pgoff; |
590 | .size = size, | 545 | pfn_t pfn; |
591 | }; | 546 | long rc; |
592 | void *vto; | 547 | int id; |
593 | 548 | ||
594 | if (dax_map_atomic(bdev, &dax) < 0) | 549 | rc = bdev_dax_pgoff(bdev, sector, size, &pgoff); |
595 | return PTR_ERR(dax.addr); | 550 | if (rc) |
551 | return rc; | ||
552 | |||
553 | id = dax_read_lock(); | ||
554 | rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); | ||
555 | if (rc < 0) { | ||
556 | dax_read_unlock(id); | ||
557 | return rc; | ||
558 | } | ||
596 | vto = kmap_atomic(to); | 559 | vto = kmap_atomic(to); |
597 | copy_user_page(vto, (void __force *)dax.addr, vaddr, to); | 560 | copy_user_page(vto, (void __force *)kaddr, vaddr, to); |
598 | kunmap_atomic(vto); | 561 | kunmap_atomic(vto); |
599 | dax_unmap_atomic(bdev, &dax); | 562 | dax_read_unlock(id); |
600 | return 0; | 563 | return 0; |
601 | } | 564 | } |
602 | 565 | ||
@@ -764,12 +727,16 @@ unlock_pte: | |||
764 | } | 727 | } |
765 | 728 | ||
766 | static int dax_writeback_one(struct block_device *bdev, | 729 | static int dax_writeback_one(struct block_device *bdev, |
767 | struct address_space *mapping, pgoff_t index, void *entry) | 730 | struct dax_device *dax_dev, struct address_space *mapping, |
731 | pgoff_t index, void *entry) | ||
768 | { | 732 | { |
769 | struct radix_tree_root *page_tree = &mapping->page_tree; | 733 | struct radix_tree_root *page_tree = &mapping->page_tree; |
770 | struct blk_dax_ctl dax; | 734 | void *entry2, **slot, *kaddr; |
771 | void *entry2, **slot; | 735 | long ret = 0, id; |
772 | int ret = 0; | 736 | sector_t sector; |
737 | pgoff_t pgoff; | ||
738 | size_t size; | ||
739 | pfn_t pfn; | ||
773 | 740 | ||
774 | /* | 741 | /* |
775 | * A page got tagged dirty in DAX mapping? Something is seriously | 742 | * A page got tagged dirty in DAX mapping? Something is seriously |
@@ -818,26 +785,29 @@ static int dax_writeback_one(struct block_device *bdev, | |||
818 | * 'entry'. This allows us to flush for PMD_SIZE and not have to | 785 | * 'entry'. This allows us to flush for PMD_SIZE and not have to |
819 | * worry about partial PMD writebacks. | 786 | * worry about partial PMD writebacks. |
820 | */ | 787 | */ |
821 | dax.sector = dax_radix_sector(entry); | 788 | sector = dax_radix_sector(entry); |
822 | dax.size = PAGE_SIZE << dax_radix_order(entry); | 789 | size = PAGE_SIZE << dax_radix_order(entry); |
790 | |||
791 | id = dax_read_lock(); | ||
792 | ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); | ||
793 | if (ret) | ||
794 | goto dax_unlock; | ||
823 | 795 | ||
824 | /* | 796 | /* |
825 | * We cannot hold tree_lock while calling dax_map_atomic() because it | 797 | * dax_direct_access() may sleep, so cannot hold tree_lock over |
826 | * eventually calls cond_resched(). | 798 | * its invocation. |
827 | */ | 799 | */ |
828 | ret = dax_map_atomic(bdev, &dax); | 800 | ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn); |
829 | if (ret < 0) { | 801 | if (ret < 0) |
830 | put_locked_mapping_entry(mapping, index, entry); | 802 | goto dax_unlock; |
831 | return ret; | ||
832 | } | ||
833 | 803 | ||
834 | if (WARN_ON_ONCE(ret < dax.size)) { | 804 | if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) { |
835 | ret = -EIO; | 805 | ret = -EIO; |
836 | goto unmap; | 806 | goto dax_unlock; |
837 | } | 807 | } |
838 | 808 | ||
839 | dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(dax.pfn)); | 809 | dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); |
840 | wb_cache_pmem(dax.addr, dax.size); | 810 | wb_cache_pmem(kaddr, size); |
841 | /* | 811 | /* |
842 | * After we have flushed the cache, we can clear the dirty tag. There | 812 | * After we have flushed the cache, we can clear the dirty tag. There |
843 | * cannot be new dirty data in the pfn after the flush has completed as | 813 | * cannot be new dirty data in the pfn after the flush has completed as |
@@ -847,8 +817,8 @@ static int dax_writeback_one(struct block_device *bdev, | |||
847 | spin_lock_irq(&mapping->tree_lock); | 817 | spin_lock_irq(&mapping->tree_lock); |
848 | radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); | 818 | radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); |
849 | spin_unlock_irq(&mapping->tree_lock); | 819 | spin_unlock_irq(&mapping->tree_lock); |
850 | unmap: | 820 | dax_unlock: |
851 | dax_unmap_atomic(bdev, &dax); | 821 | dax_read_unlock(id); |
852 | put_locked_mapping_entry(mapping, index, entry); | 822 | put_locked_mapping_entry(mapping, index, entry); |
853 | return ret; | 823 | return ret; |
854 | 824 | ||
@@ -869,6 +839,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, | |||
869 | struct inode *inode = mapping->host; | 839 | struct inode *inode = mapping->host; |
870 | pgoff_t start_index, end_index; | 840 | pgoff_t start_index, end_index; |
871 | pgoff_t indices[PAGEVEC_SIZE]; | 841 | pgoff_t indices[PAGEVEC_SIZE]; |
842 | struct dax_device *dax_dev; | ||
872 | struct pagevec pvec; | 843 | struct pagevec pvec; |
873 | bool done = false; | 844 | bool done = false; |
874 | int i, ret = 0; | 845 | int i, ret = 0; |
@@ -879,6 +850,10 @@ int dax_writeback_mapping_range(struct address_space *mapping, | |||
879 | if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) | 850 | if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) |
880 | return 0; | 851 | return 0; |
881 | 852 | ||
853 | dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); | ||
854 | if (!dax_dev) | ||
855 | return -EIO; | ||
856 | |||
882 | start_index = wbc->range_start >> PAGE_SHIFT; | 857 | start_index = wbc->range_start >> PAGE_SHIFT; |
883 | end_index = wbc->range_end >> PAGE_SHIFT; | 858 | end_index = wbc->range_end >> PAGE_SHIFT; |
884 | 859 | ||
@@ -899,38 +874,49 @@ int dax_writeback_mapping_range(struct address_space *mapping, | |||
899 | break; | 874 | break; |
900 | } | 875 | } |
901 | 876 | ||
902 | ret = dax_writeback_one(bdev, mapping, indices[i], | 877 | ret = dax_writeback_one(bdev, dax_dev, mapping, |
903 | pvec.pages[i]); | 878 | indices[i], pvec.pages[i]); |
904 | if (ret < 0) | 879 | if (ret < 0) { |
880 | put_dax(dax_dev); | ||
905 | return ret; | 881 | return ret; |
882 | } | ||
906 | } | 883 | } |
907 | } | 884 | } |
885 | put_dax(dax_dev); | ||
908 | return 0; | 886 | return 0; |
909 | } | 887 | } |
910 | EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); | 888 | EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); |
911 | 889 | ||
912 | static int dax_insert_mapping(struct address_space *mapping, | 890 | static int dax_insert_mapping(struct address_space *mapping, |
913 | struct block_device *bdev, sector_t sector, size_t size, | 891 | struct block_device *bdev, struct dax_device *dax_dev, |
914 | void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf) | 892 | sector_t sector, size_t size, void **entryp, |
893 | struct vm_area_struct *vma, struct vm_fault *vmf) | ||
915 | { | 894 | { |
916 | unsigned long vaddr = vmf->address; | 895 | unsigned long vaddr = vmf->address; |
917 | struct blk_dax_ctl dax = { | ||
918 | .sector = sector, | ||
919 | .size = size, | ||
920 | }; | ||
921 | void *ret; | ||
922 | void *entry = *entryp; | 896 | void *entry = *entryp; |
897 | void *ret, *kaddr; | ||
898 | pgoff_t pgoff; | ||
899 | int id, rc; | ||
900 | pfn_t pfn; | ||
923 | 901 | ||
924 | if (dax_map_atomic(bdev, &dax) < 0) | 902 | rc = bdev_dax_pgoff(bdev, sector, size, &pgoff); |
925 | return PTR_ERR(dax.addr); | 903 | if (rc) |
926 | dax_unmap_atomic(bdev, &dax); | 904 | return rc; |
927 | 905 | ||
928 | ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector, 0); | 906 | id = dax_read_lock(); |
907 | rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); | ||
908 | if (rc < 0) { | ||
909 | dax_read_unlock(id); | ||
910 | return rc; | ||
911 | } | ||
912 | dax_read_unlock(id); | ||
913 | |||
914 | ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0); | ||
929 | if (IS_ERR(ret)) | 915 | if (IS_ERR(ret)) |
930 | return PTR_ERR(ret); | 916 | return PTR_ERR(ret); |
931 | *entryp = ret; | 917 | *entryp = ret; |
932 | 918 | ||
933 | return vm_insert_mixed(vma, vaddr, dax.pfn); | 919 | return vm_insert_mixed(vma, vaddr, pfn); |
934 | } | 920 | } |
935 | 921 | ||
936 | /** | 922 | /** |
@@ -979,24 +965,34 @@ static bool dax_range_is_aligned(struct block_device *bdev, | |||
979 | return true; | 965 | return true; |
980 | } | 966 | } |
981 | 967 | ||
982 | int __dax_zero_page_range(struct block_device *bdev, sector_t sector, | 968 | int __dax_zero_page_range(struct block_device *bdev, |
983 | unsigned int offset, unsigned int length) | 969 | struct dax_device *dax_dev, sector_t sector, |
970 | unsigned int offset, unsigned int size) | ||
984 | { | 971 | { |
985 | struct blk_dax_ctl dax = { | 972 | if (dax_range_is_aligned(bdev, offset, size)) { |
986 | .sector = sector, | 973 | sector_t start_sector = sector + (offset >> 9); |
987 | .size = PAGE_SIZE, | ||
988 | }; | ||
989 | |||
990 | if (dax_range_is_aligned(bdev, offset, length)) { | ||
991 | sector_t start_sector = dax.sector + (offset >> 9); | ||
992 | 974 | ||
993 | return blkdev_issue_zeroout(bdev, start_sector, | 975 | return blkdev_issue_zeroout(bdev, start_sector, |
994 | length >> 9, GFP_NOFS, 0); | 976 | size >> 9, GFP_NOFS, 0); |
995 | } else { | 977 | } else { |
996 | if (dax_map_atomic(bdev, &dax) < 0) | 978 | pgoff_t pgoff; |
997 | return PTR_ERR(dax.addr); | 979 | long rc, id; |
998 | clear_pmem(dax.addr + offset, length); | 980 | void *kaddr; |
999 | dax_unmap_atomic(bdev, &dax); | 981 | pfn_t pfn; |
982 | |||
983 | rc = bdev_dax_pgoff(bdev, sector, size, &pgoff); | ||
984 | if (rc) | ||
985 | return rc; | ||
986 | |||
987 | id = dax_read_lock(); | ||
988 | rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, | ||
989 | &pfn); | ||
990 | if (rc < 0) { | ||
991 | dax_read_unlock(id); | ||
992 | return rc; | ||
993 | } | ||
994 | clear_pmem(kaddr + offset, size); | ||
995 | dax_read_unlock(id); | ||
1000 | } | 996 | } |
1001 | return 0; | 997 | return 0; |
1002 | } | 998 | } |
@@ -1011,9 +1007,12 @@ static loff_t | |||
1011 | dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, | 1007 | dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, |
1012 | struct iomap *iomap) | 1008 | struct iomap *iomap) |
1013 | { | 1009 | { |
1010 | struct block_device *bdev = iomap->bdev; | ||
1011 | struct dax_device *dax_dev = iomap->dax_dev; | ||
1014 | struct iov_iter *iter = data; | 1012 | struct iov_iter *iter = data; |
1015 | loff_t end = pos + length, done = 0; | 1013 | loff_t end = pos + length, done = 0; |
1016 | ssize_t ret = 0; | 1014 | ssize_t ret = 0; |
1015 | int id; | ||
1017 | 1016 | ||
1018 | if (iov_iter_rw(iter) == READ) { | 1017 | if (iov_iter_rw(iter) == READ) { |
1019 | end = min(end, i_size_read(inode)); | 1018 | end = min(end, i_size_read(inode)); |
@@ -1038,34 +1037,42 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, | |||
1038 | (end - 1) >> PAGE_SHIFT); | 1037 | (end - 1) >> PAGE_SHIFT); |
1039 | } | 1038 | } |
1040 | 1039 | ||
1040 | id = dax_read_lock(); | ||
1041 | while (pos < end) { | 1041 | while (pos < end) { |
1042 | unsigned offset = pos & (PAGE_SIZE - 1); | 1042 | unsigned offset = pos & (PAGE_SIZE - 1); |
1043 | struct blk_dax_ctl dax = { 0 }; | 1043 | const size_t size = ALIGN(length + offset, PAGE_SIZE); |
1044 | const sector_t sector = dax_iomap_sector(iomap, pos); | ||
1044 | ssize_t map_len; | 1045 | ssize_t map_len; |
1046 | pgoff_t pgoff; | ||
1047 | void *kaddr; | ||
1048 | pfn_t pfn; | ||
1045 | 1049 | ||
1046 | if (fatal_signal_pending(current)) { | 1050 | if (fatal_signal_pending(current)) { |
1047 | ret = -EINTR; | 1051 | ret = -EINTR; |
1048 | break; | 1052 | break; |
1049 | } | 1053 | } |
1050 | 1054 | ||
1051 | dax.sector = dax_iomap_sector(iomap, pos); | 1055 | ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); |
1052 | dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK; | 1056 | if (ret) |
1053 | map_len = dax_map_atomic(iomap->bdev, &dax); | 1057 | break; |
1058 | |||
1059 | map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), | ||
1060 | &kaddr, &pfn); | ||
1054 | if (map_len < 0) { | 1061 | if (map_len < 0) { |
1055 | ret = map_len; | 1062 | ret = map_len; |
1056 | break; | 1063 | break; |
1057 | } | 1064 | } |
1058 | 1065 | ||
1059 | dax.addr += offset; | 1066 | map_len = PFN_PHYS(map_len); |
1067 | kaddr += offset; | ||
1060 | map_len -= offset; | 1068 | map_len -= offset; |
1061 | if (map_len > end - pos) | 1069 | if (map_len > end - pos) |
1062 | map_len = end - pos; | 1070 | map_len = end - pos; |
1063 | 1071 | ||
1064 | if (iov_iter_rw(iter) == WRITE) | 1072 | if (iov_iter_rw(iter) == WRITE) |
1065 | map_len = copy_from_iter_pmem(dax.addr, map_len, iter); | 1073 | map_len = copy_from_iter_pmem(kaddr, map_len, iter); |
1066 | else | 1074 | else |
1067 | map_len = copy_to_iter(dax.addr, map_len, iter); | 1075 | map_len = copy_to_iter(kaddr, map_len, iter); |
1068 | dax_unmap_atomic(iomap->bdev, &dax); | ||
1069 | if (map_len <= 0) { | 1076 | if (map_len <= 0) { |
1070 | ret = map_len ? map_len : -EFAULT; | 1077 | ret = map_len ? map_len : -EFAULT; |
1071 | break; | 1078 | break; |
@@ -1075,6 +1082,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, | |||
1075 | length -= map_len; | 1082 | length -= map_len; |
1076 | done += map_len; | 1083 | done += map_len; |
1077 | } | 1084 | } |
1085 | dax_read_unlock(id); | ||
1078 | 1086 | ||
1079 | return done ? done : ret; | 1087 | return done ? done : ret; |
1080 | } | 1088 | } |
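dax_direct_access() counts pages where bdev_direct_access() counted bytes, hence the PFN_PHYS(map_len) conversion before the offset trim. The per-iteration bookkeeping, restated as a sketch:

    /* Sketch: length bookkeeping for one dax_iomap_actor() iteration */
    static ssize_t example_trim(long map_len_pages, unsigned offset,
                    loff_t pos, loff_t end)
    {
            ssize_t map_len = PFN_PHYS(map_len_pages); /* pages to bytes */

            map_len -= offset;              /* skip intra-page start */
            if (map_len > end - pos)        /* clamp to requested range */
                    map_len = end - pos;
            return map_len;                 /* bytes to copy this pass */
    }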
@@ -1181,8 +1189,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, | |||
1181 | clear_user_highpage(vmf->cow_page, vaddr); | 1189 | clear_user_highpage(vmf->cow_page, vaddr); |
1182 | break; | 1190 | break; |
1183 | case IOMAP_MAPPED: | 1191 | case IOMAP_MAPPED: |
1184 | error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE, | 1192 | error = copy_user_dax(iomap.bdev, iomap.dax_dev, |
1185 | vmf->cow_page, vaddr); | 1193 | sector, PAGE_SIZE, vmf->cow_page, vaddr); |
1186 | break; | 1194 | break; |
1187 | default: | 1195 | default: |
1188 | WARN_ON_ONCE(1); | 1196 | WARN_ON_ONCE(1); |
@@ -1207,8 +1215,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, | |||
1207 | mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); | 1215 | mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); |
1208 | major = VM_FAULT_MAJOR; | 1216 | major = VM_FAULT_MAJOR; |
1209 | } | 1217 | } |
1210 | error = dax_insert_mapping(mapping, iomap.bdev, sector, | 1218 | error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev, |
1211 | PAGE_SIZE, &entry, vmf->vma, vmf); | 1219 | sector, PAGE_SIZE, &entry, vmf->vma, vmf); |
1212 | /* -EBUSY is fine, somebody else faulted on the same PTE */ | 1220 | /* -EBUSY is fine, somebody else faulted on the same PTE */ |
1213 | if (error == -EBUSY) | 1221 | if (error == -EBUSY) |
1214 | error = 0; | 1222 | error = 0; |
@@ -1258,41 +1266,48 @@ static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, | |||
1258 | loff_t pos, void **entryp) | 1266 | loff_t pos, void **entryp) |
1259 | { | 1267 | { |
1260 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; | 1268 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; |
1269 | const sector_t sector = dax_iomap_sector(iomap, pos); | ||
1270 | struct dax_device *dax_dev = iomap->dax_dev; | ||
1261 | struct block_device *bdev = iomap->bdev; | 1271 | struct block_device *bdev = iomap->bdev; |
1262 | struct inode *inode = mapping->host; | 1272 | struct inode *inode = mapping->host; |
1263 | struct blk_dax_ctl dax = { | 1273 | const size_t size = PMD_SIZE; |
1264 | .sector = dax_iomap_sector(iomap, pos), | 1274 | void *ret = NULL, *kaddr; |
1265 | .size = PMD_SIZE, | 1275 | long length = 0; |
1266 | }; | 1276 | pgoff_t pgoff; |
1267 | long length = dax_map_atomic(bdev, &dax); | 1277 | pfn_t pfn; |
1268 | void *ret = NULL; | 1278 | int id; |
1269 | 1279 | ||
1270 | if (length < 0) /* dax_map_atomic() failed */ | 1280 | if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0) |
1271 | goto fallback; | 1281 | goto fallback; |
1272 | if (length < PMD_SIZE) | ||
1273 | goto unmap_fallback; | ||
1274 | if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) | ||
1275 | goto unmap_fallback; | ||
1276 | if (!pfn_t_devmap(dax.pfn)) | ||
1277 | goto unmap_fallback; | ||
1278 | |||
1279 | dax_unmap_atomic(bdev, &dax); | ||
1280 | 1282 | ||
1281 | ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector, | 1283 | id = dax_read_lock(); |
1284 | length = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); | ||
1285 | if (length < 0) | ||
1286 | goto unlock_fallback; | ||
1287 | length = PFN_PHYS(length); | ||
1288 | |||
1289 | if (length < size) | ||
1290 | goto unlock_fallback; | ||
1291 | if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR) | ||
1292 | goto unlock_fallback; | ||
1293 | if (!pfn_t_devmap(pfn)) | ||
1294 | goto unlock_fallback; | ||
1295 | dax_read_unlock(id); | ||
1296 | |||
1297 | ret = dax_insert_mapping_entry(mapping, vmf, *entryp, sector, | ||
1282 | RADIX_DAX_PMD); | 1298 | RADIX_DAX_PMD); |
1283 | if (IS_ERR(ret)) | 1299 | if (IS_ERR(ret)) |
1284 | goto fallback; | 1300 | goto fallback; |
1285 | *entryp = ret; | 1301 | *entryp = ret; |
1286 | 1302 | ||
1287 | trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret); | 1303 | trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret); |
1288 | return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, | 1304 | return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, |
1289 | dax.pfn, vmf->flags & FAULT_FLAG_WRITE); | 1305 | pfn, vmf->flags & FAULT_FLAG_WRITE); |
1290 | 1306 | ||
1291 | unmap_fallback: | 1307 | unlock_fallback: |
1292 | dax_unmap_atomic(bdev, &dax); | 1308 | dax_read_unlock(id); |
1293 | fallback: | 1309 | fallback: |
1294 | trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, | 1310 | trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, pfn, ret); |
1295 | dax.pfn, ret); | ||
1296 | return VM_FAULT_FALLBACK; | 1311 | return VM_FAULT_FALLBACK; |
1297 | } | 1312 | } |
1298 | 1313 | ||
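The PMD path installs a huge mapping only when dax_direct_access() hands back a full, naturally aligned, devmap-backed 2M extent; anything less falls back to PTEs. The three gates, restated (PG_PMD_COLOUR is the fs/dax.c mask of pfn bits that must be clear for PMD alignment):

    /* Sketch: the fallback gates in dax_pmd_insert_mapping() */
    static bool example_pmd_ok(long length_bytes, pfn_t pfn)
    {
            if (length_bytes < PMD_SIZE)            /* extent too short */
                    return false;
            if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR)  /* not 2M aligned */
                    return false;
            if (!pfn_t_devmap(pfn))                 /* no struct page */
                    return false;
            return true;
    }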
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 3a38c1b84e3c..26d77f9f8c12 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -799,6 +799,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock, | |||
799 | static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, | 799 | static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, |
800 | unsigned flags, struct iomap *iomap) | 800 | unsigned flags, struct iomap *iomap) |
801 | { | 801 | { |
802 | struct block_device *bdev; | ||
802 | unsigned int blkbits = inode->i_blkbits; | 803 | unsigned int blkbits = inode->i_blkbits; |
803 | unsigned long first_block = offset >> blkbits; | 804 | unsigned long first_block = offset >> blkbits; |
804 | unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits; | 805 | unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits; |
@@ -812,8 +813,13 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, | |||
812 | return ret; | 813 | return ret; |
813 | 814 | ||
814 | iomap->flags = 0; | 815 | iomap->flags = 0; |
815 | iomap->bdev = inode->i_sb->s_bdev; | 816 | bdev = inode->i_sb->s_bdev; |
817 | iomap->bdev = bdev; | ||
816 | iomap->offset = (u64)first_block << blkbits; | 818 | iomap->offset = (u64)first_block << blkbits; |
819 | if (blk_queue_dax(bdev->bd_queue)) | ||
820 | iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); | ||
821 | else | ||
822 | iomap->dax_dev = NULL; | ||
817 | 823 | ||
818 | if (ret == 0) { | 824 | if (ret == 0) { |
819 | iomap->type = IOMAP_HOLE; | 825 | iomap->type = IOMAP_HOLE; |
@@ -835,6 +841,7 @@ static int | |||
835 | ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, | 841 | ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, |
836 | ssize_t written, unsigned flags, struct iomap *iomap) | 842 | ssize_t written, unsigned flags, struct iomap *iomap) |
837 | { | 843 | { |
844 | put_dax(iomap->dax_dev); | ||
838 | if (iomap->type == IOMAP_MAPPED && | 845 | if (iomap->type == IOMAP_MAPPED && |
839 | written < length && | 846 | written < length && |
840 | (flags & IOMAP_WRITE)) | 847 | (flags & IOMAP_WRITE)) |
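ext2 establishes the iomap pairing that ext4 and xfs repeat below: ->iomap_begin() takes a dax_device reference only when the queue advertises DAX, and ->iomap_end() drops it unconditionally (the NULL case relies on put_dax() tolerating a NULL argument, which all three callers depend on). Condensed:

    /* The begin/end pairing shared by ext2, ext4, and xfs below */
    static void example_iomap_begin(struct iomap *iomap,
                    struct block_device *bdev)
    {
            iomap->bdev = bdev;
            if (blk_queue_dax(bdev->bd_queue))
                    iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
            else
                    iomap->dax_dev = NULL;
    }

    static void example_iomap_end(struct iomap *iomap)
    {
            put_dax(iomap->dax_dev);        /* NULL is tolerated */
    }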
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 10b574ab354b..f0729b0705c7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3305,6 +3305,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3305 | static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, | 3305 | static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, |
3306 | unsigned flags, struct iomap *iomap) | 3306 | unsigned flags, struct iomap *iomap) |
3307 | { | 3307 | { |
3308 | struct block_device *bdev; | ||
3308 | unsigned int blkbits = inode->i_blkbits; | 3309 | unsigned int blkbits = inode->i_blkbits; |
3309 | unsigned long first_block = offset >> blkbits; | 3310 | unsigned long first_block = offset >> blkbits; |
3310 | unsigned long last_block = (offset + length - 1) >> blkbits; | 3311 | unsigned long last_block = (offset + length - 1) >> blkbits; |
@@ -3373,7 +3374,12 @@ retry: | |||
3373 | } | 3374 | } |
3374 | 3375 | ||
3375 | iomap->flags = 0; | 3376 | iomap->flags = 0; |
3376 | iomap->bdev = inode->i_sb->s_bdev; | 3377 | bdev = inode->i_sb->s_bdev; |
3378 | iomap->bdev = bdev; | ||
3379 | if (blk_queue_dax(bdev->bd_queue)) | ||
3380 | iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); | ||
3381 | else | ||
3382 | iomap->dax_dev = NULL; | ||
3377 | iomap->offset = first_block << blkbits; | 3383 | iomap->offset = first_block << blkbits; |
3378 | 3384 | ||
3379 | if (ret == 0) { | 3385 | if (ret == 0) { |
@@ -3406,6 +3412,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, | |||
3406 | int blkbits = inode->i_blkbits; | 3412 | int blkbits = inode->i_blkbits; |
3407 | bool truncate = false; | 3413 | bool truncate = false; |
3408 | 3414 | ||
3415 | put_dax(iomap->dax_dev); | ||
3409 | if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) | 3416 | if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) |
3410 | return 0; | 3417 | return 0; |
3411 | 3418 | ||
diff --git a/fs/iomap.c b/fs/iomap.c index 1c25ae30500e..4add7d4ad006 100644 --- a/fs/iomap.c +++ b/fs/iomap.c | |||
@@ -360,7 +360,8 @@ static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes, | |||
360 | sector_t sector = iomap->blkno + | 360 | sector_t sector = iomap->blkno + |
361 | (((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9); | 361 | (((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9); |
362 | 362 | ||
363 | return __dax_zero_page_range(iomap->bdev, sector, offset, bytes); | 363 | return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector, |
364 | offset, bytes); | ||
364 | } | 365 | } |
365 | 366 | ||
366 | static loff_t | 367 | static loff_t |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 288ee5b840d7..4b47403f8089 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -976,6 +976,7 @@ xfs_file_iomap_begin( | |||
976 | int nimaps = 1, error = 0; | 976 | int nimaps = 1, error = 0; |
977 | bool shared = false, trimmed = false; | 977 | bool shared = false, trimmed = false; |
978 | unsigned lockmode; | 978 | unsigned lockmode; |
979 | struct block_device *bdev; | ||
979 | 980 | ||
980 | if (XFS_FORCED_SHUTDOWN(mp)) | 981 | if (XFS_FORCED_SHUTDOWN(mp)) |
981 | return -EIO; | 982 | return -EIO; |
@@ -1063,6 +1064,14 @@ xfs_file_iomap_begin( | |||
1063 | } | 1064 | } |
1064 | 1065 | ||
1065 | xfs_bmbt_to_iomap(ip, iomap, &imap); | 1066 | xfs_bmbt_to_iomap(ip, iomap, &imap); |
1067 | |||
1068 | /* optionally associate a dax device with the iomap bdev */ | ||
1069 | bdev = iomap->bdev; | ||
1070 | if (blk_queue_dax(bdev->bd_queue)) | ||
1071 | iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); | ||
1072 | else | ||
1073 | iomap->dax_dev = NULL; | ||
1074 | |||
1066 | if (shared) | 1075 | if (shared) |
1067 | iomap->flags |= IOMAP_F_SHARED; | 1076 | iomap->flags |= IOMAP_F_SHARED; |
1068 | return 0; | 1077 | return 0; |
@@ -1140,6 +1149,7 @@ xfs_file_iomap_end( | |||
1140 | unsigned flags, | 1149 | unsigned flags, |
1141 | struct iomap *iomap) | 1150 | struct iomap *iomap) |
1142 | { | 1151 | { |
1152 | put_dax(iomap->dax_dev); | ||
1143 | if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) | 1153 | if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) |
1144 | return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, | 1154 | return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, |
1145 | length, written, iomap); | 1155 | length, written, iomap); |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 83d28623645f..5493a66dc710 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -1923,28 +1923,12 @@ static inline bool integrity_req_gap_front_merge(struct request *req, | |||
1923 | 1923 | ||
1924 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ | 1924 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
1925 | 1925 | ||
1926 | /** | ||
1927 | * struct blk_dax_ctl - control and output parameters for ->direct_access | ||
1928 | * @sector: (input) offset relative to a block_device | ||
1929 | * @addr: (output) kernel virtual address for @sector populated by driver | ||
1930 | * @pfn: (output) page frame number for @addr populated by driver | ||
1931 | * @size: (input) number of bytes requested | ||
1932 | */ | ||
1933 | struct blk_dax_ctl { | ||
1934 | sector_t sector; | ||
1935 | void *addr; | ||
1936 | long size; | ||
1937 | pfn_t pfn; | ||
1938 | }; | ||
1939 | |||
1940 | struct block_device_operations { | 1926 | struct block_device_operations { |
1941 | int (*open) (struct block_device *, fmode_t); | 1927 | int (*open) (struct block_device *, fmode_t); |
1942 | void (*release) (struct gendisk *, fmode_t); | 1928 | void (*release) (struct gendisk *, fmode_t); |
1943 | int (*rw_page)(struct block_device *, sector_t, struct page *, bool); | 1929 | int (*rw_page)(struct block_device *, sector_t, struct page *, bool); |
1944 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1930 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1945 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1931 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1946 | long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *, | ||
1947 | long); | ||
1948 | unsigned int (*check_events) (struct gendisk *disk, | 1932 | unsigned int (*check_events) (struct gendisk *disk, |
1949 | unsigned int clearing); | 1933 | unsigned int clearing); |
1950 | /* ->media_changed() is DEPRECATED, use ->check_events() instead */ | 1934 | /* ->media_changed() is DEPRECATED, use ->check_events() instead */ |
@@ -1963,9 +1947,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, | |||
1963 | extern int bdev_read_page(struct block_device *, sector_t, struct page *); | 1947 | extern int bdev_read_page(struct block_device *, sector_t, struct page *); |
1964 | extern int bdev_write_page(struct block_device *, sector_t, struct page *, | 1948 | extern int bdev_write_page(struct block_device *, sector_t, struct page *, |
1965 | struct writeback_control *); | 1949 | struct writeback_control *); |
1966 | extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); | ||
1967 | extern int bdev_dax_supported(struct super_block *, int); | 1950 | extern int bdev_dax_supported(struct super_block *, int); |
1968 | extern bool bdev_dax_capable(struct block_device *); | 1951 | int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); |
1969 | #else /* CONFIG_BLOCK */ | 1952 | #else /* CONFIG_BLOCK */ |
1970 | 1953 | ||
1971 | struct block_device; | 1954 | struct block_device; |
diff --git a/include/linux/dax.h b/include/linux/dax.h index d8a3dc042e1c..d3158e74a59e 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h | |||
@@ -7,6 +7,28 @@ | |||
7 | #include <asm/pgtable.h> | 7 | #include <asm/pgtable.h> |
8 | 8 | ||
9 | struct iomap_ops; | 9 | struct iomap_ops; |
10 | struct dax_device; | ||
11 | struct dax_operations { | ||
12 | /* | ||
13 | * direct_access: translate a device-relative | ||
14 | * logical-page-offset into an absolute physical pfn. Return the | ||
15 | * number of pages available for DAX at that pfn. | ||
16 | */ | ||
17 | long (*direct_access)(struct dax_device *, pgoff_t, long, | ||
18 | void **, pfn_t *); | ||
19 | }; | ||
20 | |||
21 | int dax_read_lock(void); | ||
22 | void dax_read_unlock(int id); | ||
23 | struct dax_device *dax_get_by_host(const char *host); | ||
24 | struct dax_device *alloc_dax(void *private, const char *host, | ||
25 | const struct dax_operations *ops); | ||
26 | void put_dax(struct dax_device *dax_dev); | ||
27 | bool dax_alive(struct dax_device *dax_dev); | ||
28 | void kill_dax(struct dax_device *dax_dev); | ||
29 | void *dax_get_private(struct dax_device *dax_dev); | ||
30 | long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, | ||
31 | void **kaddr, pfn_t *pfn); | ||
10 | 32 | ||
11 | /* | 33 | /* |
12 | * We use lowest available bit in exceptional entry for locking, one bit for | 34 | * We use lowest available bit in exceptional entry for locking, one bit for |
@@ -48,17 +70,13 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, | |||
48 | pgoff_t index, void *entry, bool wake_all); | 70 | pgoff_t index, void *entry, bool wake_all); |
49 | 71 | ||
50 | #ifdef CONFIG_FS_DAX | 72 | #ifdef CONFIG_FS_DAX |
51 | struct page *read_dax_sector(struct block_device *bdev, sector_t n); | 73 | int __dax_zero_page_range(struct block_device *bdev, |
52 | int __dax_zero_page_range(struct block_device *bdev, sector_t sector, | 74 | struct dax_device *dax_dev, sector_t sector, |
53 | unsigned int offset, unsigned int length); | 75 | unsigned int offset, unsigned int length); |
54 | #else | 76 | #else |
55 | static inline struct page *read_dax_sector(struct block_device *bdev, | ||
56 | sector_t n) | ||
57 | { | ||
58 | return ERR_PTR(-ENXIO); | ||
59 | } | ||
60 | static inline int __dax_zero_page_range(struct block_device *bdev, | 77 | static inline int __dax_zero_page_range(struct block_device *bdev, |
61 | sector_t sector, unsigned int offset, unsigned int length) | 78 | struct dax_device *dax_dev, sector_t sector, |
79 | unsigned int offset, unsigned int length) | ||
62 | { | 80 | { |
63 | return -ENXIO; | 81 | return -ENXIO; |
64 | } | 82 | } |
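include/linux/dax.h now carries the whole provider contract in one place. A hypothetical minimal provider mirrors the dcssblk conversion above: one ->direct_access() callback that turns a device page offset into a kernel address plus pfn and reports how many pages remain valid from there (struct my_ram and the bounds check are illustrative assumptions):

    /* Hypothetical minimal dax_operations provider (sketch) */
    struct my_ram {
            void *virt;             /* kernel mapping of the memory */
            phys_addr_t phys;       /* its physical base address */
            size_t size;            /* total bytes, page aligned */
    };

    static long my_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
                    long nr_pages, void **kaddr, pfn_t *pfn)
    {
            struct my_ram *ram = dax_get_private(dax_dev);
            resource_size_t offset = pgoff * PAGE_SIZE;

            if (offset >= ram->size)        /* illustrative bounds check */
                    return -ERANGE;
            *kaddr = ram->virt + offset;
            *pfn = phys_to_pfn_t(ram->phys + offset, PFN_DEV);
            return (ram->size - offset) / PAGE_SIZE;
    }

    static const struct dax_operations my_dax_ops = {
            .direct_access = my_direct_access,
    };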
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 78ad0624cdae..f4c639c0c362 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h | |||
@@ -130,13 +130,15 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); | |||
130 | * < 0 : error | 130 | * < 0 : error |
131 | * >= 0 : the number of bytes accessible at the address | 131 | * >= 0 : the number of bytes accessible at the address |
132 | */ | 132 | */ |
133 | typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, | 133 | typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, |
134 | void **kaddr, pfn_t *pfn, long size); | 134 | long nr_pages, void **kaddr, pfn_t *pfn); |
135 | #define PAGE_SECTORS (PAGE_SIZE / 512) | ||
135 | 136 | ||
136 | void dm_error(const char *message); | 137 | void dm_error(const char *message); |
137 | 138 | ||
138 | struct dm_dev { | 139 | struct dm_dev { |
139 | struct block_device *bdev; | 140 | struct block_device *bdev; |
141 | struct dax_device *dax_dev; | ||
140 | fmode_t mode; | 142 | fmode_t mode; |
141 | char name[16]; | 143 | char name[16]; |
142 | }; | 144 | }; |
@@ -178,7 +180,7 @@ struct target_type { | |||
178 | dm_busy_fn busy; | 180 | dm_busy_fn busy; |
179 | dm_iterate_devices_fn iterate_devices; | 181 | dm_iterate_devices_fn iterate_devices; |
180 | dm_io_hints_fn io_hints; | 182 | dm_io_hints_fn io_hints; |
181 | dm_direct_access_fn direct_access; | 183 | dm_dax_direct_access_fn direct_access; |
182 | 184 | ||
183 | /* For internal device-mapper use. */ | 185 | /* For internal device-mapper use. */ |
184 | struct list_head list; | 186 | struct list_head list; |
diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 7291810067eb..f753e788da31 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h | |||
@@ -41,6 +41,7 @@ struct iomap { | |||
41 | u16 type; /* type of mapping */ | 41 | u16 type; /* type of mapping */ |
42 | u16 flags; /* flags for mapping */ | 42 | u16 flags; /* flags for mapping */ |
43 | struct block_device *bdev; /* block device for I/O */ | 43 | struct block_device *bdev; /* block device for I/O */ |
44 | struct dax_device *dax_dev; /* dax_dev for dax operations */ | ||
44 | }; | 45 | }; |
45 | 46 | ||
46 | /* | 47 | /* |
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 77e7af32543f..6c807017128d 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h | |||
@@ -20,9 +20,11 @@ | |||
20 | 20 | ||
21 | enum { | 21 | enum { |
22 | /* when a dimm supports both PMEM and BLK access a label is required */ | 22 | /* when a dimm supports both PMEM and BLK access a label is required */ |
23 | NDD_ALIASING = 1 << 0, | 23 | NDD_ALIASING = 0, |
24 | /* unarmed memory devices may not persist writes */ | 24 | /* unarmed memory devices may not persist writes */ |
25 | NDD_UNARMED = 1 << 1, | 25 | NDD_UNARMED = 1, |
26 | /* locked memory devices should not be accessed */ | ||
27 | NDD_LOCKED = 2, | ||
26 | 28 | ||
27 | /* need to set a limit somewhere, but yes, this is likely overkill */ | 29 | /* need to set a limit somewhere, but yes, this is likely overkill */ |
28 | ND_IOCTL_MAX_BUFLEN = SZ_4M, | 30 | ND_IOCTL_MAX_BUFLEN = SZ_4M, |
@@ -120,7 +122,7 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( | |||
120 | } | 122 | } |
121 | 123 | ||
122 | int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); | 124 | int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); |
123 | void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus, | 125 | void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, |
124 | phys_addr_t start, unsigned int len); | 126 | phys_addr_t start, unsigned int len); |
125 | struct nvdimm_bus *nvdimm_bus_register(struct device *parent, | 127 | struct nvdimm_bus *nvdimm_bus_register(struct device *parent, |
126 | struct nvdimm_bus_descriptor *nfit_desc); | 128 | struct nvdimm_bus_descriptor *nfit_desc); |
diff --git a/include/linux/pmem.h b/include/linux/pmem.h index e856c2cb0fe8..71ecf3d46aac 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h | |||
@@ -31,12 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) | |||
31 | BUG(); | 31 | BUG(); |
32 | } | 32 | } |
33 | 33 | ||
34 | static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) | ||
35 | { | ||
36 | BUG(); | ||
37 | return -EFAULT; | ||
38 | } | ||
39 | |||
40 | static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, | 34 | static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, |
41 | struct iov_iter *i) | 35 | struct iov_iter *i) |
42 | { | 36 | { |
@@ -65,23 +59,6 @@ static inline bool arch_has_pmem_api(void) | |||
65 | return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); | 59 | return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); |
66 | } | 60 | } |
67 | 61 | ||
68 | /* | ||
69 | * memcpy_from_pmem - read from persistent memory with error handling | ||
70 | * @dst: destination buffer | ||
71 | * @src: source buffer | ||
72 | * @size: transfer length | ||
73 | * | ||
74 | * Returns 0 on success negative error code on failure. | ||
75 | */ | ||
76 | static inline int memcpy_from_pmem(void *dst, void const *src, size_t size) | ||
77 | { | ||
78 | if (arch_has_pmem_api()) | ||
79 | return arch_memcpy_from_pmem(dst, src, size); | ||
80 | else | ||
81 | memcpy(dst, src, size); | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | /** | 62 | /** |
86 | * memcpy_to_pmem - copy data to persistent memory | 63 | * memcpy_to_pmem - copy data to persistent memory |
87 | * @dst: destination buffer for the copy | 64 | * @dst: destination buffer for the copy |
diff --git a/include/linux/string.h b/include/linux/string.h index c4011b28f3d8..537918f8a98e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h | |||
@@ -114,6 +114,14 @@ extern int memcmp(const void *,const void *,__kernel_size_t); | |||
114 | #ifndef __HAVE_ARCH_MEMCHR | 114 | #ifndef __HAVE_ARCH_MEMCHR |
115 | extern void * memchr(const void *,int,__kernel_size_t); | 115 | extern void * memchr(const void *,int,__kernel_size_t); |
116 | #endif | 116 | #endif |
117 | #ifndef __HAVE_ARCH_MEMCPY_MCSAFE | ||
118 | static inline __must_check int memcpy_mcsafe(void *dst, const void *src, | ||
119 | size_t cnt) | ||
120 | { | ||
121 | memcpy(dst, src, cnt); | ||
122 | return 0; | ||
123 | } | ||
124 | #endif | ||
117 | void *memchr_inv(const void *s, int c, size_t n); | 125 | void *memchr_inv(const void *s, int c, size_t n); |
118 | char *strreplace(char *s, char old, char new); | 126 | char *strreplace(char *s, char old, char new); |
119 | 127 | ||
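The memcpy_mcsafe() fallback lets common code copy from possibly poisoned memory unconditionally: architectures without a machine-check-safe copy get plain memcpy() and always report success, while architectures that provide one can fail the copy instead of taking a fatal machine check. Typical call shape (sketch; the exact error value is arch-defined):

    /* Sketch: consuming the memcpy_mcsafe() return value */
    static int example_read(void *dst, const void *src, size_t n)
    {
            if (memcpy_mcsafe(dst, src, n))
                    return -EIO;    /* copy hit poisoned memory */
            return 0;
    }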
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index ede5c6a62164..7ad3863cb88b 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h | |||
@@ -169,6 +169,7 @@ enum { | |||
169 | enum { | 169 | enum { |
170 | ND_ARS_VOLATILE = 1, | 170 | ND_ARS_VOLATILE = 1, |
171 | ND_ARS_PERSISTENT = 2, | 171 | ND_ARS_PERSISTENT = 2, |
172 | ND_CONFIG_LOCKED = 1, | ||
172 | }; | 173 | }; |
173 | 174 | ||
174 | static inline const char *nvdimm_bus_cmd_name(unsigned cmd) | 175 | static inline const char *nvdimm_bus_cmd_name(unsigned cmd) |
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 405212be044a..d870520da68b 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild | |||
@@ -28,7 +28,10 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o | |||
28 | obj-$(CONFIG_ND_BLK) += nd_blk.o | 28 | obj-$(CONFIG_ND_BLK) += nd_blk.o |
29 | obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o | 29 | obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o |
30 | obj-$(CONFIG_ACPI_NFIT) += nfit.o | 30 | obj-$(CONFIG_ACPI_NFIT) += nfit.o |
31 | obj-$(CONFIG_DEV_DAX) += dax.o | 31 | ifeq ($(CONFIG_DAX),m) |
32 | obj-$(CONFIG_DAX) += dax.o | ||
33 | endif | ||
34 | obj-$(CONFIG_DEV_DAX) += device_dax.o | ||
32 | obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o | 35 | obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o |
33 | 36 | ||
34 | nfit-y := $(ACPI_SRC)/core.o | 37 | nfit-y := $(ACPI_SRC)/core.o |
@@ -48,9 +51,13 @@ nd_blk-y += config_check.o | |||
48 | nd_e820-y := $(NVDIMM_SRC)/e820.o | 51 | nd_e820-y := $(NVDIMM_SRC)/e820.o |
49 | nd_e820-y += config_check.o | 52 | nd_e820-y += config_check.o |
50 | 53 | ||
51 | dax-y := $(DAX_SRC)/dax.o | 54 | dax-y := $(DAX_SRC)/super.o |
52 | dax-y += config_check.o | 55 | dax-y += config_check.o |
53 | 56 | ||
57 | device_dax-y := $(DAX_SRC)/device.o | ||
58 | device_dax-y += dax-dev.o | ||
59 | device_dax-y += config_check.o | ||
60 | |||
54 | dax_pmem-y := $(DAX_SRC)/pmem.o | 61 | dax_pmem-y := $(DAX_SRC)/pmem.o |
55 | dax_pmem-y += config_check.o | 62 | dax_pmem-y += config_check.o |
56 | 63 | ||
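
The ifeq guard exists because the mocked dax core can only stand in for the real one when CONFIG_DAX=m; if the core is built in, the test build must not produce a second, conflicting dax.ko. Each object also links config_check.o, which enforces modular configs at compile time; a sketch in the spirit of tools/testing/nvdimm/config_check.c (assumed to gain a CONFIG_DAX check in this series):

	#include <linux/kconfig.h>
	#include <linux/bug.h>

	void check(void)
	{
		/* nfit_test can only shadow symbols built as modules */
		BUILD_BUG_ON(!IS_MODULE(CONFIG_DAX));
	}
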
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c new file mode 100644 index 000000000000..36ee3d8797c3 --- /dev/null +++ b/tools/testing/nvdimm/dax-dev.c | |||
@@ -0,0 +1,49 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | #include "test/nfit_test.h" | ||
14 | #include <linux/mm.h> | ||
15 | #include "../../../drivers/dax/dax-private.h" | ||
16 | |||
17 | phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, | ||
18 | unsigned long size) | ||
19 | { | ||
20 | struct resource *res; | ||
21 | phys_addr_t addr; | ||
22 | int i; | ||
23 | |||
24 | for (i = 0; i < dev_dax->num_resources; i++) { | ||
25 | res = &dev_dax->res[i]; | ||
26 | addr = pgoff * PAGE_SIZE + res->start; | ||
27 | if (addr >= res->start && addr <= res->end) | ||
28 | break; | ||
29 | pgoff -= PHYS_PFN(resource_size(res)); | ||
30 | } | ||
31 | |||
32 | if (i < dev_dax->num_resources) { | ||
33 | res = &dev_dax->res[i]; | ||
34 | if (addr + size - 1 <= res->end) { | ||
35 | if (get_nfit_res(addr)) { | ||
36 | struct page *page; | ||
37 | |||
38 | if (dev_dax->region->align > PAGE_SIZE) | ||
39 | return -1; | ||
40 | |||
41 | page = vmalloc_to_page((void *)addr); | ||
42 | return PFN_PHYS(page_to_pfn(page)); | ||
43 | } else | ||
44 | return addr; | ||
45 | } | ||
46 | } | ||
47 | |||
48 | return -1; | ||
49 | } | ||
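
dax_pgoff_to_phys() consumes pgoff resource by resource until the offset lands inside one, then checks that the whole requested size fits. The test override adds one twist: nfit_test-owned "physical" ranges are really vmalloc memory, so the address is translated through vmalloc_to_page(), which is also why region alignments above PAGE_SIZE must be refused here. Roughly how the device-dax fault path consumes this helper (abridged sketch; names as in drivers/dax/device.c):

	phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
	if (phys == -1)
		return VM_FAULT_SIGBUS;
	pfn = phys_to_pfn_t(phys, dev_dax->region->pfn_flags);
	rc = vm_insert_mixed(vmf->vma, vmf->address, pfn);
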
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index c9b8c48f85fc..b53596ad601b 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c | |||
@@ -15,13 +15,13 @@ | |||
15 | #include <pmem.h> | 15 | #include <pmem.h> |
16 | #include <nd.h> | 16 | #include <nd.h> |
17 | 17 | ||
18 | long pmem_direct_access(struct block_device *bdev, sector_t sector, | 18 | long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, |
19 | void **kaddr, pfn_t *pfn, long size) | 19 | long nr_pages, void **kaddr, pfn_t *pfn) |
20 | { | 20 | { |
21 | struct pmem_device *pmem = bdev->bd_queue->queuedata; | 21 | resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; |
22 | resource_size_t offset = sector * 512 + pmem->data_offset; | ||
23 | 22 | ||
24 | if (unlikely(is_bad_pmem(&pmem->bb, sector, size))) | 23 | if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, |
24 | PFN_PHYS(nr_pages)))) | ||
25 | return -EIO; | 25 | return -EIO; |
26 | 26 | ||
27 | /* | 27 | /* |
@@ -34,11 +34,10 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector, | |||
34 | *kaddr = pmem->virt_addr + offset; | 34 | *kaddr = pmem->virt_addr + offset; |
35 | page = vmalloc_to_page(pmem->virt_addr + offset); | 35 | page = vmalloc_to_page(pmem->virt_addr + offset); |
36 | *pfn = page_to_pfn_t(page); | 36 | *pfn = page_to_pfn_t(page); |
37 | dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent, | 37 | pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n", |
38 | "%s: sector: %#llx pfn: %#lx\n", __func__, | 38 | __func__, pmem, pgoff, page_to_pfn(page)); |
39 | (unsigned long long) sector, page_to_pfn(page)); | ||
40 | 39 | ||
41 | return PAGE_SIZE; | 40 | return 1; |
42 | } | 41 | } |
43 | 42 | ||
44 | *kaddr = pmem->virt_addr + offset; | 43 | *kaddr = pmem->virt_addr + offset; |
@@ -49,6 +48,6 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector, | |||
49 | * requested range. | 48 | * requested range. |
50 | */ | 49 | */ |
51 | if (unlikely(pmem->bb.count)) | 50 | if (unlikely(pmem->bb.count)) |
52 | return size; | 51 | return nr_pages; |
53 | return pmem->size - pmem->pfn_pad - offset; | 52 | return PHYS_PFN(pmem->size - pmem->pfn_pad - offset); |
54 | } | 53 | } |
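
The test copy of __pmem_direct_access() tracks the in-kernel signature change: DAX now works in page offsets and page counts rather than sectors and bytes, and the return value is a page count. The badblocks check still wants sectors, hence the PFN_PHYS(pgoff) / 512 conversion. The helper pairs with a thin dax_operations wrapper; a sketch mirroring the corresponding drivers/nvdimm/pmem.c change in this series:

	static long pmem_dax_direct_access(struct dax_device *dax_dev,
			pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
	{
		struct pmem_device *pmem = dax_get_private(dax_dev);

		return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
	}

	static const struct dax_operations pmem_dax_ops = {
		.direct_access = pmem_dax_direct_access,
	};
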
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 798f17655433..c2187178fb13 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c | |||
@@ -132,6 +132,7 @@ static u32 handle[] = { | |||
132 | [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), | 132 | [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), |
133 | [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), | 133 | [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), |
134 | [5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0), | 134 | [5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0), |
135 | [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1), | ||
135 | }; | 136 | }; |
136 | 137 | ||
137 | static unsigned long dimm_fail_cmd_flags[NUM_DCR]; | 138 | static unsigned long dimm_fail_cmd_flags[NUM_DCR]; |
@@ -728,8 +729,8 @@ static int nfit_test0_alloc(struct nfit_test *t) | |||
728 | static int nfit_test1_alloc(struct nfit_test *t) | 729 | static int nfit_test1_alloc(struct nfit_test *t) |
729 | { | 730 | { |
730 | size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 | 731 | size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 |
731 | + sizeof(struct acpi_nfit_memory_map) | 732 | + sizeof(struct acpi_nfit_memory_map) * 2 |
732 | + offsetof(struct acpi_nfit_control_region, window_size); | 733 | + offsetof(struct acpi_nfit_control_region, window_size) * 2; |
733 | int i; | 734 | int i; |
734 | 735 | ||
735 | t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); | 736 | t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); |
@@ -906,6 +907,7 @@ static void nfit_test0_setup(struct nfit_test *t) | |||
906 | memdev->address = 0; | 907 | memdev->address = 0; |
907 | memdev->interleave_index = 0; | 908 | memdev->interleave_index = 0; |
908 | memdev->interleave_ways = 2; | 909 | memdev->interleave_ways = 2; |
910 | memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; | ||
909 | 911 | ||
910 | /* mem-region2 (spa1, dimm0) */ | 912 | /* mem-region2 (spa1, dimm0) */ |
911 | memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; | 913 | memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; |
@@ -921,6 +923,7 @@ static void nfit_test0_setup(struct nfit_test *t) | |||
921 | memdev->address = SPA0_SIZE/2; | 923 | memdev->address = SPA0_SIZE/2; |
922 | memdev->interleave_index = 0; | 924 | memdev->interleave_index = 0; |
923 | memdev->interleave_ways = 4; | 925 | memdev->interleave_ways = 4; |
926 | memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; | ||
924 | 927 | ||
925 | /* mem-region3 (spa1, dimm1) */ | 928 | /* mem-region3 (spa1, dimm1) */ |
926 | memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; | 929 | memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; |
@@ -951,6 +954,7 @@ static void nfit_test0_setup(struct nfit_test *t) | |||
951 | memdev->address = SPA0_SIZE/2; | 954 | memdev->address = SPA0_SIZE/2; |
952 | memdev->interleave_index = 0; | 955 | memdev->interleave_index = 0; |
953 | memdev->interleave_ways = 4; | 956 | memdev->interleave_ways = 4; |
957 | memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; | ||
954 | 958 | ||
955 | /* mem-region5 (spa1, dimm3) */ | 959 | /* mem-region5 (spa1, dimm3) */ |
956 | memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; | 960 | memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; |
@@ -1086,6 +1090,7 @@ static void nfit_test0_setup(struct nfit_test *t) | |||
1086 | memdev->address = 0; | 1090 | memdev->address = 0; |
1087 | memdev->interleave_index = 0; | 1091 | memdev->interleave_index = 0; |
1088 | memdev->interleave_ways = 1; | 1092 | memdev->interleave_ways = 1; |
1093 | memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; | ||
1089 | 1094 | ||
1090 | offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; | 1095 | offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; |
1091 | /* dcr-descriptor0: blk */ | 1096 | /* dcr-descriptor0: blk */ |
@@ -1384,6 +1389,7 @@ static void nfit_test0_setup(struct nfit_test *t) | |||
1384 | memdev->address = 0; | 1389 | memdev->address = 0; |
1385 | memdev->interleave_index = 0; | 1390 | memdev->interleave_index = 0; |
1386 | memdev->interleave_ways = 1; | 1391 | memdev->interleave_ways = 1; |
1392 | memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; | ||
1387 | 1393 | ||
1388 | /* mem-region16 (spa/bdw4, dimm4) */ | 1394 | /* mem-region16 (spa/bdw4, dimm4) */ |
1389 | memdev = nfit_buf + offset + | 1395 | memdev = nfit_buf + offset + |
@@ -1486,6 +1492,34 @@ static void nfit_test1_setup(struct nfit_test *t) | |||
1486 | dcr->code = NFIT_FIC_BYTE; | 1492 | dcr->code = NFIT_FIC_BYTE; |
1487 | dcr->windows = 0; | 1493 | dcr->windows = 0; |
1488 | 1494 | ||
1495 | offset += dcr->header.length; | ||
1496 | memdev = nfit_buf + offset; | ||
1497 | memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; | ||
1498 | memdev->header.length = sizeof(*memdev); | ||
1499 | memdev->device_handle = handle[6]; | ||
1500 | memdev->physical_id = 0; | ||
1501 | memdev->region_id = 0; | ||
1502 | memdev->range_index = 0; | ||
1503 | memdev->region_index = 0+2; | ||
1504 | memdev->region_size = SPA2_SIZE; | ||
1505 | memdev->region_offset = 0; | ||
1506 | memdev->address = 0; | ||
1507 | memdev->interleave_index = 0; | ||
1508 | memdev->interleave_ways = 1; | ||
1509 | memdev->flags = ACPI_NFIT_MEM_MAP_FAILED; | ||
1510 | |||
1511 | /* dcr-descriptor1 */ | ||
1512 | offset += sizeof(*memdev); | ||
1513 | dcr = nfit_buf + offset; | ||
1514 | dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; | ||
1515 | dcr->header.length = offsetof(struct acpi_nfit_control_region, | ||
1516 | window_size); | ||
1517 | dcr->region_index = 0+2; | ||
1518 | dcr_common_init(dcr); | ||
1519 | dcr->serial_number = ~handle[6]; | ||
1520 | dcr->code = NFIT_FIC_BYTE; | ||
1521 | dcr->windows = 0; | ||
1522 | |||
1489 | post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); | 1523 | post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); |
1490 | 1524 | ||
1491 | acpi_desc = &t->acpi_desc; | 1525 | acpi_desc = &t->acpi_desc; |
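
nfit_test1 now models a second DIMM (handle[6]) whose memory-map entry carries ACPI_NFIT_MEM_MAP_FAILED, plus a matching control region, so the failed-mapping paths in the nfit driver get regression coverage. An assumed illustration of how a consumer might key off the flag (not a quote of the driver):

	/* Assumed illustration: a DIMM whose NFIT memory-map entry
	 * records a mapping failure contributes no usable region.
	 */
	if (memdev->flags & ACPI_NFIT_MEM_MAP_FAILED)
		dev_warn(dev, "dimm %#x: map failed, no region\n",
				memdev->device_handle);
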
@@ -1817,6 +1851,10 @@ static int nfit_test_probe(struct platform_device *pdev) | |||
1817 | if (rc) | 1851 | if (rc) |
1818 | return rc; | 1852 | return rc; |
1819 | 1853 | ||
1854 | rc = devm_add_action_or_reset(&pdev->dev, acpi_nfit_shutdown, acpi_desc); | ||
1855 | if (rc) | ||
1856 | return rc; | ||
1857 | |||
1820 | if (nfit_test->setup != nfit_test0_setup) | 1858 | if (nfit_test->setup != nfit_test0_setup) |
1821 | return 0; | 1859 | return 0; |
1822 | 1860 | ||
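
Registering acpi_nfit_shutdown() through devres quiesces the descriptor automatically when the test device unbinds, mirroring the shutdown hook the real driver gains in this series. The _or_reset variant also covers registration failure; the general pattern, sketched with a hypothetical callback name:

	static void my_teardown(void *data)	/* hypothetical */
	{
		/* quiesce async work, flush outstanding notifications */
	}

	...
	rc = devm_add_action_or_reset(dev, my_teardown, ctx);
	if (rc)
		return rc;	/* my_teardown(ctx) has already run */
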
@@ -1907,7 +1945,7 @@ static __init int nfit_test_init(void) | |||
1907 | case 1: | 1945 | case 1: |
1908 | nfit_test->num_pm = 1; | 1946 | nfit_test->num_pm = 1; |
1909 | nfit_test->dcr_idx = NUM_DCR; | 1947 | nfit_test->dcr_idx = NUM_DCR; |
1910 | nfit_test->num_dcr = 1; | 1948 | nfit_test->num_dcr = 2; |
1911 | nfit_test->alloc = nfit_test1_alloc; | 1949 | nfit_test->alloc = nfit_test1_alloc; |
1912 | nfit_test->setup = nfit_test1_setup; | 1950 | nfit_test->setup = nfit_test1_setup; |
1913 | break; | 1951 | break; |
@@ -1924,6 +1962,7 @@ static __init int nfit_test_init(void) | |||
1924 | put_device(&pdev->dev); | 1962 | put_device(&pdev->dev); |
1925 | goto err_register; | 1963 | goto err_register; |
1926 | } | 1964 | } |
1965 | get_device(&pdev->dev); | ||
1927 | 1966 | ||
1928 | rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); | 1967 | rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); |
1929 | if (rc) | 1968 | if (rc) |
@@ -1942,6 +1981,10 @@ static __init int nfit_test_init(void) | |||
1942 | if (instances[i]) | 1981 | if (instances[i]) |
1943 | platform_device_unregister(&instances[i]->pdev); | 1982 | platform_device_unregister(&instances[i]->pdev); |
1944 | nfit_test_teardown(); | 1983 | nfit_test_teardown(); |
1984 | for (i = 0; i < NUM_NFITS; i++) | ||
1985 | if (instances[i]) | ||
1986 | put_device(&instances[i]->pdev.dev); | ||
1987 | |||
1945 | return rc; | 1988 | return rc; |
1946 | } | 1989 | } |
1947 | 1990 | ||
@@ -1949,10 +1992,13 @@ static __exit void nfit_test_exit(void) | |||
1949 | { | 1992 | { |
1950 | int i; | 1993 | int i; |
1951 | 1994 | ||
1952 | platform_driver_unregister(&nfit_test_driver); | ||
1953 | for (i = 0; i < NUM_NFITS; i++) | 1995 | for (i = 0; i < NUM_NFITS; i++) |
1954 | platform_device_unregister(&instances[i]->pdev); | 1996 | platform_device_unregister(&instances[i]->pdev); |
1997 | platform_driver_unregister(&nfit_test_driver); | ||
1955 | nfit_test_teardown(); | 1998 | nfit_test_teardown(); |
1999 | |||
2000 | for (i = 0; i < NUM_NFITS; i++) | ||
2001 | put_device(&instances[i]->pdev.dev); | ||
1956 | class_destroy(nfit_test_dimm); | 2002 | class_destroy(nfit_test_dimm); |
1957 | } | 2003 | } |
1958 | 2004 | ||
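
The exit-path rework applies two lifetime rules. First, devices are unregistered before the driver so the devm-registered shutdown action runs while the driver is still bound. Second, platform_device_unregister() can drop the last reference and free the device, yet instances[i] is still dereferenced afterwards; the get_device() taken at init pins the embedded structure until the final put_device(). The underlying pattern, as a sketch:

	get_device(&pdev->dev);			/* init: pin past unregister */
	...
	platform_device_unregister(pdev);	/* removed from the bus */
	put_device(&pdev->dev);			/* last ref: may free pdev */
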