author		Linus Torvalds <torvalds@linux-foundation.org>	2016-07-28 20:22:07 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-28 20:38:16 -0400
commit		f0c98ebc57c2d5e535bc4f9167f35650d2ba3c90 (patch)
tree		ad584aa321c0a2dbdaa49e0754f6c9f233b79a48
parent		d94ba9e7d8d5c821d0442f13b30b0140c1109c38 (diff)
parent		0606263f24f3d64960de742c55894190b5df903b (diff)
Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:

 - Replace pcommit with ADR / directed-flushing. The pcommit
   instruction, which has not shipped on any product, is deprecated.
   Instead, the requirement is that platforms implement either ADR, or
   provide one or more flush addresses per nvdimm.

   ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers
   to the memory controller on a power-fail event.

   Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
   Interface Table (NFIT) sub-structure: "Flush Hint Address
   Structure". A flush hint is an mmio address that when written and
   fenced assures that all previous posted writes targeting a given
   dimm have been flushed to media.

 - On-demand ARS (address range scrub). Linux uses the results of the
   ACPI ARS commands to track bad blocks in pmem devices. When latent
   errors are detected we re-scrub the media to refresh the bad block
   list, userspace can also request a re-scrub at any time.

 - Support for the Microsoft DSM (device specific method) command
   format.

 - Support for EDK2/OVMF virtual disk device memory ranges.

 - Various fixes and cleanups across the subsystem.

* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
  libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
  nfit: do an ARS scrub on hitting a latent media error
  nfit: move to nfit/ sub-directory
  nfit, libnvdimm: allow an ARS scrub to be triggered on demand
  libnvdimm: register nvdimm_bus devices with an nd_bus driver
  pmem: clarify a debug print in pmem_clear_poison
  x86/insn: remove pcommit
  Revert "KVM: x86: add pcommit support"
  nfit, tools/testing/nvdimm/: unify shutdown paths
  libnvdimm: move ->module to struct nvdimm_bus_descriptor
  nfit: cleanup acpi_nfit_init calling convention
  nfit: fix _FIT evaluation memory leak + use after free
  tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
  tools/testing/nvdimm: add virtual ramdisk range
  acpi, nfit: treat virtual ramdisk SPA as pmem region
  pmem: kill __pmem address space
  pmem: kill wmb_pmem()
  libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
  fs/dax: remove wmb_pmem()
  libnvdimm, pmem: flush posted-write queues on shutdown
  ...
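A note on the flush-hint write pattern this merge standardizes on: the
sequence is visible in the wmb_blk() helper that the nfit driver deletes
below (its logic moves behind nvdimm_flush()). A minimal sketch, where
'flush_hint' is assumed to be an ioremapped Flush Hint Address and 'dst'
an illustrative pmem buffer:

	/* hedged sketch of a directed flush, not the in-tree API itself */
	memcpy_to_pmem(dst, src, len);	/* cache-bypassing copy toward media */
	wmb();				/* fence prior stores before the hint write */
	writeq(1, flush_hint);		/* mmio write drains the dimm's posted writes */
	wmb();				/* order the hint write itself */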
-rw-r--r--  Documentation/filesystems/Locking                            |   2
-rw-r--r--  Documentation/nvdimm/btt.txt                                 |  28
-rw-r--r--  arch/powerpc/sysdev/axonram.c                                |   4
-rw-r--r--  arch/x86/include/asm/cpufeatures.h                           |   1
-rw-r--r--  arch/x86/include/asm/pmem.h                                  |  77
-rw-r--r--  arch/x86/include/asm/special_insns.h                         |  46
-rw-r--r--  arch/x86/include/asm/vmx.h                                   |   1
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h                              |   4
-rw-r--r--  arch/x86/kvm/cpuid.c                                         |   2
-rw-r--r--  arch/x86/kvm/cpuid.h                                         |   8
-rw-r--r--  arch/x86/kvm/vmx.c                                           |  32
-rw-r--r--  arch/x86/lib/x86-opcode-map.txt                              |   2
-rw-r--r--  drivers/acpi/Kconfig                                         |  27
-rw-r--r--  drivers/acpi/Makefile                                        |   2
-rw-r--r--  drivers/acpi/nfit/Kconfig                                    |  26
-rw-r--r--  drivers/acpi/nfit/Makefile                                   |   3
-rw-r--r--  drivers/acpi/nfit/core.c (renamed from drivers/acpi/nfit.c)  | 647
-rw-r--r--  drivers/acpi/nfit/mce.c                                      |  89
-rw-r--r--  drivers/acpi/nfit/nfit.h (renamed from drivers/acpi/nfit.h)  |  60
-rw-r--r--  drivers/block/brd.c                                          |   4
-rw-r--r--  drivers/dax/dax.c                                            |   6
-rw-r--r--  drivers/dax/pmem.c                                           |  14
-rw-r--r--  drivers/md/dm-linear.c                                       |   2
-rw-r--r--  drivers/md/dm-snap.c                                         |   2
-rw-r--r--  drivers/md/dm-stripe.c                                       |   2
-rw-r--r--  drivers/md/dm-target.c                                       |   2
-rw-r--r--  drivers/md/dm.c                                              |   2
-rw-r--r--  drivers/nvdimm/Kconfig                                       |   2
-rw-r--r--  drivers/nvdimm/blk.c                                         |  11
-rw-r--r--  drivers/nvdimm/btt_devs.c                                    |   3
-rw-r--r--  drivers/nvdimm/bus.c                                         | 212
-rw-r--r--  drivers/nvdimm/claim.c                                       |   7
-rw-r--r--  drivers/nvdimm/core.c                                        | 253
-rw-r--r--  drivers/nvdimm/dimm_devs.c                                   |   5
-rw-r--r--  drivers/nvdimm/e820.c                                        |   1
-rw-r--r--  drivers/nvdimm/nd-core.h                                     |   5
-rw-r--r--  drivers/nvdimm/nd.h                                          |  10
-rw-r--r--  drivers/nvdimm/pmem.c                                        |  85
-rw-r--r--  drivers/nvdimm/pmem.h                                        |  24
-rw-r--r--  drivers/nvdimm/region.c                                      |  19
-rw-r--r--  drivers/nvdimm/region_devs.c                                 | 154
-rw-r--r--  drivers/s390/block/dcssblk.c                                 |   6
-rw-r--r--  fs/dax.c                                                     |  13
-rw-r--r--  include/linux/blkdev.h                                       |   6
-rw-r--r--  include/linux/compiler.h                                     |   2
-rw-r--r--  include/linux/device-mapper.h                                |   2
-rw-r--r--  include/linux/libnvdimm.h                                    |  24
-rw-r--r--  include/linux/nd.h                                           |   3
-rw-r--r--  include/linux/pfn_t.h                                        |   5
-rw-r--r--  include/linux/pmem.h                                         | 117
-rw-r--r--  include/uapi/linux/ndctl.h                                   |   1
-rw-r--r--  kernel/memremap.c                                            |   6
-rwxr-xr-x  scripts/checkpatch.pl                                        |   1
-rw-r--r--  tools/objtool/arch/x86/insn/x86-opcode-map.txt               |   2
-rw-r--r--  tools/perf/arch/x86/tests/insn-x86-dat-32.c                  |   2
-rw-r--r--  tools/perf/arch/x86/tests/insn-x86-dat-64.c                  |   2
-rw-r--r--  tools/perf/arch/x86/tests/insn-x86-dat-src.c                 |   4
-rw-r--r--  tools/perf/util/intel-pt-decoder/x86-opcode-map.txt          |   2
-rw-r--r--  tools/testing/nvdimm/Kbuild                                  |  10
-rw-r--r--  tools/testing/nvdimm/config_check.c                          |   1
-rw-r--r--  tools/testing/nvdimm/pmem-dax.c                              |  54
-rw-r--r--  tools/testing/nvdimm/test/Kbuild                             |   2
-rw-r--r--  tools/testing/nvdimm/test/iomap.c                            |  38
-rw-r--r--  tools/testing/nvdimm/test/nfit.c                             | 199
-rw-r--r--  tools/testing/nvdimm/test/nfit_test.h                        |   2
65 files changed, 1375 insertions, 1015 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index ef46d3ac5774..1b3c39a7de62 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -395,7 +395,7 @@ prototypes:
 	int (*release) (struct gendisk *, fmode_t);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+	int (*direct_access) (struct block_device *, sector_t, void **,
 				unsigned long *);
 	int (*media_changed) (struct gendisk *);
 	void (*unlock_native_capacity) (struct gendisk *);
diff --git a/Documentation/nvdimm/btt.txt b/Documentation/nvdimm/btt.txt
index b91443f577dc..e293fb664924 100644
--- a/Documentation/nvdimm/btt.txt
+++ b/Documentation/nvdimm/btt.txt
@@ -256,28 +256,18 @@ If any of these error conditions are encountered, the arena is put into a read
 only state using a flag in the info block.
 
 
-5. In-kernel usage
-==================
+5. Usage
+========
 
-Any block driver that supports byte granularity IO to the storage may register
-with the BTT. It will have to provide the rw_bytes interface in its
-block_device_operations struct:
+The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
+(pmem, or blk mode). The easiest way to set up such a namespace is using the
+'ndctl' utility [1]:
 
-	int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw);
+For example, the ndctl command line to setup a btt with a 4k sector size is:
 
-It may register with the BTT after it adds its own gendisk, using btt_init:
+    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
 
-	struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize,
-		u32 lbasize, u8 uuid[], int maxlane);
+See ndctl create-namespace --help for more options.
 
-note that maxlane is the maximum amount of concurrency the driver wishes to
-allow the BTT to use.
-
-The BTT 'disk' appears as a stacked block device that grabs the underlying block
-device in the O_EXCL mode.
-
-When the driver wishes to remove the backing disk, it should similarly call
-btt_fini using the same struct btt* handle that was provided to it by btt_init.
-
-	void btt_fini(struct btt *btt);
+[1]: https://github.com/pmem/ndctl
 
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index f9af6461521a..9144204442eb 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void __pmem **kaddr, pfn_t *pfn, long size)
+		       void **kaddr, pfn_t *pfn, long size)
 {
 	struct axon_ram_bank *bank = device->bd_disk->private_data;
 	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
 
-	*kaddr = (void __pmem __force *) bank->io_addr + offset;
+	*kaddr = (void *) bank->io_addr + offset;
 	*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
 	return bank->size - offset;
 }
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c64b1e9c5d1a..d683993248c8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -225,7 +225,6 @@
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index fbc5e92e1ecc..643eba42d620 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -26,13 +26,11 @@
  * @n: length of the copy in bytes
  *
  * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
+ * a subsequent pmem driver flush operation will drain posted write queues.
  */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-		size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
-	int unwritten;
+	int rem;
 
 	/*
 	 * We are copying between two kernel buffers, if
@@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
 	 * fault) we would have already reported a general protection fault
 	 * before the WARN+BUG.
 	 */
-	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
-			(void __user *) src, n);
-	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
-			__func__, dst, src, unwritten))
+	rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
+	if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
+			__func__, dst, src, rem))
 		BUG();
 }
 
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
-		size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
 	if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
-		return memcpy_mcsafe(dst, (void __force *) src, n);
-	memcpy(dst, (void __force *) src, n);
+		return memcpy_mcsafe(dst, src, n);
+	memcpy(dst, src, n);
 	return 0;
 }
 
 /**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
-	/*
-	 * wmb() to 'sfence' all previous writes such that they are
-	 * architecturally visible to 'pcommit'. Note, that we've
-	 * already arranged for pmem writes to avoid the cache via
-	 * arch_memcpy_to_pmem().
-	 */
-	wmb();
-	pcommit_sfence();
-}
-
-/**
  * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr: virtual start address
  * @size: number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction. This function requires explicit ordering with an
- * arch_wmb_pmem() call.
+ * instruction.
  */
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
 	unsigned long clflush_mask = x86_clflush_size - 1;
-	void *vaddr = (void __force *)addr;
-	void *vend = vaddr + size;
+	void *vend = addr + size;
 	void *p;
 
-	for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+	for (p = (void *)((unsigned long)addr & ~clflush_mask);
 			p < vend; p += x86_clflush_size)
 		clwb(p);
 }
@@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
  * @i: iterator with source data
  *
  * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
  */
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 		struct iov_iter *i)
 {
-	void *vaddr = (void __force *)addr;
 	size_t len;
 
 	/* TODO: skip the write-back by always using non-temporal stores */
-	len = copy_from_iter_nocache(vaddr, bytes, i);
+	len = copy_from_iter_nocache(addr, bytes, i);
 
 	if (__iter_needs_pmem_wb(i))
 		arch_wb_cache_pmem(addr, bytes);
@@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
  * @size: number of bytes to zero
  *
  * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
  */
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
 {
-	void *vaddr = (void __force *)addr;
-
-	memset(vaddr, 0, size);
+	memset(addr, 0, size);
 	arch_wb_cache_pmem(addr, size);
 }
 
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
-	clflush_cache_range((void __force *) addr, size);
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-	/*
-	 * We require that wmb() be an 'sfence', that is only guaranteed on
-	 * 64-bit builds
-	 */
-	return static_cpu_has(X86_FEATURE_PCOMMIT);
+	clflush_cache_range(addr, size);
 }
 #endif /* CONFIG_ARCH_HAS_PMEM_API */
 #endif /* __ASM_X86_PMEM_H__ */
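With arch_wmb_pmem() and pcommit gone, the pmem persistence recipe above
reduces to a non-temporal copy plus a region-level flush. A hedged sketch of
the caller-side pattern (nvdimm_flush() and memcpy_to_pmem() are the
interfaces this series converts callers to; pmem_write() and its arguments
are hypothetical):

	/* hypothetical helper illustrating the post-pcommit model */
	static void pmem_write(struct nd_region *nd_region, void *dst,
			const void *src, size_t len)
	{
		memcpy_to_pmem(dst, src, len);	/* non-temporal, skips the cache */
		nvdimm_flush(nd_region);	/* ADR / flush hints drain posted writes */
	}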
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index d96d04377765..587d7914ea4b 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -253,52 +253,6 @@ static inline void clwb(volatile void *__p)
 		: [pax] "a" (p));
 }
 
-/**
- * pcommit_sfence() - persistent commit and fence
- *
- * The PCOMMIT instruction ensures that data that has been flushed from the
- * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
- * memory and is durable on the DIMM. The primary use case for this is
- * persistent memory.
- *
- * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
- * with appropriate fencing.
- *
- * Example:
- * void flush_and_commit_buffer(void *vaddr, unsigned int size)
- * {
- *         unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
- *         void *vend = vaddr + size;
- *         void *p;
- *
- *         for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- *              p < vend; p += boot_cpu_data.x86_clflush_size)
- *                 clwb(p);
- *
- *         // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
- *         // MFENCE via mb() also works
- *         wmb();
- *
- *         // PCOMMIT and the required SFENCE for ordering
- *         pcommit_sfence();
- * }
- *
- * After this function completes the data pointed to by 'vaddr' has been
- * accepted to memory and will be durable if the 'vaddr' points to persistent
- * memory.
- *
- * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
- * things we include both the PCOMMIT and the required SFENCE in the
- * alternatives generated by pcommit_sfence().
- */
-static inline void pcommit_sfence(void)
-{
-	alternative(ASM_NOP7,
-		".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
-		"sfence",
-		X86_FEATURE_PCOMMIT);
-}
-
 #define nop() asm volatile ("nop")
 
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 14c63c7e8337..a002b07a7099 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,7 +72,6 @@
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES			0x00100000
-#define SECONDARY_EXEC_PCOMMIT			0x00200000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 5b15d94a33f8..37fee272618f 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -127,8 +126,7 @@
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-	{ EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7597b42a8a88..643565364497 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -366,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
 		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
+		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
 
 	/* cpuid 0xD.1.eax */
 	const u32 kvm_cpuid_D_1_eax_x86_features =
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index e17a74b1d852..35058c2c0eea 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -144,14 +144,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
 	return best && (best->ebx & bit(X86_FEATURE_RTM));
 }
 
-static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
-}
-
 static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7758680db20b..df07a0a4611f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2707,8 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 		SECONDARY_EXEC_WBINVD_EXITING |
-		SECONDARY_EXEC_XSAVES |
-		SECONDARY_EXEC_PCOMMIT;
+		SECONDARY_EXEC_XSAVES;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
@@ -3270,7 +3269,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_SHADOW_VMCS |
 			SECONDARY_EXEC_XSAVES |
 			SECONDARY_EXEC_ENABLE_PML |
-			SECONDARY_EXEC_PCOMMIT |
 			SECONDARY_EXEC_TSC_SCALING;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -4858,9 +4856,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	if (!enable_pml)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-	/* Currently, we allow L1 guest to directly run pcommit instruction. */
-	exec_control &= ~SECONDARY_EXEC_PCOMMIT;
-
 	return exec_control;
 }
4866 4861
@@ -4904,9 +4899,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
-	if (cpu_has_secondary_exec_ctrls())
+	if (cpu_has_secondary_exec_ctrls()) {
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 				vmx_secondary_exec_control(vmx));
+	}
 
 	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
@@ -7564,13 +7560,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static int handle_pcommit(struct kvm_vcpu *vcpu)
-{
-	/* we never catch pcommit instruct for L1 guest. */
-	WARN_ON(1);
-	return 1;
-}
-
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7621,7 +7610,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_PML_FULL]                = handle_pml_full,
-	[EXIT_REASON_PCOMMIT]                 = handle_pcommit,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7930,8 +7918,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * the XSS exit bitmap in vmcs12.
 		 */
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
-	case EXIT_REASON_PCOMMIT:
-		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
 	default:
 		return true;
 	}
@@ -9094,15 +9080,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 
 	if (cpu_has_secondary_exec_ctrls())
 		vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
-	if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
-		if (guest_cpuid_has_pcommit(vcpu))
-			vmx->nested.nested_vmx_secondary_ctls_high |=
-				SECONDARY_EXEC_PCOMMIT;
-		else
-			vmx->nested.nested_vmx_secondary_ctls_high &=
-				~SECONDARY_EXEC_PCOMMIT;
-	}
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9715,8 +9692,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 				SECONDARY_EXEC_RDTSCP |
 				SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-				SECONDARY_EXEC_APIC_REGISTER_VIRT |
-				SECONDARY_EXEC_PCOMMIT);
+				SECONDARY_EXEC_APIC_REGISTER_VIRT);
 		if (nested_cpu_has(vmcs12,
 				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index ec378cd7b71e..767be7c76034 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1012,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
 GrpTable: Grp16
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index acad70a0bb0d..aebd944bdaa1 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -454,32 +454,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
 
 	  If you are unsure what to do, do not enable this option.
 
-config ACPI_NFIT
-	tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
-	depends on PHYS_ADDR_T_64BIT
-	depends on BLK_DEV
-	depends on ARCH_HAS_MMIO_FLUSH
-	select LIBNVDIMM
-	help
-	  Infrastructure to probe ACPI 6 compliant platforms for
-	  NVDIMMs (NFIT) and register a libnvdimm device tree. In
-	  addition to storage devices this also enables libnvdimm to pass
-	  ACPI._DSM messages for platform/dimm configuration.
-
-	  To compile this driver as a module, choose M here:
-	  the module will be called nfit.
-
-config ACPI_NFIT_DEBUG
-	bool "NFIT DSM debug"
-	depends on ACPI_NFIT
-	depends on DYNAMIC_DEBUG
-	default n
-	help
-	  Enabling this option causes the nfit driver to dump the
-	  input and output buffers of _DSM operations on the ACPI0012
-	  device and its children. This can be very verbose, so leave
-	  it disabled unless you are debugging a hardware / firmware
-	  issue.
+source "drivers/acpi/nfit/Kconfig"
 
 source "drivers/acpi/apei/Kconfig"
 source "drivers/acpi/dptf/Kconfig"
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 88f54f03e3d2..35a6ccbe3025 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -69,7 +69,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o
 obj-$(CONFIG_ACPI_PROCESSOR)	+= processor.o
 obj-$(CONFIG_ACPI)		+= container.o
 obj-$(CONFIG_ACPI_THERMAL)	+= thermal.o
-obj-$(CONFIG_ACPI_NFIT)		+= nfit.o
+obj-$(CONFIG_ACPI_NFIT)		+= nfit/
 obj-$(CONFIG_ACPI)		+= acpi_memhotplug.o
 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
 obj-$(CONFIG_ACPI_BATTERY)	+= battery.o
diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig
new file mode 100644
index 000000000000..dd0d53c52552
--- /dev/null
+++ b/drivers/acpi/nfit/Kconfig
@@ -0,0 +1,26 @@
+config ACPI_NFIT
+	tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
+	depends on PHYS_ADDR_T_64BIT
+	depends on BLK_DEV
+	depends on ARCH_HAS_MMIO_FLUSH
+	select LIBNVDIMM
+	help
+	  Infrastructure to probe ACPI 6 compliant platforms for
+	  NVDIMMs (NFIT) and register a libnvdimm device tree. In
+	  addition to storage devices this also enables libnvdimm to pass
+	  ACPI._DSM messages for platform/dimm configuration.
+
+	  To compile this driver as a module, choose M here:
+	  the module will be called nfit.
+
+config ACPI_NFIT_DEBUG
+	bool "NFIT DSM debug"
+	depends on ACPI_NFIT
+	depends on DYNAMIC_DEBUG
+	default n
+	help
+	  Enabling this option causes the nfit driver to dump the
+	  input and output buffers of _DSM operations on the ACPI0012
+	  device and its children. This can be very verbose, so leave
+	  it disabled unless you are debugging a hardware / firmware
+	  issue.
diff --git a/drivers/acpi/nfit/Makefile b/drivers/acpi/nfit/Makefile
new file mode 100644
index 000000000000..a407e769f103
--- /dev/null
+++ b/drivers/acpi/nfit/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_ACPI_NFIT) := nfit.o
+nfit-y := core.o
+nfit-$(CONFIG_X86_MCE) += mce.o
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit/core.c
index 1f0e06065ae6..8c234dd9b8bc 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit/core.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/ndctl.h>
+#include <linux/sysfs.h>
 #include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/acpi.h>
@@ -50,6 +51,9 @@ module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
 		"Limit commands to the publicly specified set\n");
 
+LIST_HEAD(acpi_descs);
+DEFINE_MUTEX(acpi_desc_lock);
+
 static struct workqueue_struct *nfit_wq;
 
 struct nfit_table_prev {
@@ -360,7 +364,7 @@ static const char *spa_type_name(u16 type)
 	return to_name[type];
 }
 
-static int nfit_spa_type(struct acpi_nfit_system_address *spa)
+int nfit_spa_type(struct acpi_nfit_system_address *spa)
 {
 	int i;
 
@@ -374,22 +378,25 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_system_address *spa)
 {
-	size_t length = min_t(size_t, sizeof(*spa), spa->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_spa *nfit_spa;
 
+	if (spa->header.length != sizeof(*spa))
+		return false;
+
 	list_for_each_entry(nfit_spa, &prev->spas, list) {
-		if (memcmp(nfit_spa->spa, spa, length) == 0) {
+		if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
 			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
 			return true;
 		}
 	}
 
-	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL);
+	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
+			GFP_KERNEL);
 	if (!nfit_spa)
 		return false;
 	INIT_LIST_HEAD(&nfit_spa->list);
-	nfit_spa->spa = spa;
+	memcpy(nfit_spa->spa, spa, sizeof(*spa));
 	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
 	dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
 			spa->range_index,
@@ -401,21 +408,24 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_memory_map *memdev)
 {
-	size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_memdev *nfit_memdev;
 
+	if (memdev->header.length != sizeof(*memdev))
+		return false;
+
 	list_for_each_entry(nfit_memdev, &prev->memdevs, list)
-		if (memcmp(nfit_memdev->memdev, memdev, length) == 0) {
+		if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
 			list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
 			return true;
 		}
 
-	nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL);
+	nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev),
+			GFP_KERNEL);
 	if (!nfit_memdev)
 		return false;
 	INIT_LIST_HEAD(&nfit_memdev->list);
-	nfit_memdev->memdev = memdev;
+	memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
 	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
 	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
 			__func__, memdev->device_handle, memdev->range_index,
@@ -423,25 +433,42 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 	return true;
 }
 
+/*
+ * An implementation may provide a truncated control region if no block windows
+ * are defined.
+ */
+static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr)
+{
+	if (dcr->header.length < offsetof(struct acpi_nfit_control_region,
+				window_size))
+		return 0;
+	if (dcr->windows)
+		return sizeof(*dcr);
+	return offsetof(struct acpi_nfit_control_region, window_size);
+}
+
 static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_control_region *dcr)
 {
-	size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_dcr *nfit_dcr;
 
+	if (!sizeof_dcr(dcr))
+		return false;
+
 	list_for_each_entry(nfit_dcr, &prev->dcrs, list)
-		if (memcmp(nfit_dcr->dcr, dcr, length) == 0) {
+		if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) {
 			list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
 			return true;
 		}
 
-	nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL);
+	nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
+			GFP_KERNEL);
 	if (!nfit_dcr)
 		return false;
 	INIT_LIST_HEAD(&nfit_dcr->list);
-	nfit_dcr->dcr = dcr;
+	memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
 	list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
 	dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
 			dcr->region_index, dcr->windows);
@@ -452,71 +479,102 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_data_region *bdw)
 {
-	size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_bdw *nfit_bdw;
 
+	if (bdw->header.length != sizeof(*bdw))
+		return false;
 	list_for_each_entry(nfit_bdw, &prev->bdws, list)
-		if (memcmp(nfit_bdw->bdw, bdw, length) == 0) {
+		if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
 			list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
 			return true;
 		}
 
-	nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL);
+	nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw),
+			GFP_KERNEL);
 	if (!nfit_bdw)
 		return false;
 	INIT_LIST_HEAD(&nfit_bdw->list);
-	nfit_bdw->bdw = bdw;
+	memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
 	list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
 	dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
 			bdw->region_index, bdw->windows);
 	return true;
 }
 
+static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
+{
+	if (idt->header.length < sizeof(*idt))
+		return 0;
+	return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1);
+}
+
 static bool add_idt(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_interleave *idt)
 {
-	size_t length = min_t(size_t, sizeof(*idt), idt->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_idt *nfit_idt;
 
-	list_for_each_entry(nfit_idt, &prev->idts, list)
-		if (memcmp(nfit_idt->idt, idt, length) == 0) {
+	if (!sizeof_idt(idt))
+		return false;
+
+	list_for_each_entry(nfit_idt, &prev->idts, list) {
+		if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt))
+			continue;
+
+		if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) {
 			list_move_tail(&nfit_idt->list, &acpi_desc->idts);
 			return true;
 		}
+	}
 
-	nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL);
+	nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt),
+			GFP_KERNEL);
 	if (!nfit_idt)
 		return false;
 	INIT_LIST_HEAD(&nfit_idt->list);
-	nfit_idt->idt = idt;
+	memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
 	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
 	dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
 			idt->interleave_index, idt->line_count);
 	return true;
 }
 
+static size_t sizeof_flush(struct acpi_nfit_flush_address *flush)
+{
+	if (flush->header.length < sizeof(*flush))
+		return 0;
+	return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1);
+}
+
 static bool add_flush(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_flush_address *flush)
 {
-	size_t length = min_t(size_t, sizeof(*flush), flush->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_flush *nfit_flush;
 
-	list_for_each_entry(nfit_flush, &prev->flushes, list)
-		if (memcmp(nfit_flush->flush, flush, length) == 0) {
+	if (!sizeof_flush(flush))
+		return false;
+
+	list_for_each_entry(nfit_flush, &prev->flushes, list) {
+		if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush))
+			continue;
+
+		if (memcmp(nfit_flush->flush, flush,
+					sizeof_flush(flush)) == 0) {
 			list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
 			return true;
 		}
+	}
 
-	nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL);
+	nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush)
+			+ sizeof_flush(flush), GFP_KERNEL);
 	if (!nfit_flush)
 		return false;
 	INIT_LIST_HEAD(&nfit_flush->list);
-	nfit_flush->flush = flush;
+	memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
 	list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
 	dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
 			flush->device_handle, flush->hint_count);
@@ -614,7 +672,6 @@ static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
 {
 	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
 	struct nfit_memdev *nfit_memdev;
-	struct nfit_flush *nfit_flush;
 	struct nfit_bdw *nfit_bdw;
 	struct nfit_idt *nfit_idt;
 	u16 idt_idx, range_index;
@@ -647,14 +704,6 @@ static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
 			nfit_mem->idt_bdw = nfit_idt->idt;
 			break;
 		}
-
-		list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
-			if (nfit_flush->flush->device_handle !=
-					nfit_memdev->memdev->device_handle)
-				continue;
-			nfit_mem->nfit_flush = nfit_flush;
-			break;
-		}
 		break;
 	}
 }
@@ -675,6 +724,7 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
 	}
 
 	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+		struct nfit_flush *nfit_flush;
 		struct nfit_dcr *nfit_dcr;
 		u32 device_handle;
 		u16 dcr;
@@ -721,6 +771,28 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
 			break;
 		}
 
+		list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
+			struct acpi_nfit_flush_address *flush;
+			u16 i;
+
+			if (nfit_flush->flush->device_handle != device_handle)
+				continue;
+			nfit_mem->nfit_flush = nfit_flush;
+			flush = nfit_flush->flush;
+			nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev,
+					flush->hint_count
+					* sizeof(struct resource), GFP_KERNEL);
+			if (!nfit_mem->flush_wpq)
+				return -ENOMEM;
+			for (i = 0; i < flush->hint_count; i++) {
+				struct resource *res = &nfit_mem->flush_wpq[i];
+
+				res->start = flush->hint_address[i];
+				res->end = res->start + 8 - 1;
+			}
+			break;
+		}
+
 		if (dcr && !nfit_mem->dcr) {
 			dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
 					spa->range_index, dcr);
@@ -806,14 +878,85 @@ static ssize_t revision_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(revision);
 
+/*
+ * This shows the number of full Address Range Scrubs that have been
+ * completed since driver load time. Userspace can wait on this using
+ * select/poll etc. A '+' at the end indicates an ARS is in progress
+ */
+static ssize_t scrub_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	ssize_t rc = -ENXIO;
+
+	device_lock(dev);
+	nd_desc = dev_get_drvdata(dev);
+	if (nd_desc) {
+		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
+				(work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+	}
+	device_unlock(dev);
+	return rc;
+}
+
+static ssize_t scrub_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	ssize_t rc;
+	long val;
+
+	rc = kstrtol(buf, 0, &val);
+	if (rc)
+		return rc;
+	if (val != 1)
+		return -EINVAL;
+
+	device_lock(dev);
+	nd_desc = dev_get_drvdata(dev);
+	if (nd_desc) {
+		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+		rc = acpi_nfit_ars_rescan(acpi_desc);
+	}
+	device_unlock(dev);
+	if (rc)
+		return rc;
+	return size;
+}
+static DEVICE_ATTR_RW(scrub);
+
+static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
+{
+	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+	const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
+		| 1 << ND_CMD_ARS_STATUS;
+
+	return (nd_desc->cmd_mask & mask) == mask;
+}
+
+static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+	if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus))
+		return 0;
+	return a->mode;
+}
+
 static struct attribute *acpi_nfit_attributes[] = {
 	&dev_attr_revision.attr,
+	&dev_attr_scrub.attr,
 	NULL,
 };
 
 static struct attribute_group acpi_nfit_attribute_group = {
 	.name = "nfit",
 	.attrs = acpi_nfit_attributes,
+	.is_visible = nfit_visible,
 };
 
 static const struct attribute_group *acpi_nfit_attribute_groups[] = {
@@ -1130,11 +1273,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	}
 
 	/*
-	 * Until standardization materializes we need to consider up to 3
+	 * Until standardization materializes we need to consider 4
 	 * different command sets. Note, that checking for function0 (bit0)
 	 * tells us if any commands are reachable through this uuid.
 	 */
-	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
+	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
 		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
 			break;
 
@@ -1144,12 +1287,14 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		dsm_mask = 0x3fe;
 		if (disable_vendor_specific)
 			dsm_mask &= ~(1 << ND_CMD_VENDOR);
-	} else if (nfit_mem->family == NVDIMM_FAMILY_HPE1)
+	} else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
 		dsm_mask = 0x1c3c76;
-	else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
+	} else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
 		dsm_mask = 0x1fe;
 		if (disable_vendor_specific)
 			dsm_mask &= ~(1 << 8);
+	} else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
+		dsm_mask = 0xffffffff;
 	} else {
 		dev_dbg(dev, "unknown dimm command family\n");
 		nfit_mem->family = -1;
@@ -1171,6 +1316,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 	int dimm_count = 0;
 
 	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+		struct acpi_nfit_flush_address *flush;
 		unsigned long flags = 0, cmd_mask;
 		struct nvdimm *nvdimm;
 		u32 device_handle;
@@ -1204,9 +1350,12 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 		if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
 			cmd_mask |= nfit_mem->dsm_mask;
 
+		flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
+			: NULL;
 		nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
 				acpi_nfit_dimm_attribute_groups,
-				flags, cmd_mask);
+				flags, cmd_mask, flush ? flush->hint_count : 0,
+				nfit_mem->flush_wpq);
 		if (!nvdimm)
 			return -ENOMEM;
 
@@ -1374,24 +1523,6 @@ static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
 	return mmio->base_offset + line_offset + table_offset + sub_line_offset;
 }
 
-static void wmb_blk(struct nfit_blk *nfit_blk)
-{
-
-	if (nfit_blk->nvdimm_flush) {
-		/*
-		 * The first wmb() is needed to 'sfence' all previous writes
-		 * such that they are architecturally visible for the platform
-		 * buffer flush. Note that we've already arranged for pmem
-		 * writes to avoid the cache via arch_memcpy_to_pmem(). The
-		 * final wmb() ensures ordering for the NVDIMM flush write.
-		 */
-		wmb();
-		writeq(1, nfit_blk->nvdimm_flush);
-		wmb();
-	} else
-		wmb_pmem();
-}
-
 static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
 {
 	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
@@ -1426,7 +1557,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
 		offset = to_interleave_offset(offset, mmio);
 
 	writeq(cmd, mmio->addr.base + offset);
-	wmb_blk(nfit_blk);
+	nvdimm_flush(nfit_blk->nd_region);
 
 	if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
 		readq(mmio->addr.base + offset);
@@ -1477,7 +1608,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
 	}
 
 	if (rw)
-		wmb_blk(nfit_blk);
+		nvdimm_flush(nfit_blk->nd_region);
 
 	rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
 	return rc;
@@ -1509,125 +1640,6 @@ static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
 	return rc;
 }
 
-static void nfit_spa_mapping_release(struct kref *kref)
-{
-	struct nfit_spa_mapping *spa_map = to_spa_map(kref);
-	struct acpi_nfit_system_address *spa = spa_map->spa;
-	struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc;
-
-	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-	dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
-	if (spa_map->type == SPA_MAP_APERTURE)
-		memunmap((void __force *)spa_map->addr.aperture);
-	else
-		iounmap(spa_map->addr.base);
-	release_mem_region(spa->address, spa->length);
-	list_del(&spa_map->list);
-	kfree(spa_map);
-}
-
-static struct nfit_spa_mapping *find_spa_mapping(
-		struct acpi_nfit_desc *acpi_desc,
-		struct acpi_nfit_system_address *spa)
-{
-	struct nfit_spa_mapping *spa_map;
-
-	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-	list_for_each_entry(spa_map, &acpi_desc->spa_maps, list)
-		if (spa_map->spa == spa)
-			return spa_map;
-
-	return NULL;
-}
-
-static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc,
-		struct acpi_nfit_system_address *spa)
-{
-	struct nfit_spa_mapping *spa_map;
-
-	mutex_lock(&acpi_desc->spa_map_mutex);
-	spa_map = find_spa_mapping(acpi_desc, spa);
-
-	if (spa_map)
-		kref_put(&spa_map->kref, nfit_spa_mapping_release);
-	mutex_unlock(&acpi_desc->spa_map_mutex);
-}
-
-static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
-		struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
-	resource_size_t start = spa->address;
-	resource_size_t n = spa->length;
-	struct nfit_spa_mapping *spa_map;
-	struct resource *res;
-
-	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-
-	spa_map = find_spa_mapping(acpi_desc, spa);
-	if (spa_map) {
-		kref_get(&spa_map->kref);
-		return spa_map->addr.base;
-	}
-
-	spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
-	if (!spa_map)
-		return NULL;
-
-	INIT_LIST_HEAD(&spa_map->list);
-	spa_map->spa = spa;
-	kref_init(&spa_map->kref);
-	spa_map->acpi_desc = acpi_desc;
-
-	res = request_mem_region(start, n, dev_name(acpi_desc->dev));
-	if (!res)
-		goto err_mem;
-
-	spa_map->type = type;
-	if (type == SPA_MAP_APERTURE)
-		spa_map->addr.aperture = (void __pmem *)memremap(start, n,
-					ARCH_MEMREMAP_PMEM);
-	else
-		spa_map->addr.base = ioremap_nocache(start, n);
-
-
-	if (!spa_map->addr.base)
-		goto err_map;
-
-	list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
-	return spa_map->addr.base;
-
- err_map:
-	release_mem_region(start, n);
- err_mem:
-	kfree(spa_map);
-	return NULL;
-}
-
-/**
- * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
- * @nvdimm_bus: NFIT-bus that provided the spa table entry
- * @nfit_spa: spa table to map
- * @type: aperture or control region
- *
- * In the case where block-data-window apertures and
- * dimm-control-regions are interleaved they will end up sharing a
- * single request_mem_region() + ioremap() for the address range. In
- * the style of devm nfit_spa_map() mappings are automatically dropped
- * when all region devices referencing the same mapping are disabled /
- * unbound.
- */
-static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
-		struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
-	void __iomem *iomem;
-
-	mutex_lock(&acpi_desc->spa_map_mutex);
-	iomem = __nfit_spa_map(acpi_desc, spa, type);
-	mutex_unlock(&acpi_desc->spa_map_mutex);
-
-	return iomem;
-}
-
 static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
 		struct acpi_nfit_interleave *idt, u16 interleave_ways)
 {
@@ -1669,9 +1681,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 		struct device *dev)
 {
 	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
-	struct nfit_flush *nfit_flush;
 	struct nfit_blk_mmio *mmio;
 	struct nfit_blk *nfit_blk;
 	struct nfit_mem *nfit_mem;
@@ -1697,8 +1707,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	/* map block aperture memory */
 	nfit_blk->bdw_offset = nfit_mem->bdw->offset;
 	mmio = &nfit_blk->mmio[BDW];
-	mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
-			SPA_MAP_APERTURE);
+	mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
+			nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
 	if (!mmio->addr.base) {
 		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
 				nvdimm_name(nvdimm));
@@ -1720,8 +1730,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
 	nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
 	mmio = &nfit_blk->mmio[DCR];
-	mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
-			SPA_MAP_CONTROL);
+	mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
+			nfit_mem->spa_dcr->length);
 	if (!mmio->addr.base) {
 		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
 				nvdimm_name(nvdimm));
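devm_nvdimm_memremap() and devm_nvdimm_ioremap() replace the driver-local nfit_spa_map() machinery with devm-managed, reference-counted mappings (their implementation appears in the drivers/nvdimm/core.c hunk further down). Judging from the call sites here and the "flags == 0 means ioremap" convention in that hunk, the ioremap flavor is presumably just a thin wrapper; a sketch under that assumption:

    /* assumed shape, consistent with the call sites above */
    static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
                    resource_size_t offset, size_t size)
    {
            return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0);
    }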
@@ -1746,15 +1756,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 		return rc;
 	}
 
-	nfit_flush = nfit_mem->nfit_flush;
-	if (nfit_flush && nfit_flush->flush->hint_count != 0) {
-		nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev,
-				nfit_flush->flush->hint_address[0], 8);
-		if (!nfit_blk->nvdimm_flush)
-			return -ENOMEM;
-	}
-
-	if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush)
+	if (nvdimm_has_flush(nfit_blk->nd_region) < 0)
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 
 	if (mmio->line_size == 0)
@@ -1773,29 +1775,6 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	return 0;
 }
 
-static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
-		struct device *dev)
-{
-	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
-	struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
-	int i;
-
-	if (!nfit_blk)
-		return; /* never enabled */
-
-	/* auto-free BLK spa mappings */
-	for (i = 0; i < 2; i++) {
-		struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
-
-		if (mmio->addr.base)
-			nfit_spa_unmap(acpi_desc, mmio->spa);
-	}
-	nd_blk_region_set_provider_data(ndbr, NULL);
-	/* devm will free nfit_blk */
-}
-
 static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
 		struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
 {
@@ -1919,11 +1898,11 @@ static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
 	if (ret)
 		return ret;
 
-	ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res);
-	if (ret) {
-		remove_resource(res);
-		return ret;
-	}
+	ret = devm_add_action_or_reset(acpi_desc->dev,
+			acpi_nfit_remove_resource,
+			res);
+	if (ret)
+		return ret;
 
 	return 0;
 }
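This is one instance of a tree-wide move to devm_add_action_or_reset() (also visible in the dax, nvdimm-blk, and dm hunks below): when registration of the action fails, the helper invokes the action itself before returning, so callers no longer need a manual-cleanup branch. A self-contained sketch of the pattern with illustrative names:

    #include <linux/device.h>
    #include <linux/slab.h>

    static void example_release(void *data)
    {
            kfree(data);
    }

    static int example_setup(struct device *dev)
    {
            void *buf = kzalloc(4096, GFP_KERNEL);

            if (!buf)
                    return -ENOMEM;
            /* on failure, example_release(buf) has already been called */
            return devm_add_action_or_reset(dev, example_release, buf);
    }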
@@ -1969,7 +1948,6 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 	ndr_desc->num_mappings = blk_valid;
 	ndbr_desc = to_blk_region_desc(ndr_desc);
 	ndbr_desc->enable = acpi_nfit_blk_region_enable;
-	ndbr_desc->disable = acpi_nfit_blk_region_disable;
 	ndbr_desc->do_io = acpi_desc->blk_do_io;
 	nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
 			ndr_desc);
@@ -1981,6 +1959,14 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 	return 0;
 }
 
+static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
+{
+	return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
+		nfit_spa_type(spa) == NFIT_SPA_VCD ||
+		nfit_spa_type(spa) == NFIT_SPA_PDISK ||
+		nfit_spa_type(spa) == NFIT_SPA_PCD);
+}
+
 static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_spa *nfit_spa)
 {
@@ -1996,7 +1982,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 	if (nfit_spa->nd_region)
 		return 0;
 
-	if (spa->range_index == 0) {
+	if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
 		dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
 				__func__);
 		return 0;
@@ -2060,6 +2046,11 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 				ndr_desc);
 		if (!nfit_spa->nd_region)
 			rc = -ENOMEM;
+	} else if (nfit_spa_is_virtual(spa)) {
+		nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+				ndr_desc);
+		if (!nfit_spa->nd_region)
+			rc = -ENOMEM;
 	}
 
  out:
@@ -2139,7 +2130,7 @@ static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
 	unsigned int tmo = scrub_timeout;
 	int rc;
 
-	if (nfit_spa->ars_done || !nfit_spa->nd_region)
+	if (!nfit_spa->ars_required || !nfit_spa->nd_region)
 		return;
 
 	rc = ars_start(acpi_desc, nfit_spa);
@@ -2228,7 +2219,9 @@ static void acpi_nfit_scrub(struct work_struct *work)
 	 * firmware initiated scrubs to complete and then we go search for the
 	 * affected spa regions to mark them scanned. In the second phase we
 	 * initiate a directed scrub for every range that was not scrubbed in
-	 * phase 1.
+	 * phase 1. If we're called for a 'rescan', we harmlessly pass through
+	 * the first phase, but really only care about running phase 2, where
+	 * regions can be notified of new poison.
 	 */
 
 	/* process platform firmware initiated scrubs */
@@ -2331,14 +2324,17 @@ static void acpi_nfit_scrub(struct work_struct *work)
 		 * Flag all the ranges that still need scrubbing, but
 		 * register them now to make data available.
 		 */
-		if (nfit_spa->nd_region)
-			nfit_spa->ars_done = 1;
-		else
+		if (!nfit_spa->nd_region) {
+			nfit_spa->ars_required = 1;
 			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		}
 	}
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
 		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
+	acpi_desc->scrub_count++;
+	if (acpi_desc->scrub_count_state)
+		sysfs_notify_dirent(acpi_desc->scrub_count_state);
 	mutex_unlock(&acpi_desc->init_mutex);
 }
 
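The new scrub_count plus sysfs_notify_dirent() pairing lets userspace sleep in poll() until a scrub completes rather than re-reading the attribute in a loop. A userspace sketch in C; the ndbus0/nfit/scrub path is an assumption about a typical sysfs layout, not something taken from this diff:

    #include <fcntl.h>
    #include <poll.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[16];
            struct pollfd p;

            p.fd = open("/sys/bus/nd/devices/ndbus0/nfit/scrub", O_RDONLY);
            if (p.fd < 0)
                    return 1;
            (void) read(p.fd, buf, sizeof(buf));    /* consume current count */
            p.events = POLLERR | POLLPRI;
            poll(&p, 1, -1);                        /* wakes on sysfs_notify */
            if (pread(p.fd, buf, sizeof(buf), 0) > 0)
                    printf("scrub count now: %s", buf);
            close(p.fd);
            return 0;
    }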
@@ -2376,14 +2372,89 @@ static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
 	return 0;
 }
 
-int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
+static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
2376{
2377 struct device *dev = acpi_desc->dev;
2378 struct kernfs_node *nfit;
2379 struct device *bus_dev;
2380
2381 if (!ars_supported(acpi_desc->nvdimm_bus))
2382 return 0;
2383
2384 bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
2385 nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
2386 if (!nfit) {
2387 dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
2388 return -ENODEV;
2389 }
2390 acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
2391 sysfs_put(nfit);
2392 if (!acpi_desc->scrub_count_state) {
2393 dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
2394 return -ENODEV;
2395 }
2396
2397 return 0;
2398}
2399
2400static void acpi_nfit_destruct(void *data)
2401{
2402 struct acpi_nfit_desc *acpi_desc = data;
2403 struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
2404
2405 /*
2406 * Destruct under acpi_desc_lock so that nfit_handle_mce does not
2407 * race teardown
2408 */
2409 mutex_lock(&acpi_desc_lock);
2410 acpi_desc->cancel = 1;
2411 /*
2412 * Bounce the nvdimm bus lock to make sure any in-flight
2413 * acpi_nfit_ars_rescan() submissions have had a chance to
2414 * either submit or see ->cancel set.
2415 */
2416 device_lock(bus_dev);
2417 device_unlock(bus_dev);
2418
2419 flush_workqueue(nfit_wq);
2420 if (acpi_desc->scrub_count_state)
2421 sysfs_put(acpi_desc->scrub_count_state);
2422 nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
2423 acpi_desc->nvdimm_bus = NULL;
2424 list_del(&acpi_desc->list);
2425 mutex_unlock(&acpi_desc_lock);
2426}
2427
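acpi_nfit_destruct() pairs a cancel flag with a device_lock() "bounce": any acpi_nfit_ars_rescan() caller holding the bus device lock has either queued its work before the flush below or will observe ->cancel. The handshake in isolation, as a sketch that assumes submitters run under device_lock() (as the sysfs scrub path does):

    #include <linux/device.h>
    #include <linux/workqueue.h>

    struct example_ctx {
            struct work_struct work;
            unsigned int cancel:1;
    };

    static struct workqueue_struct *example_wq;

    static void example_submit(struct device *bus_dev, struct example_ctx *ctx)
    {
            device_lock(bus_dev);
            if (!ctx->cancel)
                    queue_work(example_wq, &ctx->work);
            device_unlock(bus_dev);
    }

    static void example_teardown(struct device *bus_dev, struct example_ctx *ctx)
    {
            ctx->cancel = 1;
            device_lock(bus_dev);   /* wait out any in-flight submitter */
            device_unlock(bus_dev);
            flush_workqueue(example_wq);
    }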
2428int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 {
 	struct device *dev = acpi_desc->dev;
 	struct nfit_table_prev prev;
 	const void *end;
-	u8 *data;
 	int rc;
 
2435 if (!acpi_desc->nvdimm_bus) {
2436 acpi_nfit_init_dsms(acpi_desc);
2437
2438 acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
2439 &acpi_desc->nd_desc);
2440 if (!acpi_desc->nvdimm_bus)
2441 return -ENOMEM;
2442
2443 rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
2444 acpi_desc);
2445 if (rc)
2446 return rc;
2447
2448 rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
2449 if (rc)
2450 return rc;
2451
2452 /* register this acpi_desc for mce notifications */
2453 mutex_lock(&acpi_desc_lock);
2454 list_add_tail(&acpi_desc->list, &acpi_descs);
2455 mutex_unlock(&acpi_desc_lock);
2456 }
2457
 	mutex_lock(&acpi_desc->init_mutex);
 
 	INIT_LIST_HEAD(&prev.spas);
@@ -2406,7 +2477,6 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
 	list_cut_position(&prev.flushes, &acpi_desc->flushes,
 			acpi_desc->flushes.prev);
 
-	data = (u8 *) acpi_desc->nfit;
 	end = data + sz;
 	while (!IS_ERR_OR_NULL(data))
 		data = add_table(acpi_desc, &prev, data, end);
@@ -2422,12 +2492,9 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
 	if (rc)
 		goto out_unlock;
 
-	if (nfit_mem_init(acpi_desc) != 0) {
-		rc = -ENOMEM;
+	rc = nfit_mem_init(acpi_desc);
+	if (rc)
 		goto out_unlock;
-	}
-
-	acpi_nfit_init_dsms(acpi_desc);
 
 	rc = acpi_nfit_register_dimms(acpi_desc);
 	if (rc)
@@ -2496,6 +2563,33 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	return 0;
 }
 
2566int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
2567{
2568 struct device *dev = acpi_desc->dev;
2569 struct nfit_spa *nfit_spa;
2570
2571 if (work_busy(&acpi_desc->work))
2572 return -EBUSY;
2573
2574 if (acpi_desc->cancel)
2575 return 0;
2576
2577 mutex_lock(&acpi_desc->init_mutex);
2578 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2579 struct acpi_nfit_system_address *spa = nfit_spa->spa;
2580
2581 if (nfit_spa_type(spa) != NFIT_SPA_PM)
2582 continue;
2583
2584 nfit_spa->ars_required = 1;
2585 }
2586 queue_work(nfit_wq, &acpi_desc->work);
2587 dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
2588 mutex_unlock(&acpi_desc->init_mutex);
2589
2590 return 0;
2591}
2592
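The sysfs attribute that drives acpi_nfit_ars_rescan() is added in a hunk outside this excerpt. A hypothetical sketch of such a store handler (illustrative, not the actual code): writing "1" to the bus's nfit/scrub file kicks a rescan under device_lock():

    static ssize_t scrub_store(struct device *dev,
                    struct device_attribute *attr, const char *buf, size_t size)
    {
            ssize_t rc;
            long val;

            rc = kstrtol(buf, 0, &val);
            if (rc)
                    return rc;
            if (val != 1)
                    return -EINVAL;

            device_lock(dev);
            if (dev_get_drvdata(dev))
                    rc = acpi_nfit_ars_rescan(
                                    to_acpi_desc(dev_get_drvdata(dev)));
            else
                    rc = -ENXIO;
            device_unlock(dev);

            return rc ? rc : size;
    }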
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 {
 	struct nvdimm_bus_descriptor *nd_desc;
@@ -2505,12 +2599,12 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 	acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
 	nd_desc = &acpi_desc->nd_desc;
 	nd_desc->provider_name = "ACPI.NFIT";
+	nd_desc->module = THIS_MODULE;
 	nd_desc->ndctl = acpi_nfit_ctl;
 	nd_desc->flush_probe = acpi_nfit_flush_probe;
 	nd_desc->clear_to_send = acpi_nfit_clear_to_send;
 	nd_desc->attr_groups = acpi_nfit_attribute_groups;
 
-	INIT_LIST_HEAD(&acpi_desc->spa_maps);
 	INIT_LIST_HEAD(&acpi_desc->spas);
 	INIT_LIST_HEAD(&acpi_desc->dcrs);
 	INIT_LIST_HEAD(&acpi_desc->bdws);
@@ -2518,7 +2612,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 	INIT_LIST_HEAD(&acpi_desc->flushes);
 	INIT_LIST_HEAD(&acpi_desc->memdevs);
 	INIT_LIST_HEAD(&acpi_desc->dimms);
-	mutex_init(&acpi_desc->spa_map_mutex);
+	INIT_LIST_HEAD(&acpi_desc->list);
 	mutex_init(&acpi_desc->init_mutex);
 	INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
 }
@@ -2532,7 +2626,7 @@ static int acpi_nfit_add(struct acpi_device *adev)
 	struct acpi_table_header *tbl;
 	acpi_status status = AE_OK;
 	acpi_size sz;
-	int rc;
+	int rc = 0;
 
 	status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
 	if (ACPI_FAILURE(status)) {
@@ -2545,50 +2639,33 @@ static int acpi_nfit_add(struct acpi_device *adev)
 	if (!acpi_desc)
 		return -ENOMEM;
 	acpi_nfit_desc_init(acpi_desc, &adev->dev);
-	acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
-	if (!acpi_desc->nvdimm_bus)
-		return -ENOMEM;
 
-	/*
-	 * Save the acpi header for later and then skip it,
-	 * making nfit point to the first nfit table header.
-	 */
+	/* Save the acpi header for exporting the revision via sysfs */
 	acpi_desc->acpi_header = *tbl;
-	acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit);
-	sz -= sizeof(struct acpi_table_nfit);
 
 	/* Evaluate _FIT and override with that if present */
 	status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
 	if (ACPI_SUCCESS(status) && buf.length > 0) {
-		union acpi_object *obj;
-		/*
-		 * Adjust for the acpi_object header of the _FIT
-		 */
-		obj = buf.pointer;
-		if (obj->type == ACPI_TYPE_BUFFER) {
-			acpi_desc->nfit =
-				(struct acpi_nfit_header *)obj->buffer.pointer;
-			sz = obj->buffer.length;
-		} else
+		union acpi_object *obj = buf.pointer;
+
+		if (obj->type == ACPI_TYPE_BUFFER)
+			rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+					obj->buffer.length);
+		else
 			dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
 				__func__, (int) obj->type);
-	}
-
-	rc = acpi_nfit_init(acpi_desc, sz);
-	if (rc) {
-		nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-		return rc;
-	}
-	return 0;
+		kfree(buf.pointer);
+	} else
+		/* skip over the lead-in header table */
+		rc = acpi_nfit_init(acpi_desc, (void *) tbl
+				+ sizeof(struct acpi_table_nfit),
+				sz - sizeof(struct acpi_table_nfit));
+	return rc;
 }
2584 2665
 static int acpi_nfit_remove(struct acpi_device *adev)
 {
-	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
-
-	acpi_desc->cancel = 1;
-	flush_workqueue(nfit_wq);
-	nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+	/* see acpi_nfit_destruct */
 	return 0;
 }
 
@@ -2596,9 +2673,8 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
 {
 	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
 	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_nfit_header *nfit_saved;
-	union acpi_object *obj;
 	struct device *dev = &adev->dev;
+	union acpi_object *obj;
 	acpi_status status;
 	int ret;
 
@@ -2616,9 +2692,6 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
 	if (!acpi_desc)
 		goto out_unlock;
 	acpi_nfit_desc_init(acpi_desc, &adev->dev);
-	acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
-	if (!acpi_desc->nvdimm_bus)
-		goto out_unlock;
 	} else {
 		/*
 		 * Finish previous registration before considering new
@@ -2634,21 +2707,14 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
 		goto out_unlock;
 	}
 
-	nfit_saved = acpi_desc->nfit;
 	obj = buf.pointer;
 	if (obj->type == ACPI_TYPE_BUFFER) {
-		acpi_desc->nfit =
-			(struct acpi_nfit_header *)obj->buffer.pointer;
-		ret = acpi_nfit_init(acpi_desc, obj->buffer.length);
-		if (ret) {
-			/* Merge failed, restore old nfit, and exit */
-			acpi_desc->nfit = nfit_saved;
+		ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+				obj->buffer.length);
+		if (ret)
 			dev_err(dev, "failed to merge updated NFIT\n");
-		}
-	} else {
-		/* Bad _FIT, restore old nfit */
+	} else
 		dev_err(dev, "Invalid _FIT\n");
-	}
 	kfree(buf.pointer);
 
  out_unlock:
@@ -2693,18 +2759,23 @@ static __init int nfit_init(void)
 	acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
 	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
 	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+	acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
 
 	nfit_wq = create_singlethread_workqueue("nfit");
 	if (!nfit_wq)
 		return -ENOMEM;
 
+	nfit_mce_register();
+
 	return acpi_bus_register_driver(&acpi_nfit_driver);
 }
 
 static __exit void nfit_exit(void)
 {
+	nfit_mce_unregister();
 	acpi_bus_unregister_driver(&acpi_nfit_driver);
 	destroy_workqueue(nfit_wq);
+	WARN_ON(!list_empty(&acpi_descs));
 }
 
 module_init(nfit_init);
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
new file mode 100644
index 000000000000..4c745bf389fe
--- /dev/null
+++ b/drivers/acpi/nfit/mce.c
@@ -0,0 +1,89 @@
1/*
2 * NFIT - Machine Check Handler
3 *
4 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of version 2 of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 */
15#include <linux/notifier.h>
16#include <linux/acpi.h>
17#include <asm/mce.h>
18#include "nfit.h"
19
20static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
21 void *data)
22{
23 struct mce *mce = (struct mce *)data;
24 struct acpi_nfit_desc *acpi_desc;
25 struct nfit_spa *nfit_spa;
26
27 /* We only care about memory errors */
28 if (!(mce->status & MCACOD))
29 return NOTIFY_DONE;
30
31 /*
32 * mce->addr contains the physical addr accessed that caused the
33 * machine check. We need to walk through the list of NFITs, and see
34 * if any of them matches that address, and only then start a scrub.
35 */
36 mutex_lock(&acpi_desc_lock);
37 list_for_each_entry(acpi_desc, &acpi_descs, list) {
38 struct device *dev = acpi_desc->dev;
39 int found_match = 0;
40
41 mutex_lock(&acpi_desc->init_mutex);
42 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
43 struct acpi_nfit_system_address *spa = nfit_spa->spa;
44
 45 if (nfit_spa_type(spa) != NFIT_SPA_PM)
46 continue;
47 /* find the spa that covers the mce addr */
48 if (spa->address > mce->addr)
49 continue;
50 if ((spa->address + spa->length - 1) < mce->addr)
51 continue;
52 found_match = 1;
53 dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
54 __func__, spa->range_index, spa->address,
55 spa->length);
56 /*
57 * We can break at the first match because we're going
58 * to rescan all the SPA ranges. There shouldn't be any
59 * aliasing anyway.
60 */
61 break;
62 }
63 mutex_unlock(&acpi_desc->init_mutex);
64
 65 /*
 66 * An -EBUSY from acpi_nfit_ars_rescan() can be ignored: if an ARS
 67 * is already in progress, that scrub remains the authoritative one.
 68 */
69 if (found_match)
70 acpi_nfit_ars_rescan(acpi_desc);
71 }
72
73 mutex_unlock(&acpi_desc_lock);
74 return NOTIFY_DONE;
75}
76
77static struct notifier_block nfit_mce_dec = {
78 .notifier_call = nfit_handle_mce,
79};
80
81void nfit_mce_register(void)
82{
83 mce_register_decode_chain(&nfit_mce_dec);
84}
85
86void nfit_mce_unregister(void)
87{
88 mce_unregister_decode_chain(&nfit_mce_dec);
89}
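The two range comparisons in nfit_handle_mce() implement an inclusive containment test on the SPA; written as a single predicate for clarity (illustrative only, not part of the patch):

    /* equivalent to the pair of 'continue' checks above */
    static bool example_spa_contains(const struct acpi_nfit_system_address *spa,
                    u64 addr)
    {
            return addr >= spa->address &&
                    addr <= spa->address + spa->length - 1;
    }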
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit/nfit.h
index 02b9ea1e8d2e..e894ded24d99 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -16,6 +16,7 @@
 #define __NFIT_H__
 #include <linux/workqueue.h>
 #include <linux/libnvdimm.h>
+#include <linux/ndctl.h>
 #include <linux/types.h>
 #include <linux/uuid.h>
 #include <linux/acpi.h>
@@ -31,6 +32,9 @@
 #define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
 #define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
 
+/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
+#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
+
 #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
 	| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
 	| ACPI_NFIT_MEM_NOT_ARMED)
@@ -40,6 +44,7 @@ enum nfit_uuids {
 	NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
 	NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
 	NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
+	NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
 	NFIT_SPA_VOLATILE,
 	NFIT_SPA_PM,
 	NFIT_SPA_DCR,
@@ -74,37 +79,37 @@ enum {
 };
 
 struct nfit_spa {
-	struct acpi_nfit_system_address *spa;
 	struct list_head list;
 	struct nd_region *nd_region;
-	unsigned int ars_done:1;
+	unsigned int ars_required:1;
 	u32 clear_err_unit;
 	u32 max_ars;
+	struct acpi_nfit_system_address spa[0];
 };
 
 struct nfit_dcr {
-	struct acpi_nfit_control_region *dcr;
 	struct list_head list;
+	struct acpi_nfit_control_region dcr[0];
 };
 
 struct nfit_bdw {
-	struct acpi_nfit_data_region *bdw;
 	struct list_head list;
+	struct acpi_nfit_data_region bdw[0];
 };
 
 struct nfit_idt {
-	struct acpi_nfit_interleave *idt;
 	struct list_head list;
+	struct acpi_nfit_interleave idt[0];
 };
 
 struct nfit_flush {
-	struct acpi_nfit_flush_address *flush;
 	struct list_head list;
+	struct acpi_nfit_flush_address flush[0];
 };
 
 struct nfit_memdev {
-	struct acpi_nfit_memory_map *memdev;
 	struct list_head list;
+	struct acpi_nfit_memory_map memdev[0];
 };
 
 /* assembled tables for a given dimm/memory-device */
@@ -123,6 +128,7 @@ struct nfit_mem {
 	struct list_head list;
 	struct acpi_device *adev;
 	struct acpi_nfit_desc *acpi_desc;
+	struct resource *flush_wpq;
 	unsigned long dsm_mask;
 	int family;
 };
@@ -130,10 +136,7 @@ struct nfit_mem {
 struct acpi_nfit_desc {
 	struct nvdimm_bus_descriptor nd_desc;
 	struct acpi_table_header acpi_header;
-	struct acpi_nfit_header *nfit;
-	struct mutex spa_map_mutex;
 	struct mutex init_mutex;
-	struct list_head spa_maps;
 	struct list_head memdevs;
 	struct list_head flushes;
 	struct list_head dimms;
@@ -146,6 +149,9 @@ struct acpi_nfit_desc {
 	struct nd_cmd_ars_status *ars_status;
 	size_t ars_status_size;
 	struct work_struct work;
+	struct list_head list;
+	struct kernfs_node *scrub_count_state;
+	unsigned int scrub_count;
 	unsigned int cancel:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
@@ -161,7 +167,7 @@ enum nd_blk_mmio_selector {
 struct nd_blk_addr {
 	union {
 		void __iomem *base;
-		void __pmem *aperture;
+		void *aperture;
 	};
 };
 
@@ -180,28 +186,26 @@ struct nfit_blk {
 	u64 bdw_offset; /* post interleave offset */
 	u64 stat_offset;
 	u64 cmd_offset;
-	void __iomem *nvdimm_flush;
 	u32 dimm_flags;
 };
 
-enum spa_map_type {
-	SPA_MAP_CONTROL,
-	SPA_MAP_APERTURE,
-};
-
-struct nfit_spa_mapping {
-	struct acpi_nfit_desc *acpi_desc;
-	struct acpi_nfit_system_address *spa;
-	struct list_head list;
-	struct kref kref;
-	enum spa_map_type type;
-	struct nd_blk_addr addr;
-};
+extern struct list_head acpi_descs;
+extern struct mutex acpi_desc_lock;
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
 
-static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
+#ifdef CONFIG_X86_MCE
+void nfit_mce_register(void);
+void nfit_mce_unregister(void);
+#else
+static inline void nfit_mce_register(void)
 {
-	return container_of(kref, struct nfit_spa_mapping, kref);
 }
+static inline void nfit_mce_unregister(void)
+{
+}
+#endif
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa);
 
 static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
 		struct nfit_mem *nfit_mem)
@@ -218,6 +222,6 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 }
 
 const u8 *to_nfit_uuid(enum nfit_uuids id);
-int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
 #endif /* __NFIT_H__ */
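The struct changes above turn each nfit_* wrapper's pointer into a trailing zero-length array, so the wrapper owns a copy of its ACPI sub-table instead of referencing the transient _FIT buffer (which acpi_nfit_add() now kfree()s). The implied allocation pattern, sketched with assumed names; the real copy happens in add_table(), outside this excerpt, and sizes the copy from the sub-table's own length field:

    static struct nfit_spa *example_copy_spa(struct device *dev,
                    struct acpi_nfit_system_address *spa)
    {
            struct nfit_spa *nfit_spa;

            nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
                            GFP_KERNEL);
            if (!nfit_spa)
                    return NULL;
            INIT_LIST_HEAD(&nfit_spa->list);
            memcpy(nfit_spa->spa, spa, sizeof(*spa)); /* table now owned */
            return nfit_spa;
    }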
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index ba5145d384d8..3022dad24071 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -379,7 +379,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-		void __pmem **kaddr, pfn_t *pfn, long size)
+		void **kaddr, pfn_t *pfn, long size)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
 	struct page *page;
@@ -389,7 +389,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
 	page = brd_insert_page(brd, sector);
 	if (!page)
 		return -ENOSPC;
-	*kaddr = (void __pmem *)page_address(page);
+	*kaddr = page_address(page);
 	*pfn = page_to_pfn_t(page);
 
 	return PAGE_SIZE;
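With "pmem: kill __pmem address space" the sparse annotation is gone, which is why casts like (void __pmem *) simply fall away in this and the following dm hunks. For reference, the removed annotation expanded to roughly this in the old include/linux/compiler.h:

    #ifdef __CHECKER__
    # define __pmem __attribute__((noderef, address_space(5)))
    #else
    # define __pmem
    #endif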
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index b891a129b275..803f3953b341 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -211,11 +211,9 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
 	}
 	dax_dev->dev = dev;
 
-	rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
-	if (rc) {
-		unregister_dax_dev(dev);
+	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
+	if (rc)
 		return rc;
-	}
 
 	return 0;
 
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 55d510e36cd1..dfb168568af1 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -102,21 +102,19 @@ static int dax_pmem_probe(struct device *dev)
 	if (rc)
 		return rc;
 
-	rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
-	if (rc) {
-		dax_pmem_percpu_exit(&dax_pmem->ref);
+	rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit,
+			&dax_pmem->ref);
+	if (rc)
 		return rc;
-	}
 
 	addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);
 
-	rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
-	if (rc) {
-		dax_pmem_percpu_kill(&dax_pmem->ref);
+	rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
+			&dax_pmem->ref);
+	if (rc)
 		return rc;
-	}
 
 	nd_region = to_nd_region(dev->parent);
 	dax_region = alloc_dax_region(dev, nd_region->id, &res,
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 6d35dd4e9efb..4788b0b989a9 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -142,7 +142,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 }
 
 static long linear_direct_access(struct dm_target *ti, sector_t sector,
-		void __pmem **kaddr, pfn_t *pfn, long size)
+		void **kaddr, pfn_t *pfn, long size)
 {
 	struct linear_c *lc = ti->private;
 	struct block_device *bdev = lc->dev->bdev;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 731e1f5bd895..ce2a910709f7 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2303,7 +2303,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
 }
 
 static long origin_direct_access(struct dm_target *ti, sector_t sector,
-		void __pmem **kaddr, pfn_t *pfn, long size)
+		void **kaddr, pfn_t *pfn, long size)
 {
 	DMWARN("device does not support dax.");
 	return -EIO;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 01bb9cf2a8c2..83f1d4667195 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -309,7 +309,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 }
 
 static long stripe_direct_access(struct dm_target *ti, sector_t sector,
-		void __pmem **kaddr, pfn_t *pfn, long size)
+		void **kaddr, pfn_t *pfn, long size)
 {
 	struct stripe_c *sc = ti->private;
 	uint32_t stripe;
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 6eecd6b36f76..710ae28fd618 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -149,7 +149,7 @@ static void io_err_release_clone_rq(struct request *clone)
 }
 
 static long io_err_direct_access(struct dm_target *ti, sector_t sector,
-		void __pmem **kaddr, pfn_t *pfn, long size)
+		void **kaddr, pfn_t *pfn, long size)
 {
 	return -EIO;
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ceb69fc0b10b..25d1d97154a8 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -906,7 +906,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
 static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
-		void __pmem **kaddr, pfn_t *pfn, long size)
+		void **kaddr, pfn_t *pfn, long size)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	struct dm_table *map;
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 7c8a3bf07884..124c2432ac9c 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -1,6 +1,7 @@
 menuconfig LIBNVDIMM
 	tristate "NVDIMM (Non-Volatile Memory Device) Support"
 	depends on PHYS_ADDR_T_64BIT
+	depends on HAS_IOMEM
 	depends on BLK_DEV
 	help
 	  Generic support for non-volatile memory devices including
@@ -19,7 +20,6 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
-	depends on HAS_IOMEM
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 7e262ef06ede..9faaa9694d87 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -267,10 +267,8 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	q = blk_alloc_queue(GFP_KERNEL);
 	if (!q)
 		return -ENOMEM;
-	if (devm_add_action(dev, nd_blk_release_queue, q)) {
-		blk_cleanup_queue(q);
+	if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
 		return -ENOMEM;
-	}
 
 	blk_queue_make_request(q, nd_blk_make_request);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -282,10 +280,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	disk = alloc_disk(0);
 	if (!disk)
 		return -ENOMEM;
-	if (devm_add_action(dev, nd_blk_release_disk, disk)) {
-		put_disk(disk);
-		return -ENOMEM;
-	}
 
 	disk->first_minor = 0;
 	disk->fops = &nd_blk_fops;
@@ -295,6 +289,9 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	set_capacity(disk, 0);
 	device_add_disk(dev, disk);
 
+	if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
+		return -ENOMEM;
+
 	if (nsblk_meta_size(nsblk)) {
 		int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
 
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 816d0dae6398..3fa7919f94a8 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -198,8 +198,7 @@ struct device *nd_btt_create(struct nd_region *nd_region)
 {
 	struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
 
-	if (dev)
-		__nd_device_register(dev);
+	__nd_device_register(dev);
 	return dev;
 }
 
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 5e4e5c772ea5..458daf927336 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -31,6 +31,7 @@
 int nvdimm_major;
 static int nvdimm_bus_major;
 static struct class *nd_class;
+static DEFINE_IDA(nd_ida);
 
 static int to_nd_device_type(struct device *dev)
 {
@@ -60,20 +61,13 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 			to_nd_device_type(dev));
 }
 
-static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
-{
-	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
-
-	return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
-}
-
 static struct module *to_bus_provider(struct device *dev)
 {
 	/* pin bus providers while regions are enabled */
 	if (is_nd_pmem(dev) || is_nd_blk(dev)) {
 		struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
 
-		return nvdimm_bus->module;
+		return nvdimm_bus->nd_desc->module;
 	}
 	return NULL;
 }
@@ -136,6 +130,21 @@ static int nvdimm_bus_remove(struct device *dev)
 	return rc;
 }
 
+static void nvdimm_bus_shutdown(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	struct nd_device_driver *nd_drv = NULL;
+
+	if (dev->driver)
+		nd_drv = to_nd_device_driver(dev->driver);
+
+	if (nd_drv && nd_drv->shutdown) {
+		nd_drv->shutdown(dev);
+		dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
+				dev->driver->name, dev_name(dev));
+	}
+}
+
 void nd_device_notify(struct device *dev, enum nvdimm_event event)
 {
 	device_lock(dev);
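nvdimm_bus_shutdown() gives nd drivers a power-off hook; the "libnvdimm, pmem: flush posted-write queues on shutdown" commit in this pull uses it so write-pending queues are drained before the platform loses power. A sketch of that wiring, assuming the shutdown body is a plain region flush per that commit's summary:

    static void nd_pmem_shutdown(struct device *dev)
    {
            nvdimm_flush(to_nd_region(dev->parent));
    }

    /* hooked up as .shutdown = nd_pmem_shutdown in nd_pmem_driver */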
@@ -208,14 +217,187 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 }
 EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
 
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
+
 static struct bus_type nvdimm_bus_type = {
 	.name = "nd",
 	.uevent = nvdimm_bus_uevent,
 	.match = nvdimm_bus_match,
 	.probe = nvdimm_bus_probe,
 	.remove = nvdimm_bus_remove,
+	.shutdown = nvdimm_bus_shutdown,
+};
230
231static void nvdimm_bus_release(struct device *dev)
232{
233 struct nvdimm_bus *nvdimm_bus;
234
235 nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
236 ida_simple_remove(&nd_ida, nvdimm_bus->id);
237 kfree(nvdimm_bus);
238}
239
240static bool is_nvdimm_bus(struct device *dev)
241{
242 return dev->release == nvdimm_bus_release;
243}
244
245struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
246{
247 struct device *dev;
248
249 for (dev = nd_dev; dev; dev = dev->parent)
250 if (is_nvdimm_bus(dev))
251 break;
252 dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
253 if (dev)
254 return to_nvdimm_bus(dev);
255 return NULL;
256}
257
258struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
259{
260 struct nvdimm_bus *nvdimm_bus;
261
262 nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
263 WARN_ON(!is_nvdimm_bus(dev));
264 return nvdimm_bus;
265}
266EXPORT_SYMBOL_GPL(to_nvdimm_bus);
267
268struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
269 struct nvdimm_bus_descriptor *nd_desc)
270{
271 struct nvdimm_bus *nvdimm_bus;
272 int rc;
273
274 nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
275 if (!nvdimm_bus)
276 return NULL;
277 INIT_LIST_HEAD(&nvdimm_bus->list);
278 INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
279 INIT_LIST_HEAD(&nvdimm_bus->poison_list);
280 init_waitqueue_head(&nvdimm_bus->probe_wait);
281 nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
282 mutex_init(&nvdimm_bus->reconfig_mutex);
283 if (nvdimm_bus->id < 0) {
284 kfree(nvdimm_bus);
285 return NULL;
286 }
287 nvdimm_bus->nd_desc = nd_desc;
288 nvdimm_bus->dev.parent = parent;
289 nvdimm_bus->dev.release = nvdimm_bus_release;
290 nvdimm_bus->dev.groups = nd_desc->attr_groups;
291 nvdimm_bus->dev.bus = &nvdimm_bus_type;
292 dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
293 rc = device_register(&nvdimm_bus->dev);
294 if (rc) {
295 dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
296 goto err;
297 }
298
299 return nvdimm_bus;
300 err:
301 put_device(&nvdimm_bus->dev);
302 return NULL;
303}
304EXPORT_SYMBOL_GPL(nvdimm_bus_register);
305
306void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
307{
308 if (!nvdimm_bus)
309 return;
310 device_unregister(&nvdimm_bus->dev);
311}
312EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
313
314static int child_unregister(struct device *dev, void *data)
315{
316 /*
317 * the singular ndctl class device per bus needs to be
318 * "device_destroy"ed, so skip it here
319 *
320 * i.e. remove classless children
321 */
322 if (dev->class)
323 /* pass */;
324 else
325 nd_device_unregister(dev, ND_SYNC);
326 return 0;
327}
328
329static void free_poison_list(struct list_head *poison_list)
330{
331 struct nd_poison *pl, *next;
332
333 list_for_each_entry_safe(pl, next, poison_list, list) {
334 list_del(&pl->list);
335 kfree(pl);
336 }
337 list_del_init(poison_list);
338}
339
340static int nd_bus_remove(struct device *dev)
341{
342 struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
343
344 mutex_lock(&nvdimm_bus_list_mutex);
345 list_del_init(&nvdimm_bus->list);
346 mutex_unlock(&nvdimm_bus_list_mutex);
347
348 nd_synchronize();
349 device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
350
351 nvdimm_bus_lock(&nvdimm_bus->dev);
352 free_poison_list(&nvdimm_bus->poison_list);
353 nvdimm_bus_unlock(&nvdimm_bus->dev);
354
355 nvdimm_bus_destroy_ndctl(nvdimm_bus);
356
357 return 0;
358}
359
360static int nd_bus_probe(struct device *dev)
361{
362 struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
363 int rc;
364
365 rc = nvdimm_bus_create_ndctl(nvdimm_bus);
366 if (rc)
367 return rc;
368
369 mutex_lock(&nvdimm_bus_list_mutex);
370 list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
371 mutex_unlock(&nvdimm_bus_list_mutex);
372
373 /* enable bus provider attributes to look up their local context */
374 dev_set_drvdata(dev, nvdimm_bus->nd_desc);
375
376 return 0;
377}
378
379static struct nd_device_driver nd_bus_driver = {
380 .probe = nd_bus_probe,
381 .remove = nd_bus_remove,
382 .drv = {
383 .name = "nd_bus",
384 .suppress_bind_attrs = true,
385 .bus = &nvdimm_bus_type,
386 .owner = THIS_MODULE,
387 .mod_name = KBUILD_MODNAME,
388 },
 };
 
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
+
+	if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
+		return true;
+
+	return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
+}
+
 static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
 
 void nd_synchronize(void)
@@ -395,12 +577,10 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
 	dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
 			"ndctl%d", nvdimm_bus->id);
 
-	if (IS_ERR(dev)) {
+	if (IS_ERR(dev))
 		dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
 				nvdimm_bus->id, PTR_ERR(dev));
-		return PTR_ERR(dev);
-	}
-	return 0;
+	return PTR_ERR_OR_ZERO(dev);
 }
 
 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
@@ -850,8 +1030,14 @@ int __init nvdimm_bus_init(void)
 		goto err_class;
 	}
 
+	rc = driver_register(&nd_bus_driver.drv);
+	if (rc)
+		goto err_nd_bus;
+
 	return 0;
 
+ err_nd_bus:
+	class_destroy(nd_class);
  err_class:
 	unregister_chrdev(nvdimm_major, "dimmctl");
  err_dimm_chrdev:
@@ -864,8 +1050,10 @@ int __init nvdimm_bus_init(void)
 
 void nvdimm_bus_exit(void)
 {
+	driver_unregister(&nd_bus_driver.drv);
 	class_destroy(nd_class);
 	unregister_chrdev(nvdimm_bus_major, "ndctl");
 	unregister_chrdev(nvdimm_major, "dimmctl");
 	bus_unregister(&nvdimm_bus_type);
+	ida_destroy(&nd_ida);
 }
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 8b2e3c4fb0ad..d5dc80c48b4c 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -240,7 +240,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 		return memcpy_from_pmem(buf, nsio->addr + offset, size);
 	} else {
 		memcpy_to_pmem(nsio->addr + offset, buf, size);
-		wmb_pmem();
+		nvdimm_flush(to_nd_region(ndns->dev.parent));
 	}
 
 	return 0;
@@ -266,9 +266,8 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
 
 	nsio->addr = devm_memremap(dev, res->start, resource_size(res),
 			ARCH_MEMREMAP_PMEM);
-	if (IS_ERR(nsio->addr))
-		return PTR_ERR(nsio->addr);
-	return 0;
+
+	return PTR_ERR_OR_ZERO(nsio->addr);
 }
 EXPORT_SYMBOL_GPL(devm_nsio_enable);
 
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index be89764315c2..715583f69d28 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -20,12 +20,12 @@
 #include <linux/ndctl.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 #include "nd-core.h"
 #include "nd.h"
 
 LIST_HEAD(nvdimm_bus_list);
 DEFINE_MUTEX(nvdimm_bus_list_mutex);
-static DEFINE_IDA(nd_ida);
 
 void nvdimm_bus_lock(struct device *dev)
 {
@@ -57,6 +57,127 @@ bool is_nvdimm_bus_locked(struct device *dev)
 }
 EXPORT_SYMBOL(is_nvdimm_bus_locked);
 
60struct nvdimm_map {
61 struct nvdimm_bus *nvdimm_bus;
62 struct list_head list;
63 resource_size_t offset;
64 unsigned long flags;
65 size_t size;
66 union {
67 void *mem;
68 void __iomem *iomem;
69 };
70 struct kref kref;
71};
72
73static struct nvdimm_map *find_nvdimm_map(struct device *dev,
74 resource_size_t offset)
75{
76 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
77 struct nvdimm_map *nvdimm_map;
78
79 list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list)
80 if (nvdimm_map->offset == offset)
81 return nvdimm_map;
82 return NULL;
83}
84
85static struct nvdimm_map *alloc_nvdimm_map(struct device *dev,
86 resource_size_t offset, size_t size, unsigned long flags)
87{
88 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
89 struct nvdimm_map *nvdimm_map;
90
91 nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL);
92 if (!nvdimm_map)
93 return NULL;
94
95 INIT_LIST_HEAD(&nvdimm_map->list);
96 nvdimm_map->nvdimm_bus = nvdimm_bus;
97 nvdimm_map->offset = offset;
98 nvdimm_map->flags = flags;
99 nvdimm_map->size = size;
100 kref_init(&nvdimm_map->kref);
101
102 if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
103 goto err_request_region;
104
105 if (flags)
106 nvdimm_map->mem = memremap(offset, size, flags);
107 else
108 nvdimm_map->iomem = ioremap(offset, size);
109
110 if (!nvdimm_map->mem)
111 goto err_map;
112
113 dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!",
114 __func__);
115 list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list);
116
117 return nvdimm_map;
118
119 err_map:
120 release_mem_region(offset, size);
121 err_request_region:
122 kfree(nvdimm_map);
123 return NULL;
124}
125
126static void nvdimm_map_release(struct kref *kref)
127{
128 struct nvdimm_bus *nvdimm_bus;
129 struct nvdimm_map *nvdimm_map;
130
131 nvdimm_map = container_of(kref, struct nvdimm_map, kref);
132 nvdimm_bus = nvdimm_map->nvdimm_bus;
133
134 dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
135 list_del(&nvdimm_map->list);
136 if (nvdimm_map->flags)
137 memunmap(nvdimm_map->mem);
138 else
139 iounmap(nvdimm_map->iomem);
140 release_mem_region(nvdimm_map->offset, nvdimm_map->size);
141 kfree(nvdimm_map);
142}
143
144static void nvdimm_map_put(void *data)
145{
146 struct nvdimm_map *nvdimm_map = data;
147 struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus;
148
149 nvdimm_bus_lock(&nvdimm_bus->dev);
150 kref_put(&nvdimm_map->kref, nvdimm_map_release);
151 nvdimm_bus_unlock(&nvdimm_bus->dev);
152}
153
154/**
155 * devm_nvdimm_memremap - map a resource that is shared across regions
156 * @dev: device that will own a reference to the shared mapping
157 * @offset: physical base address of the mapping
158 * @size: mapping size
159 * @flags: memremap flags, or, if zero, perform an ioremap instead
160 */
161void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
162 size_t size, unsigned long flags)
163{
164 struct nvdimm_map *nvdimm_map;
165
166 nvdimm_bus_lock(dev);
167 nvdimm_map = find_nvdimm_map(dev, offset);
168 if (!nvdimm_map)
169 nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags);
170 else
171 kref_get(&nvdimm_map->kref);
172 nvdimm_bus_unlock(dev);
173
174 if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
175 return NULL;
176
177 return nvdimm_map->mem;
178}
179EXPORT_SYMBOL_GPL(devm_nvdimm_memremap);
180
60u64 nd_fletcher64(void *addr, size_t len, bool le) 181u64 nd_fletcher64(void *addr, size_t len, bool le)
61{ 182{
62 u32 *buf = addr; 183 u32 *buf = addr;
@@ -73,25 +194,6 @@ u64 nd_fletcher64(void *addr, size_t len, bool le)
73} 194}
74EXPORT_SYMBOL_GPL(nd_fletcher64); 195EXPORT_SYMBOL_GPL(nd_fletcher64);
75 196
76static void nvdimm_bus_release(struct device *dev)
77{
78 struct nvdimm_bus *nvdimm_bus;
79
80 nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
81 ida_simple_remove(&nd_ida, nvdimm_bus->id);
82 kfree(nvdimm_bus);
83}
84
85struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
86{
87 struct nvdimm_bus *nvdimm_bus;
88
89 nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
90 WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
91 return nvdimm_bus;
92}
93EXPORT_SYMBOL_GPL(to_nvdimm_bus);
94
95struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) 197struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
96{ 198{
97 /* struct nvdimm_bus definition is private to libnvdimm */ 199 /* struct nvdimm_bus definition is private to libnvdimm */
@@ -99,18 +201,12 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
99} 201}
100EXPORT_SYMBOL_GPL(to_nd_desc); 202EXPORT_SYMBOL_GPL(to_nd_desc);
101 203
102struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) 204struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
103{ 205{
104 struct device *dev; 206 /* struct nvdimm_bus definition is private to libnvdimm */
105 207 return &nvdimm_bus->dev;
106 for (dev = nd_dev; dev; dev = dev->parent)
107 if (dev->release == nvdimm_bus_release)
108 break;
109 dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
110 if (dev)
111 return to_nvdimm_bus(dev);
112 return NULL;
113} 208}
209EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
114 210
115static bool is_uuid_sep(char sep) 211static bool is_uuid_sep(char sep)
116{ 212{
@@ -325,51 +421,6 @@ struct attribute_group nvdimm_bus_attribute_group = {
325}; 421};
326EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); 422EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
327 423
328struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
329 struct nvdimm_bus_descriptor *nd_desc, struct module *module)
330{
331 struct nvdimm_bus *nvdimm_bus;
332 int rc;
333
334 nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
335 if (!nvdimm_bus)
336 return NULL;
337 INIT_LIST_HEAD(&nvdimm_bus->list);
338 INIT_LIST_HEAD(&nvdimm_bus->poison_list);
339 init_waitqueue_head(&nvdimm_bus->probe_wait);
340 nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
341 mutex_init(&nvdimm_bus->reconfig_mutex);
342 if (nvdimm_bus->id < 0) {
343 kfree(nvdimm_bus);
344 return NULL;
345 }
346 nvdimm_bus->nd_desc = nd_desc;
347 nvdimm_bus->module = module;
348 nvdimm_bus->dev.parent = parent;
349 nvdimm_bus->dev.release = nvdimm_bus_release;
350 nvdimm_bus->dev.groups = nd_desc->attr_groups;
351 dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
352 rc = device_register(&nvdimm_bus->dev);
353 if (rc) {
354 dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
355 goto err;
356 }
357
358 rc = nvdimm_bus_create_ndctl(nvdimm_bus);
359 if (rc)
360 goto err;
361
362 mutex_lock(&nvdimm_bus_list_mutex);
363 list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
364 mutex_unlock(&nvdimm_bus_list_mutex);
365
366 return nvdimm_bus;
367 err:
368 put_device(&nvdimm_bus->dev);
369 return NULL;
370}
371EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
372
373static void set_badblock(struct badblocks *bb, sector_t s, int num) 424static void set_badblock(struct badblocks *bb, sector_t s, int num)
374{ 425{
375 dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n", 426 dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
@@ -545,54 +596,6 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
545} 596}
546EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); 597EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
547 598
548static void free_poison_list(struct list_head *poison_list)
549{
550 struct nd_poison *pl, *next;
551
552 list_for_each_entry_safe(pl, next, poison_list, list) {
553 list_del(&pl->list);
554 kfree(pl);
555 }
556 list_del_init(poison_list);
557}
558
559static int child_unregister(struct device *dev, void *data)
560{
561 /*
562 * the singular ndctl class device per bus needs to be
563 * "device_destroy"ed, so skip it here
564 *
565 * i.e. remove classless children
566 */
567 if (dev->class)
568 /* pass */;
569 else
570 nd_device_unregister(dev, ND_SYNC);
571 return 0;
572}
573
574void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
575{
576 if (!nvdimm_bus)
577 return;
578
579 mutex_lock(&nvdimm_bus_list_mutex);
580 list_del_init(&nvdimm_bus->list);
581 mutex_unlock(&nvdimm_bus_list_mutex);
582
583 nd_synchronize();
584 device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
585
586 nvdimm_bus_lock(&nvdimm_bus->dev);
587 free_poison_list(&nvdimm_bus->poison_list);
588 nvdimm_bus_unlock(&nvdimm_bus->dev);
589
590 nvdimm_bus_destroy_ndctl(nvdimm_bus);
591
592 device_unregister(&nvdimm_bus->dev);
593}
594EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
595
596#ifdef CONFIG_BLK_DEV_INTEGRITY 599#ifdef CONFIG_BLK_DEV_INTEGRITY
597int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) 600int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
598{ 601{
@@ -601,7 +604,8 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
601 if (meta_size == 0) 604 if (meta_size == 0)
602 return 0; 605 return 0;
603 606
604 bi.profile = NULL; 607 memset(&bi, 0, sizeof(bi));
608
605 bi.tuple_size = meta_size; 609 bi.tuple_size = meta_size;
606 bi.tag_size = meta_size; 610 bi.tag_size = meta_size;
607 611
@@ -650,7 +654,6 @@ static __exit void libnvdimm_exit(void)
650 nvdimm_bus_exit(); 654 nvdimm_bus_exit();
651 nd_region_devs_exit(); 655 nd_region_devs_exit();
652 nvdimm_devs_exit(); 656 nvdimm_devs_exit();
653 ida_destroy(&nd_ida);
654} 657}
655 658
656MODULE_LICENSE("GPL v2"); 659MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index bbde28d3dec5..d9bba5edd8dc 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -346,7 +346,8 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
 
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 		const struct attribute_group **groups, unsigned long flags,
-		unsigned long cmd_mask)
+		unsigned long cmd_mask, int num_flush,
+		struct resource *flush_wpq)
 {
 	struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
 	struct device *dev;
@@ -362,6 +363,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 	nvdimm->provider_data = provider_data;
 	nvdimm->flags = flags;
 	nvdimm->cmd_mask = cmd_mask;
+	nvdimm->num_flush = num_flush;
+	nvdimm->flush_wpq = flush_wpq;
 	atomic_set(&nvdimm->busy, 0);
 	dev = &nvdimm->dev;
 	dev_set_name(dev, "nmem%d", nvdimm->id);
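With this change a dimm provider hands its flush hint resources to the core at nvdimm_create() time; nd_region_activate() later ioremaps them per region. A sketch of a caller, with made-up hint addresses standing in for what ACPI NFIT's Flush Hint Address Structure would supply:

#include <linux/kernel.h>
#include <linux/libnvdimm.h>
#include <linux/ioport.h>

/* hypothetical flush hint table; real values come from firmware */
static struct resource example_flush_res[] = {
	DEFINE_RES_MEM(0x1f0000000ULL, 8),
	DEFINE_RES_MEM(0x1f0001000ULL, 8),
};

static struct nvdimm *example_register_dimm(struct nvdimm_bus *bus,
		void *provider, const struct attribute_group **groups,
		unsigned long cmd_mask)
{
	/* passing 0 / NULL instead would mean "no hints, assume ADR" */
	return nvdimm_create(bus, provider, groups, 0, cmd_mask,
			ARRAY_SIZE(example_flush_res), example_flush_res);
}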
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index 95825b38559a..11ea90120542 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -47,6 +47,7 @@ static int e820_pmem_probe(struct platform_device *pdev)
 
 	nd_desc.attr_groups = e820_pmem_attribute_groups;
 	nd_desc.provider_name = "e820";
+	nd_desc.module = THIS_MODULE;
 	nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
 	if (!nvdimm_bus)
 		goto err;
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 284cdaa268cf..38ce6bbbc170 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -26,11 +26,11 @@ extern int nvdimm_major;
 struct nvdimm_bus {
 	struct nvdimm_bus_descriptor *nd_desc;
 	wait_queue_head_t probe_wait;
-	struct module *module;
 	struct list_head list;
 	struct device dev;
 	int id, probe_active;
 	struct list_head poison_list;
+	struct list_head mapping_list;
 	struct mutex reconfig_mutex;
 };
 
@@ -40,7 +40,8 @@ struct nvdimm {
 	unsigned long cmd_mask;
 	struct device dev;
 	atomic_t busy;
-	int id;
+	int id, num_flush;
+	struct resource *flush_wpq;
 };
 
 bool is_nvdimm(struct device *dev);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index d0ac93c31dda..40476399d227 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -49,9 +49,11 @@ struct nvdimm_drvdata {
 	struct kref kref;
 };
 
-struct nd_region_namespaces {
-	int count;
-	int active;
+struct nd_region_data {
+	int ns_count;
+	int ns_active;
+	unsigned int flush_mask;
+	void __iomem *flush_wpq[0][0];
 };
 
 static inline struct nd_namespace_index *to_namespace_index(
@@ -119,7 +121,6 @@ struct nd_region {
 
 struct nd_blk_region {
 	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-	void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 	void *blk_provider_data;
@@ -325,6 +326,7 @@ static inline void devm_nsio_disable(struct device *dev,
 }
 #endif
 int nd_blk_region_init(struct nd_region *nd_region);
+int nd_region_activate(struct nd_region *nd_region);
 void __nd_iostat_start(struct bio *bio, unsigned long *start);
 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 {
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 36cb39047d5b..b511099457db 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -29,27 +29,28 @@
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "pmem.h"
 #include "pfn.h"
 #include "nd.h"
 
-struct pmem_device {
-	/* One contiguous memory region per device */
-	phys_addr_t phys_addr;
-	/* when non-zero this device is hosting a 'pfn' instance */
-	phys_addr_t data_offset;
-	u64 pfn_flags;
-	void __pmem *virt_addr;
-	/* immutable base size of the namespace */
-	size_t size;
-	/* trim size when namespace capacity has been section aligned */
-	u32 pfn_pad;
-	struct badblocks bb;
-};
+static struct device *to_dev(struct pmem_device *pmem)
+{
+	/*
+	 * nvdimm bus services need a 'dev' parameter, and we record the device
+	 * at init in bb.dev.
+	 */
+	return pmem->bb.dev;
+}
+
+static struct nd_region *to_region(struct pmem_device *pmem)
+{
+	return to_nd_region(to_dev(pmem)->parent);
+}
 
 static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
 		unsigned int len)
 {
-	struct device *dev = pmem->bb.dev;
+	struct device *dev = to_dev(pmem);
 	sector_t sector;
 	long cleared;
 
@@ -57,7 +58,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
 	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
 
 	if (cleared > 0 && cleared / 512) {
-		dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
+		dev_dbg(dev, "%s: %#llx clear %ld sector%s\n",
 			__func__, (unsigned long long) sector,
 			cleared / 512, cleared / 512 > 1 ? "s" : "");
 		badblocks_clear(&pmem->bb, sector, cleared / 512);
@@ -73,7 +74,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	bool bad_pmem = false;
 	void *mem = kmap_atomic(page);
 	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
-	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
+	void *pmem_addr = pmem->virt_addr + pmem_off;
 
 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
 		bad_pmem = true;
@@ -112,6 +113,11 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	return rc;
 }
 
+/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
+#ifndef REQ_FLUSH
+#define REQ_FLUSH REQ_PREFLUSH
+#endif
+
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
 	int rc = 0;
@@ -120,6 +126,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 	struct pmem_device *pmem = q->queuedata;
+	struct nd_region *nd_region = to_region(pmem);
+
+	if (bio->bi_rw & REQ_FLUSH)
+		nvdimm_flush(nd_region);
 
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
@@ -134,8 +144,8 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
-	if (bio_data_dir(bio))
-		wmb_pmem();
+	if (bio->bi_rw & REQ_FUA)
+		nvdimm_flush(nd_region);
 
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
@@ -148,8 +158,6 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	int rc;
 
 	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
-	if (rw & WRITE)
-		wmb_pmem();
 
 	/*
 	 * The ->rw_page interface is subtle and tricky.  The core
@@ -163,8 +171,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	return rc;
 }
 
-static long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		void __pmem **kaddr, pfn_t *pfn, long size)
+/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
+__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
+		void **kaddr, pfn_t *pfn, long size)
 {
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
 	resource_size_t offset = sector * 512 + pmem->data_offset;
@@ -195,7 +204,7 @@ static void pmem_release_queue(void *q)
 	blk_cleanup_queue(q);
 }
 
-void pmem_release_disk(void *disk)
+static void pmem_release_disk(void *disk)
 {
 	del_gendisk(disk);
 	put_disk(disk);
@@ -205,6 +214,7 @@ static int pmem_attach_disk(struct device *dev,
 		struct nd_namespace_common *ndns)
 {
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct vmem_altmap __altmap, *altmap = NULL;
 	struct resource *res = &nsio->res;
 	struct nd_pfn *nd_pfn = NULL;
@@ -234,7 +244,7 @@ static int pmem_attach_disk(struct device *dev,
 	dev_set_drvdata(dev, pmem);
 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
-	if (!arch_has_wmb_pmem())
+	if (nvdimm_has_flush(nd_region) < 0)
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
@@ -269,15 +279,14 @@ static int pmem_attach_disk(struct device *dev,
 	 * At release time the queue must be dead before
 	 * devm_memremap_pages is unwound
 	 */
-	if (devm_add_action(dev, pmem_release_queue, q)) {
-		blk_cleanup_queue(q);
+	if (devm_add_action_or_reset(dev, pmem_release_queue, q))
 		return -ENOMEM;
-	}
 
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);
-	pmem->virt_addr = (void __pmem *) addr;
+	pmem->virt_addr = addr;
 
+	blk_queue_write_cache(q, true, true);
 	blk_queue_make_request(q, pmem_make_request);
 	blk_queue_physical_block_size(q, PAGE_SIZE);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -289,10 +298,6 @@ static int pmem_attach_disk(struct device *dev,
 	disk = alloc_disk_node(0, nid);
 	if (!disk)
 		return -ENOMEM;
-	if (devm_add_action(dev, pmem_release_disk, disk)) {
-		put_disk(disk);
-		return -ENOMEM;
-	}
 
 	disk->fops = &pmem_fops;
 	disk->queue = q;
@@ -302,9 +307,13 @@ static int pmem_attach_disk(struct device *dev,
 			/ 512);
 	if (devm_init_badblocks(dev, &pmem->bb))
 		return -ENOMEM;
-	nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
+	nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
 	disk->bb = &pmem->bb;
 	device_add_disk(dev, disk);
+
+	if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
+		return -ENOMEM;
+
 	revalidate_disk(disk);
 
 	return 0;
@@ -340,13 +349,20 @@ static int nd_pmem_remove(struct device *dev)
 {
 	if (is_nd_btt(dev))
 		nvdimm_namespace_detach_btt(to_nd_btt(dev));
+	nvdimm_flush(to_nd_region(dev->parent));
+
 	return 0;
 }
 
+static void nd_pmem_shutdown(struct device *dev)
+{
+	nvdimm_flush(to_nd_region(dev->parent));
+}
+
 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 {
-	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct pmem_device *pmem = dev_get_drvdata(dev);
+	struct nd_region *nd_region = to_region(pmem);
 	resource_size_t offset = 0, end_trunc = 0;
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_io *nsio;
@@ -382,6 +398,7 @@ static struct nd_device_driver nd_pmem_driver = {
 	.probe = nd_pmem_probe,
 	.remove = nd_pmem_remove,
 	.notify = nd_pmem_notify,
+	.shutdown = nd_pmem_shutdown,
 	.drv = {
 		.name = "nd_pmem",
 	},
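Taken together, the pmem.c changes rewire durability through the block layer: blk_queue_write_cache(q, true, true) advertises a volatile write cache plus FUA support, so REQ_FLUSH (preflush) and REQ_FUA bios now reach pmem_make_request() and trigger nvdimm_flush(). That makes an ordinary flush request the persistence point. A hedged sketch of the kernel-side trigger (helper name hypothetical):

#include <linux/blkdev.h>

/* hypothetical helper: drive the pmem region's posted-write flush via
 * the block layer rather than calling nvdimm_flush() directly */
static int example_flush_pmem(struct block_device *bdev)
{
	/* submits an empty REQ_FLUSH bio; pmem_make_request() responds
	 * with nvdimm_flush() on the namespace's parent region */
	return blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
}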
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
new file mode 100644
index 000000000000..b4ee4f71b4a1
--- /dev/null
+++ b/drivers/nvdimm/pmem.h
@@ -0,0 +1,24 @@
+#ifndef __NVDIMM_PMEM_H__
+#define __NVDIMM_PMEM_H__
+#include <linux/badblocks.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/fs.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+		void **kaddr, pfn_t *pfn, long size);
+/* this definition is in it's own header for tools/testing/nvdimm to consume */
+struct pmem_device {
+	/* One contiguous memory region per device */
+	phys_addr_t phys_addr;
+	/* when non-zero this device is hosting a 'pfn' instance */
+	phys_addr_t data_offset;
+	u64 pfn_flags;
+	void *virt_addr;
+	/* immutable base size of the namespace */
+	size_t size;
+	/* trim size when namespace capacity has been section aligned */
+	u32 pfn_pad;
+	struct badblocks bb;
+};
+#endif /* __NVDIMM_PMEM_H__ */
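Moving struct pmem_device into this header exists so the unit-test enclave can supply its own DAX path: pmem_direct_access() in pmem.c is now __weak, and tools/testing/nvdimm links a strong override. An illustrative strong definition with the same semantics as the driver's (this is not the actual test code):

#include <linux/blkdev.h>
#include "pmem.h"

/* hypothetical strong override shadowing the __weak driver symbol */
long pmem_direct_access(struct block_device *bdev, sector_t sector,
		void **kaddr, pfn_t *pfn, long size)
{
	struct pmem_device *pmem = bdev->bd_queue->queuedata;
	resource_size_t offset = sector * 512 + pmem->data_offset;

	*kaddr = pmem->virt_addr + offset;
	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
	/* bytes remaining in the (possibly pfn-trimmed) namespace */
	return pmem->size - pmem->pfn_pad - offset;
}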
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 05a912359939..8f241772ec0b 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -20,7 +20,7 @@ static int nd_region_probe(struct device *dev)
 {
 	int err, rc;
 	static unsigned long once;
-	struct nd_region_namespaces *num_ns;
+	struct nd_region_data *ndrd;
 	struct nd_region *nd_region = to_nd_region(dev);
 
 	if (nd_region->num_lanes > num_online_cpus()
@@ -33,21 +33,21 @@ static int nd_region_probe(struct device *dev)
 				nd_region->num_lanes);
 	}
 
+	rc = nd_region_activate(nd_region);
+	if (rc)
+		return rc;
+
 	rc = nd_blk_region_init(nd_region);
 	if (rc)
 		return rc;
 
 	rc = nd_region_register_namespaces(nd_region, &err);
-	num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
-	if (!num_ns)
-		return -ENOMEM;
-
 	if (rc < 0)
 		return rc;
 
-	num_ns->active = rc;
-	num_ns->count = rc + err;
-	dev_set_drvdata(dev, num_ns);
+	ndrd = dev_get_drvdata(dev);
+	ndrd->ns_active = rc;
+	ndrd->ns_count = rc + err;
 
 	if (rc && err && rc == err)
 		return -ENODEV;
@@ -82,6 +82,8 @@ static int nd_region_remove(struct device *dev)
 {
 	struct nd_region *nd_region = to_nd_region(dev);
 
+	device_for_each_child(dev, NULL, child_unregister);
+
 	/* flush attribute readers and disable */
 	nvdimm_bus_lock(dev);
 	nd_region->ns_seed = NULL;
@@ -91,7 +93,6 @@ static int nd_region_remove(struct device *dev)
 	dev_set_drvdata(dev, NULL);
 	nvdimm_bus_unlock(dev);
 
-	device_for_each_child(dev, NULL, child_unregister);
 	return 0;
 }
 
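The remove-path reordering above matters for the new flush machinery: children must be torn down while the region's drvdata is still live, since nd_pmem_remove() now ends with nvdimm_flush(), which looks up the flush hint table through dev_get_drvdata(). A sketch of the sequencing this buys (timeline hypothetical):

/*
 * nd_region_remove()
 *   device_for_each_child() -> nd_pmem_remove() -> nvdimm_flush()
 *         (nd_region_data with flush_wpq[] still reachable here)
 *   dev_set_drvdata(dev, NULL)
 *         (only now does the region forget its flush hint table)
 */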
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 40fcfea26fbb..e8d5ba7b29af 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -14,13 +14,97 @@
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/pmem.h>
 #include <linux/sort.h>
 #include <linux/io.h>
 #include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"
 
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
 static DEFINE_IDA(region_ida);
+static DEFINE_PER_CPU(int, flush_idx);
+
+static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
+		struct nd_region_data *ndrd)
+{
+	int i, j;
+
+	dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
+			nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
+	for (i = 0; i < nvdimm->num_flush; i++) {
+		struct resource *res = &nvdimm->flush_wpq[i];
+		unsigned long pfn = PHYS_PFN(res->start);
+		void __iomem *flush_page;
+
+		/* check if flush hints share a page */
+		for (j = 0; j < i; j++) {
+			struct resource *res_j = &nvdimm->flush_wpq[j];
+			unsigned long pfn_j = PHYS_PFN(res_j->start);
+
+			if (pfn == pfn_j)
+				break;
+		}
+
+		if (j < i)
+			flush_page = (void __iomem *) ((unsigned long)
+					ndrd->flush_wpq[dimm][j] & PAGE_MASK);
+		else
+			flush_page = devm_nvdimm_ioremap(dev,
+					PHYS_PFN(pfn), PAGE_SIZE);
+		if (!flush_page)
+			return -ENXIO;
+		ndrd->flush_wpq[dimm][i] = flush_page
+			+ (res->start & ~PAGE_MASK);
+	}
+
+	return 0;
+}
+
+int nd_region_activate(struct nd_region *nd_region)
+{
+	int i, num_flush = 0;
+	struct nd_region_data *ndrd;
+	struct device *dev = &nd_region->dev;
+	size_t flush_data_size = sizeof(void *);
+
+	nvdimm_bus_lock(&nd_region->dev);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+		/* at least one null hint slot per-dimm for the "no-hint" case */
+		flush_data_size += sizeof(void *);
+		num_flush = min_not_zero(num_flush, nvdimm->num_flush);
+		if (!nvdimm->num_flush)
+			continue;
+		flush_data_size += nvdimm->num_flush * sizeof(void *);
+	}
+	nvdimm_bus_unlock(&nd_region->dev);
+
+	ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
+	if (!ndrd)
+		return -ENOMEM;
+	dev_set_drvdata(dev, ndrd);
+
+	ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+		int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
+
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
 
 static void nd_region_release(struct device *dev)
 {
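The flush_data_size arithmetic in nd_region_activate() is easy to misread; a worked example for a hypothetical two-dimm region where dimm0 reports two flush hints and dimm1 reports none:

/*
 * flush_data_size = sizeof(void *)          initial base slot
 *                 + sizeof(void *) * 2      one "null hint" slot per dimm
 *                 + sizeof(void *) * 2      dimm0's two hints
 *
 * devm_kzalloc() zero-fills the table, so flush_wpq[i][0] stays NULL for
 * a dimm without hints -- exactly what nvdimm_flush() and
 * nvdimm_has_flush() test before dereferencing. num_flush here ends up
 * min_not_zero(2, 0) = 2, giving flush_mask = (1 << ilog2(2)) - 1 = 1.
 */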
@@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size);
 static ssize_t init_namespaces_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
+	struct nd_region_data *ndrd = dev_get_drvdata(dev);
 	ssize_t rc;
 
 	nvdimm_bus_lock(dev);
-	if (num_ns)
-		rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
+	if (ndrd)
+		rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
 	else
 		rc = -ENXIO;
 	nvdimm_bus_unlock(dev);
@@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
 
 		if (is_nd_pmem(dev))
 			return;
-
-		to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
 	}
 	if (dev->parent && is_nd_blk(dev->parent) && probe) {
 		nd_region = to_nd_region(dev->parent);
@@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 		if (ndbr) {
 			nd_region = &ndbr->nd_region;
 			ndbr->enable = ndbr_desc->enable;
-			ndbr->disable = ndbr_desc->disable;
 			ndbr->do_io = ndbr_desc->do_io;
 		}
 		region_buf = ndbr;
@@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 }
 EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
 
+/**
+ * nvdimm_flush - flush any posted write queues between the cpu and pmem media
+ * @nd_region: blk or interleaved pmem region
+ */
+void nvdimm_flush(struct nd_region *nd_region)
+{
+	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+	int i, idx;
+
+	/*
+	 * Try to encourage some diversity in flush hint addresses
+	 * across cpus assuming a limited number of flush hints.
+	 */
+	idx = this_cpu_read(flush_idx);
+	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
+
+	/*
+	 * The first wmb() is needed to 'sfence' all previous writes
+	 * such that they are architecturally visible for the platform
+	 * buffer flush.  Note that we've already arranged for pmem
+	 * writes to avoid the cache via arch_memcpy_to_pmem().  The
+	 * final wmb() ensures ordering for the NVDIMM flush write.
+	 */
+	wmb();
+	for (i = 0; i < nd_region->ndr_mappings; i++)
+		if (ndrd->flush_wpq[i][0])
+			writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
+	wmb();
+}
+EXPORT_SYMBOL_GPL(nvdimm_flush);
+
+/**
+ * nvdimm_has_flush - determine write flushing requirements
+ * @nd_region: blk or interleaved pmem region
+ *
+ * Returns 1 if writes require flushing
+ * Returns 0 if writes do not require flushing
+ * Returns -ENXIO if flushing capability can not be determined
+ */
+int nvdimm_has_flush(struct nd_region *nd_region)
+{
+	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+	int i;
+
+	/* no nvdimm == flushing capability unknown */
+	if (nd_region->ndr_mappings == 0)
+		return -ENXIO;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++)
+		/* flush hints present, flushing required */
+		if (ndrd->flush_wpq[i][0])
+			return 1;
+
+	/*
+	 * The platform defines dimm devices without hints, assume
+	 * platform persistence mechanism like ADR
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_has_flush);
+
 void __exit nd_region_devs_exit(void)
 {
 	ida_destroy(&region_ida);
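Two details of nvdimm_flush() reward a closer look: the per-cpu flush_idx is re-seeded with a hash on every call so that concurrent flushers spread across the available hint addresses, and flush_mask restricts the cycle to a power-of-two prefix of them. A worked example with hypothetical numbers:

/*
 * Hypothetical region: smallest non-zero hint count across dimms = 3
 *
 *   flush_mask = (1 << ilog2(3)) - 1 = (1 << 1) - 1 = 1
 *   hint used  = idx & flush_mask    = idx & 1       (hints 0 and 1 only)
 *
 * idx itself is this_cpu_add_return(flush_idx, hash_32(pid + idx, 8)),
 * so repeated flushers from different tasks land on different hints,
 * and index selection stays branch-free at the cost of never using the
 * third hint.
 */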
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index fac1b51ea0de..9d66b4fb174b 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -31,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static blk_qc_t dcssblk_make_request(struct request_queue *q,
 						struct bio *bio);
 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
-			 void __pmem **kaddr, pfn_t *pfn, long size);
+			 void **kaddr, pfn_t *pfn, long size);
 
 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
 
@@ -884,7 +884,7 @@ fail:
 
 static long
 dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
-			void __pmem **kaddr, pfn_t *pfn, long size)
+			void **kaddr, pfn_t *pfn, long size)
 {
 	struct dcssblk_dev_info *dev_info;
 	unsigned long offset, dev_sz;
@@ -894,7 +894,7 @@ dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
 		return -ENODEV;
 	dev_sz = dev_info->end - dev_info->start;
 	offset = secnum * 512;
-	*kaddr = (void __pmem *) (dev_info->start + offset);
+	*kaddr = (void *) dev_info->start + offset;
 	*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
 
 	return dev_sz - offset;
diff --git a/fs/dax.c b/fs/dax.c
index 432b9e6dd63b..993dc6fe0416 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -75,13 +75,13 @@ static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
 	struct request_queue *q = bdev->bd_queue;
 	long rc = -EIO;
 
-	dax->addr = (void __pmem *) ERR_PTR(-EIO);
+	dax->addr = ERR_PTR(-EIO);
 	if (blk_queue_enter(q, true) != 0)
 		return rc;
 
 	rc = bdev_direct_access(bdev, dax);
 	if (rc < 0) {
-		dax->addr = (void __pmem *) ERR_PTR(rc);
+		dax->addr = ERR_PTR(rc);
 		blk_queue_exit(q);
 		return rc;
 	}
@@ -147,12 +147,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 		struct buffer_head *bh)
 {
 	loff_t pos = start, max = start, bh_max = start;
-	bool hole = false, need_wmb = false;
+	bool hole = false;
 	struct block_device *bdev = NULL;
 	int rw = iov_iter_rw(iter), rc;
 	long map_len = 0;
 	struct blk_dax_ctl dax = {
-		.addr = (void __pmem *) ERR_PTR(-EIO),
+		.addr = ERR_PTR(-EIO),
 	};
 	unsigned blkbits = inode->i_blkbits;
 	sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
@@ -218,7 +218,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 
 		if (iov_iter_rw(iter) == WRITE) {
 			len = copy_from_iter_pmem(dax.addr, max - pos, iter);
-			need_wmb = true;
 		} else if (!hole)
 			len = copy_to_iter((void __force *) dax.addr, max - pos,
 					iter);
@@ -235,8 +234,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 			dax.addr += len;
 	}
 
-	if (need_wmb)
-		wmb_pmem();
 	dax_unmap_atomic(bdev, &dax);
 
 	return (pos == start) ? rc : pos - start;
@@ -788,7 +785,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 			return ret;
 		}
 	}
-	wmb_pmem();
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
@@ -1187,7 +1183,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 		if (dax_map_atomic(bdev, &dax) < 0)
 			return PTR_ERR(dax.addr);
 		clear_pmem(dax.addr + offset, length);
-		wmb_pmem();
 		dax_unmap_atomic(bdev, &dax);
 	}
 	return 0;
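With wmb_pmem() gone, DAX no longer claims durability at copy time; persistence is deferred to the storage flush path. An illustrative userspace sequence (paths hypothetical) showing where the flush now comes from:

/*
 *   fd = open("/mnt/dax/file", O_RDWR);
 *   write(fd, buf, len);   // dax_io() -> copy_from_iter_pmem(),
 *                          // cache-bypassing but not yet durable
 *   fsync(fd);             // fs -> blkdev_issue_flush()
 *                          //    -> pmem_make_request(REQ_FLUSH)
 *                          //    -> nvdimm_flush()
 */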
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c96db9c22d10..adf33079771e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1665,7 +1665,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
  */
 struct blk_dax_ctl {
 	sector_t sector;
-	void __pmem *addr;
+	void *addr;
 	long size;
 	pfn_t pfn;
 };
@@ -1676,8 +1676,8 @@ struct block_device_operations {
 	int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	long (*direct_access)(struct block_device *, sector_t, void __pmem **,
-			pfn_t *, long);
+	long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
+			long);
 	unsigned int (*check_events) (struct gendisk *disk,
 				      unsigned int clearing);
 	/* ->media_changed() is DEPRECATED, use ->check_events() instead */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 2e853b679a5d..1bb954842725 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -17,7 +17,6 @@
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu	__attribute__((noderef, address_space(3)))
-# define __pmem		__attribute__((noderef, address_space(5)))
 #ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
 #else /* CONFIG_SPARSE_RCU_POINTER */
@@ -45,7 +44,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __cond_lock(x,c) (c)
 # define __percpu
 # define __rcu
-# define __pmem
 # define __private
 # define ACCESS_PRIVATE(p, member) ((p)->member)
 #endif /* __CHECKER__ */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index b0db857f334b..91acfce74a22 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -131,7 +131,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
  * >= 0 : the number of bytes accessible at the address
  */
 typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
-				     void __pmem **kaddr, pfn_t *pfn, long size);
+				     void **kaddr, pfn_t *pfn, long size);
 
 void dm_error(const char *message);
 
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 0c3c30cbbea5..b519e137b9b7 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -52,6 +52,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 
 struct nd_namespace_label;
 struct nvdimm_drvdata;
+
 struct nd_mapping {
 	struct nvdimm *nvdimm;
 	struct nd_namespace_label **labels;
@@ -69,6 +70,7 @@ struct nd_mapping {
 struct nvdimm_bus_descriptor {
 	const struct attribute_group **attr_groups;
 	unsigned long cmd_mask;
+	struct module *module;
 	char *provider_name;
 	ndctl_fn ndctl;
 	int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
@@ -99,13 +101,21 @@ struct nd_region_desc {
 	unsigned long flags;
 };
 
+struct device;
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+		size_t size, unsigned long flags);
+static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
+		resource_size_t offset, size_t size)
+{
+	return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0);
+}
+
 struct nvdimm_bus;
 struct module;
 struct device;
 struct nd_blk_region;
 struct nd_blk_region_desc {
 	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-	void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 	struct nd_region_desc ndr_desc;
@@ -119,22 +129,22 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
-		struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
-#define nvdimm_bus_register(parent, desc) \
-	__nvdimm_bus_register(parent, desc, THIS_MODULE)
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
 struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 		const struct attribute_group **groups, unsigned long flags,
-		unsigned long cmd_mask);
+		unsigned long cmd_mask, int num_flush,
+		struct resource *flush_wpq);
 const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
 const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
 u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
@@ -156,4 +166,6 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
 unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
+void nvdimm_flush(struct nd_region *nd_region);
+int nvdimm_has_flush(struct nd_region *nd_region);
 #endif /* __LIBNVDIMM_H__ */
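The registration interface also loses its THIS_MODULE-capturing macro: nvdimm_bus_register() is a plain exported function again, and the owning module travels in the descriptor, where the core can use it to pin the provider while the bus is active. A hedged sketch of a provider following the new convention (names hypothetical):

#include <linux/libnvdimm.h>
#include <linux/module.h>
#include <linux/device.h>

static struct nvdimm_bus_descriptor example_nd_desc;

static int example_provider_probe(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus;

	example_nd_desc.provider_name = "example";
	example_nd_desc.module = THIS_MODULE;	/* formerly a macro argument */
	nvdimm_bus = nvdimm_bus_register(dev, &example_nd_desc);
	if (!nvdimm_bus)
		return -ENOMEM;
	dev_set_drvdata(dev, nvdimm_bus);
	return 0;
}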
diff --git a/include/linux/nd.h b/include/linux/nd.h
index aee2761d294c..f1ea426d6a5e 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -26,6 +26,7 @@ struct nd_device_driver {
 	unsigned long type;
 	int (*probe)(struct device *dev);
 	int (*remove)(struct device *dev);
+	void (*shutdown)(struct device *dev);
 	void (*notify)(struct device *dev, enum nvdimm_event event);
 };
 
@@ -67,7 +68,7 @@ struct nd_namespace_io {
 	struct nd_namespace_common common;
 	struct resource res;
 	resource_size_t size;
-	void __pmem *addr;
+	void *addr;
 	struct badblocks bb;
 };
 
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 94994810c7c0..a3d90b9da18d 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -28,7 +28,10 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
 	return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
+static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
+{
+	return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
+}
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 57d146fe44dd..e856c2cb0fe8 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -26,47 +26,35 @@
26 * calling these symbols with arch_has_pmem_api() and redirect to the 26 * calling these symbols with arch_has_pmem_api() and redirect to the
27 * implementation in asm/pmem.h. 27 * implementation in asm/pmem.h.
28 */ 28 */
29static inline bool __arch_has_wmb_pmem(void) 29static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
30{
31 return false;
32}
33
34static inline void arch_wmb_pmem(void)
35{
36 BUG();
37}
38
39static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
40 size_t n)
41{ 30{
42 BUG(); 31 BUG();
43} 32}
44 33
45static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, 34static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
46 size_t n)
47{ 35{
48 BUG(); 36 BUG();
49 return -EFAULT; 37 return -EFAULT;
50} 38}
51 39
52static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, 40static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
53 struct iov_iter *i) 41 struct iov_iter *i)
54{ 42{
55 BUG(); 43 BUG();
56 return 0; 44 return 0;
57} 45}
58 46
59static inline void arch_clear_pmem(void __pmem *addr, size_t size) 47static inline void arch_clear_pmem(void *addr, size_t size)
60{ 48{
61 BUG(); 49 BUG();
62} 50}
63 51
64static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) 52static inline void arch_wb_cache_pmem(void *addr, size_t size)
65{ 53{
66 BUG(); 54 BUG();
67} 55}
68 56
69static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) 57static inline void arch_invalidate_pmem(void *addr, size_t size)
70{ 58{
71 BUG(); 59 BUG();
72} 60}
@@ -77,13 +65,6 @@ static inline bool arch_has_pmem_api(void)
77 return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); 65 return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
78} 66}
79 67
80static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
81 size_t size)
82{
83 memcpy(dst, (void __force *) src, size);
84 return 0;
85}
86
87/* 68/*
88 * memcpy_from_pmem - read from persistent memory with error handling 69 * memcpy_from_pmem - read from persistent memory with error handling
89 * @dst: destination buffer 70 * @dst: destination buffer
@@ -92,54 +73,13 @@ static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
92 * 73 *
93 * Returns 0 on success negative error code on failure. 74 * Returns 0 on success negative error code on failure.
94 */ 75 */
95static inline int memcpy_from_pmem(void *dst, void __pmem const *src, 76static inline int memcpy_from_pmem(void *dst, void const *src, size_t size)
96 size_t size)
97{ 77{
98 if (arch_has_pmem_api()) 78 if (arch_has_pmem_api())
99 return arch_memcpy_from_pmem(dst, src, size); 79 return arch_memcpy_from_pmem(dst, src, size);
100 else 80 else
101 return default_memcpy_from_pmem(dst, src, size); 81 memcpy(dst, src, size);
102} 82 return 0;
103
104/**
105 * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
106 *
107 * For a given cpu implementation within an architecture it is possible
108 * that wmb_pmem() resolves to a nop. In the case this returns
109 * false, pmem api users are unable to ensure durability and may want to
110 * fall back to a different data consistency model, or otherwise notify
111 * the user.
112 */
113static inline bool arch_has_wmb_pmem(void)
114{
115 return arch_has_pmem_api() && __arch_has_wmb_pmem();
116}
117
118/*
119 * These defaults seek to offer decent performance and minimize the
120 * window between i/o completion and writes being durable on media.
121 * However, it is undefined / architecture specific whether
122 * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
123 * making data durable relative to i/o completion.
124 */
125static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
126 size_t size)
127{
128 memcpy((void __force *) dst, src, size);
129}
130
131static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
132 size_t bytes, struct iov_iter *i)
133{
134 return copy_from_iter_nocache((void __force *)addr, bytes, i);
135}
136
137static inline void default_clear_pmem(void __pmem *addr, size_t size)
138{
139 if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
140 clear_page((void __force *)addr);
141 else
142 memset((void __force *)addr, 0, size);
143} 83}
144 84
145/** 85/**
@@ -152,29 +92,14 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size)
152 * being effectively evicted from, or never written to, the processor 92 * being effectively evicted from, or never written to, the processor
153 * cache hierarchy after the copy completes. After memcpy_to_pmem() 93 * cache hierarchy after the copy completes. After memcpy_to_pmem()
154 * data may still reside in cpu or platform buffers, so this operation 94 * data may still reside in cpu or platform buffers, so this operation
155 * must be followed by a wmb_pmem(). 95 * must be followed by a blkdev_issue_flush() on the pmem block device.
156 */ 96 */
157static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) 97static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
158{ 98{
159 if (arch_has_pmem_api()) 99 if (arch_has_pmem_api())
160 arch_memcpy_to_pmem(dst, src, n); 100 arch_memcpy_to_pmem(dst, src, n);
161 else 101 else
162 default_memcpy_to_pmem(dst, src, n); 102 memcpy(dst, src, n);
163}
164
165/**
166 * wmb_pmem - synchronize writes to persistent memory
167 *
168 * After a series of memcpy_to_pmem() operations this drains data from
169 * cpu write buffers and any platform (memory controller) buffers to
170 * ensure that written data is durable on persistent memory media.
171 */
172static inline void wmb_pmem(void)
173{
174 if (arch_has_wmb_pmem())
175 arch_wmb_pmem();
176 else
177 wmb();
178} 103}
179 104
180/** 105/**
@@ -184,14 +109,14 @@ static inline void wmb_pmem(void)
184 * @i: iterator with source data 109 * @i: iterator with source data
185 * 110 *
186 * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. 111 * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
187 * This function requires explicit ordering with a wmb_pmem() call. 112 * See blkdev_issue_flush() note for memcpy_to_pmem().
188 */ 113 */
189static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes, 114static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
190 struct iov_iter *i) 115 struct iov_iter *i)
191{ 116{
192 if (arch_has_pmem_api()) 117 if (arch_has_pmem_api())
193 return arch_copy_from_iter_pmem(addr, bytes, i); 118 return arch_copy_from_iter_pmem(addr, bytes, i);
194 return default_copy_from_iter_pmem(addr, bytes, i); 119 return copy_from_iter_nocache(addr, bytes, i);
195} 120}
196 121
197/** 122/**
@@ -200,14 +125,14 @@ static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
200 * @size: number of bytes to zero 125 * @size: number of bytes to zero
201 * 126 *
202 * Write zeros into the memory range starting at 'addr' for 'size' bytes. 127 * Write zeros into the memory range starting at 'addr' for 'size' bytes.
203 * This function requires explicit ordering with a wmb_pmem() call. 128 * See blkdev_issue_flush() note for memcpy_to_pmem().
204 */ 129 */
205static inline void clear_pmem(void __pmem *addr, size_t size) 130static inline void clear_pmem(void *addr, size_t size)
206{ 131{
207 if (arch_has_pmem_api()) 132 if (arch_has_pmem_api())
208 arch_clear_pmem(addr, size); 133 arch_clear_pmem(addr, size);
209 else 134 else
210 default_clear_pmem(addr, size); 135 memset(addr, 0, size);
211} 136}
212 137
213/** 138/**
@@ -218,7 +143,7 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
  * For platforms that support clearing poison this flushes any poisoned
  * ranges out of the cache
  */
-static inline void invalidate_pmem(void __pmem *addr, size_t size)
+static inline void invalidate_pmem(void *addr, size_t size)
 {
 	if (arch_has_pmem_api())
 		arch_invalidate_pmem(addr, size);
@@ -230,9 +155,9 @@ static inline void invalidate_pmem(void __pmem *addr, size_t size)
  * @size: number of bytes to write back
  *
  * Write back the processor cache range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
  */
-static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void wb_cache_pmem(void *addr, size_t size)
 {
 	if (arch_has_pmem_api())
 		arch_wb_cache_pmem(addr, size);
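
A rough sketch of the write path under the new model, not part of this
diff: memcpy_to_pmem() performs the copy, and durability is requested
explicitly afterwards, via blkdev_issue_flush() from above the block
layer or, inside the pmem driver, via the new nvdimm_flush() helper.
The function name and the nd_region plumbing are assumed for
illustration:

	/* illustrative only; assumes a mapped pmem range in an nd_region */
	static void example_pmem_write(struct nd_region *nd_region,
			void *pmem_addr, const void *data, size_t len)
	{
		memcpy_to_pmem(pmem_addr, data, len);	/* non-temporal copy when the arch pmem API is present */
		nvdimm_flush(nd_region);	/* drain cpu/platform write buffers */
	}
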
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 309915f74492..ba5a8c79652a 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -298,6 +298,7 @@ struct nd_cmd_pkg {
 #define NVDIMM_FAMILY_INTEL 0
 #define NVDIMM_FAMILY_HPE1 1
 #define NVDIMM_FAMILY_HPE2 2
+#define NVDIMM_FAMILY_MSFT 3

 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\
 	struct nd_cmd_pkg)
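
The new family id slots into the existing ND_CMD_CALL pass-through. A
hypothetical userspace sketch, with the function number and payload
size invented for illustration (each Microsoft DSM function defines
its own payload layout), issued against a DIMM device node such as
/dev/nmem0:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/ndctl.h>

	static int send_msft_dsm(int dimm_fd, unsigned int function)
	{
		struct {
			struct nd_cmd_pkg pkg;
			unsigned char payload[128];	/* size is made up */
		} cmd;

		memset(&cmd, 0, sizeof(cmd));
		cmd.pkg.nd_family = NVDIMM_FAMILY_MSFT;
		cmd.pkg.nd_command = function;	/* family-specific function number */
		cmd.pkg.nd_size_out = sizeof(cmd.payload);

		return ioctl(dimm_fd, ND_IOCTL_CALL, &cmd);
	}
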
diff --git a/kernel/memremap.c b/kernel/memremap.c
index ddb3247a872a..251d16b4cb41 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -169,12 +169,6 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);

-pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
-{
-	return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
-}
-EXPORT_SYMBOL(phys_to_pfn_t);
-
 #ifdef CONFIG_ZONE_DEVICE
 static DEFINE_MUTEX(pgmap_lock);
 static RADIX_TREE(pgmap_radix, GFP_KERNEL);
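
The helper is not lost: it becomes a static inline, so the out-of-line
definition and its export are redundant. That is also why the
--wrap=phys_to_pfn_t ldflag disappears from the nfit test Kbuild later
in this diff; the linker cannot interpose on an inlined call. The
equivalent inline, reconstructed from the removed body:

	static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
	{
		return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
	}
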
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4904ced676d4..24a08363995a 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -313,7 +313,6 @@ our $Sparse = qr{
 			__kernel|
 			__force|
 			__iomem|
-			__pmem|
 			__must_check|
 			__init_refok|
 			__kprobes|
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
index d388de72eaca..28632ee68377 100644
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@@ -947,7 +947,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable

 GrpTable: Grp16
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index 3918dd52e903..0f196eec9f48 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -1664,5 +1664,3 @@
 "0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
 {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8 \tpcommit ",},
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 9c8c61e06d5a..af25bc8240d0 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -1696,5 +1696,3 @@
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
 {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
 "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8 \tpcommit ",},
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index 76e0ec379c8b..979487dae8d4 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -2655,10 +2655,6 @@ int main(void)

 #endif /* #ifndef __x86_64__ */

-	/* pcommit */
-
-	asm volatile("pcommit");
-
 	/* Following line is a marker for the awk script - do not change */
 	asm volatile("rdtsc"); /* Stop here */

diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index ec378cd7b71e..767be7c76034 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -1012,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable

 GrpTable: Grp16
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 785985677159..ad6dd0543019 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -11,12 +11,14 @@ ldflags-y += --wrap=__devm_release_region
 ldflags-y += --wrap=__request_region
 ldflags-y += --wrap=__release_region
 ldflags-y += --wrap=devm_memremap_pages
-ldflags-y += --wrap=phys_to_pfn_t
+ldflags-y += --wrap=insert_resource
+ldflags-y += --wrap=remove_resource

 DRIVERS := ../../../drivers
 NVDIMM_SRC := $(DRIVERS)/nvdimm
-ACPI_SRC := $(DRIVERS)/acpi
+ACPI_SRC := $(DRIVERS)/acpi/nfit
 DAX_SRC := $(DRIVERS)/dax
+ccflags-y := -I$(src)/$(NVDIMM_SRC)/

 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
@@ -27,10 +29,12 @@ obj-$(CONFIG_ACPI_NFIT) += nfit.o
 obj-$(CONFIG_DEV_DAX) += dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o

-nfit-y := $(ACPI_SRC)/nfit.o
+nfit-y := $(ACPI_SRC)/core.o
+nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
 nfit-y += config_check.o

 nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += pmem-dax.o
 nd_pmem-y += config_check.o

 nd_btt-y := $(NVDIMM_SRC)/btt.o
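
The --wrap flags above use plain GNU ld symbol interposition: for
--wrap=sym, every undefined reference to sym is redirected to
__wrap_sym, while __real_sym still resolves to the original
definition. A minimal sketch for a hypothetical symbol foo():

	int __real_foo(int arg);	/* the linker binds this to the real foo() */

	int __wrap_foo(int arg)		/* callers of foo() land here instead */
	{
		if (arg < 0)		/* intercept cases the test owns */
			return 0;
		return __real_foo(arg);	/* otherwise pass through */
	}
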
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
index adf18bfeca00..878daf3429e8 100644
--- a/tools/testing/nvdimm/config_check.c
+++ b/tools/testing/nvdimm/config_check.c
@@ -10,6 +10,7 @@ void check(void)
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
new file mode 100644
index 000000000000..c9b8c48f85fc
--- /dev/null
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/blkdev.h>
+#include <pmem.h>
+#include <nd.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+		void **kaddr, pfn_t *pfn, long size)
+{
+	struct pmem_device *pmem = bdev->bd_queue->queuedata;
+	resource_size_t offset = sector * 512 + pmem->data_offset;
+
+	if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+		return -EIO;
+
+	/*
+	 * Limit dax to a single page at a time given vmalloc()-backed
+	 * in the nfit_test case.
+	 */
+	if (get_nfit_res(pmem->phys_addr + offset)) {
+		struct page *page;
+
+		*kaddr = pmem->virt_addr + offset;
+		page = vmalloc_to_page(pmem->virt_addr + offset);
+		*pfn = page_to_pfn_t(page);
+		dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent,
+				"%s: sector: %#llx pfn: %#lx\n", __func__,
+				(unsigned long long) sector, page_to_pfn(page));
+
+		return PAGE_SIZE;
+	}
+
+	*kaddr = pmem->virt_addr + offset;
+	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+	/*
+	 * If badblocks are present, limit known good range to the
+	 * requested range.
+	 */
+	if (unlikely(pmem->bb.count))
+		return size;
+	return pmem->size - pmem->pfn_pad - offset;
+}
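
The single-page limit in the nfit_test branch above follows from how
vmalloc() memory behaves: the buffer is virtually contiguous but its
backing pages are generally scattered, so a pfn computed for one page
cannot be extended across a page boundary. A small sketch of the
property (hypothetical helper, not part of this diff):

	static bool vmalloc_pages_contiguous(void *buf)
	{
		unsigned long pfn0 = page_to_pfn(vmalloc_to_page(buf));
		unsigned long pfn1 = page_to_pfn(vmalloc_to_page(buf + PAGE_SIZE));

		return pfn1 == pfn0 + 1;	/* usually false for vmalloc() */
	}
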
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild
index 9241064970fe..d32f25bba42a 100644
--- a/tools/testing/nvdimm/test/Kbuild
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -1,5 +1,5 @@
 ccflags-y := -I$(src)/../../../../drivers/nvdimm/
-ccflags-y += -I$(src)/../../../../drivers/acpi/
+ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/

 obj-m += nfit_test.o
 obj-m += nfit_test_iomap.o
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index c842095f2801..c29f8dca9e67 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -10,11 +10,13 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  * General Public License for more details.
  */
+#include <linux/memremap.h>
 #include <linux/rculist.h>
 #include <linux/export.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/pfn_t.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 #include "nfit_test.h"
@@ -52,7 +54,7 @@ static struct nfit_test_resource *__get_nfit_res(resource_size_t resource)
 		return NULL;
 }

-static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 {
 	struct nfit_test_resource *res;

@@ -62,6 +64,7 @@ static struct nfit_test_resource *get_nfit_res(resource_size_t resource)

 	return res;
 }
+EXPORT_SYMBOL(get_nfit_res);

 void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
 		void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
@@ -97,10 +100,6 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);

-#ifdef __HAVE_ARCH_PTE_DEVMAP
-#include <linux/memremap.h>
-#include <linux/pfn_t.h>
-
 void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 		struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
@@ -122,19 +121,6 @@ pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 	return phys_to_pfn_t(addr, flags);
 }
 EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
-#else
-/* to be removed post 4.5-rc1 */
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res)
-{
-	resource_size_t offset = res->start;
-	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-
-	if (nfit_res)
-		return nfit_res->buf + offset - nfit_res->res->start;
-	return devm_memremap_pages(dev, res);
-}
-EXPORT_SYMBOL(__wrap_devm_memremap_pages);
-#endif

 void *__wrap_memremap(resource_size_t offset, size_t size,
 		unsigned long flags)
@@ -229,6 +215,22 @@ struct resource *__wrap___request_region(struct resource *parent,
 }
 EXPORT_SYMBOL(__wrap___request_region);

+int __wrap_insert_resource(struct resource *parent, struct resource *res)
+{
+	if (get_nfit_res(res->start))
+		return 0;
+	return insert_resource(parent, res);
+}
+EXPORT_SYMBOL(__wrap_insert_resource);
+
+int __wrap_remove_resource(struct resource *res)
+{
+	if (get_nfit_res(res->start))
+		return 0;
+	return remove_resource(res);
+}
+EXPORT_SYMBOL(__wrap_remove_resource);
+
 struct resource *__wrap___devm_request_region(struct device *dev,
 		struct resource *parent, resource_size_t start,
 		resource_size_t n, const char *name)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index c919866853a0..5404efa578a3 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -98,11 +98,13 @@
 enum {
 	NUM_PM = 3,
 	NUM_DCR = 5,
+	NUM_HINTS = 8,
 	NUM_BDW = NUM_DCR,
 	NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
 	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
 	DIMM_SIZE = SZ_32M,
 	LABEL_SIZE = SZ_128K,
+	SPA_VCD_SIZE = SZ_4M,
 	SPA0_SIZE = DIMM_SIZE,
 	SPA1_SIZE = DIMM_SIZE*2,
 	SPA2_SIZE = DIMM_SIZE,
@@ -470,11 +472,7 @@ static void release_nfit_res(void *data)
 	list_del(&nfit_res->list);
 	spin_unlock(&nfit_test_lock);

-	if (is_vmalloc_addr(nfit_res->buf))
-		vfree(nfit_res->buf);
-	else
-		dma_free_coherent(nfit_res->dev, resource_size(res),
-				nfit_res->buf, res->start);
+	vfree(nfit_res->buf);
 	kfree(res);
 	kfree(nfit_res);
 }
@@ -507,9 +505,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,

 	return nfit_res->buf;
  err:
-	if (buf && !is_vmalloc_addr(buf))
-		dma_free_coherent(dev, size, buf, *dma);
-	else if (buf)
+	if (buf)
 		vfree(buf);
 	kfree(res);
 	kfree(nfit_res);
@@ -524,15 +520,6 @@ static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
 	return __test_alloc(t, size, dma, buf);
 }

-static void *test_alloc_coherent(struct nfit_test *t, size_t size,
-		dma_addr_t *dma)
-{
-	struct device *dev = &t->pdev.dev;
-	void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
-
-	return __test_alloc(t, size, dma, buf);
-}
-
 static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
 {
 	int i;
@@ -584,7 +571,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
 			+ offsetof(struct acpi_nfit_control_region,
 					window_size) * NUM_DCR
 			+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
-			+ sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
+			+ (sizeof(struct acpi_nfit_flush_address)
+					+ sizeof(u64) * NUM_HINTS) * NUM_DCR;
 	int i;

 	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@@ -592,15 +580,15 @@ static int nfit_test0_alloc(struct nfit_test *t)
 		return -ENOMEM;
 	t->nfit_size = nfit_size;

-	t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]);
+	t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
 	if (!t->spa_set[0])
 		return -ENOMEM;

-	t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]);
+	t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
 	if (!t->spa_set[1])
 		return -ENOMEM;

-	t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]);
+	t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
 	if (!t->spa_set[2])
 		return -ENOMEM;

@@ -614,7 +602,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
 			return -ENOMEM;
 		sprintf(t->label[i], "label%d", i);

-		t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]);
+		t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS,
+				&t->flush_dma[i]);
 		if (!t->flush[i])
 			return -ENOMEM;
 	}
@@ -630,7 +619,7 @@ static int nfit_test0_alloc(struct nfit_test *t)

 static int nfit_test1_alloc(struct nfit_test *t)
 {
-	size_t nfit_size = sizeof(struct acpi_nfit_system_address)
+	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
 		+ sizeof(struct acpi_nfit_memory_map)
 		+ offsetof(struct acpi_nfit_control_region, window_size);

@@ -639,15 +628,31 @@ static int nfit_test1_alloc(struct nfit_test *t)
 		return -ENOMEM;
 	t->nfit_size = nfit_size;

-	t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]);
+	t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
 	if (!t->spa_set[0])
 		return -ENOMEM;

+	t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
+	if (!t->spa_set[1])
+		return -ENOMEM;
+
 	return ars_state_init(&t->pdev.dev, &t->ars_state);
 }

+static void dcr_common_init(struct acpi_nfit_control_region *dcr)
+{
+	dcr->vendor_id = 0xabcd;
+	dcr->device_id = 0;
+	dcr->revision_id = 1;
+	dcr->valid_fields = 1;
+	dcr->manufacturing_location = 0xa;
+	dcr->manufacturing_date = cpu_to_be16(2016);
+}
+
 static void nfit_test0_setup(struct nfit_test *t)
 {
+	const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+		+ (sizeof(u64) * NUM_HINTS);
 	struct acpi_nfit_desc *acpi_desc;
 	struct acpi_nfit_memory_map *memdev;
 	void *nfit_buf = t->nfit_buf;
@@ -655,7 +660,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	struct acpi_nfit_control_region *dcr;
 	struct acpi_nfit_data_region *bdw;
 	struct acpi_nfit_flush_address *flush;
-	unsigned int offset;
+	unsigned int offset, i;

 	/*
 	 * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -972,9 +977,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = sizeof(struct acpi_nfit_control_region);
 	dcr->region_index = 0+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[0];
 	dcr->code = NFIT_FIC_BLK;
 	dcr->windows = 1;
@@ -989,9 +992,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = sizeof(struct acpi_nfit_control_region);
 	dcr->region_index = 1+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[1];
 	dcr->code = NFIT_FIC_BLK;
 	dcr->windows = 1;
@@ -1006,9 +1007,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = sizeof(struct acpi_nfit_control_region);
 	dcr->region_index = 2+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[2];
 	dcr->code = NFIT_FIC_BLK;
 	dcr->windows = 1;
@@ -1023,9 +1022,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = sizeof(struct acpi_nfit_control_region);
 	dcr->region_index = 3+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[3];
 	dcr->code = NFIT_FIC_BLK;
 	dcr->windows = 1;
@@ -1042,9 +1039,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 4+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[0];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
@@ -1056,9 +1051,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 5+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[1];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
@@ -1070,9 +1063,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 6+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[2];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
@@ -1084,9 +1075,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 7+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[3];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
@@ -1141,45 +1130,47 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* flush0 (dimm0) */
 	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[0];
-	flush->hint_count = 1;
-	flush->hint_address[0] = t->flush_dma[0];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);

 	/* flush1 (dimm1) */
-	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1;
+	flush = nfit_buf + offset + flush_hint_size * 1;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[1];
-	flush->hint_count = 1;
-	flush->hint_address[0] = t->flush_dma[1];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);

 	/* flush2 (dimm2) */
-	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2;
+	flush = nfit_buf + offset + flush_hint_size * 2;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[2];
-	flush->hint_count = 1;
-	flush->hint_address[0] = t->flush_dma[2];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);

 	/* flush3 (dimm3) */
-	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3;
+	flush = nfit_buf + offset + flush_hint_size * 3;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[3];
-	flush->hint_count = 1;
-	flush->hint_address[0] = t->flush_dma[3];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);

 	if (t->setup_hotplug) {
-		offset = offset + sizeof(struct acpi_nfit_flush_address) * 4;
+		offset = offset + flush_hint_size * 4;
 		/* dcr-descriptor4: blk */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 		dcr->header.length = sizeof(struct acpi_nfit_control_region);
 		dcr->region_index = 8+1;
-		dcr->vendor_id = 0xabcd;
-		dcr->device_id = 0;
-		dcr->revision_id = 1;
+		dcr_common_init(dcr);
 		dcr->serial_number = ~handle[4];
 		dcr->code = NFIT_FIC_BLK;
 		dcr->windows = 1;
@@ -1196,9 +1187,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		dcr->header.length = offsetof(struct acpi_nfit_control_region,
 				window_size);
 		dcr->region_index = 9+1;
-		dcr->vendor_id = 0xabcd;
-		dcr->device_id = 0;
-		dcr->revision_id = 1;
+		dcr_common_init(dcr);
 		dcr->serial_number = ~handle[4];
 		dcr->code = NFIT_FIC_BYTEN;
 		dcr->windows = 0;
@@ -1300,10 +1289,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 		/* flush3 (dimm4) */
 		flush = nfit_buf + offset;
 		flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-		flush->header.length = sizeof(struct acpi_nfit_flush_address);
+		flush->header.length = flush_hint_size;
 		flush->device_handle = handle[4];
-		flush->hint_count = 1;
-		flush->hint_address[0] = t->flush_dma[4];
+		flush->hint_count = NUM_HINTS;
+		for (i = 0; i < NUM_HINTS; i++)
+			flush->hint_address[i] = t->flush_dma[4]
+				+ i * sizeof(u64);
 	}

 	post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
@@ -1339,7 +1330,16 @@ static void nfit_test1_setup(struct nfit_test *t)
 	spa->address = t->spa_set_dma[0];
 	spa->length = SPA2_SIZE;

-	offset += sizeof(*spa);
+	/* virtual cd region */
+	spa = nfit_buf + sizeof(*spa);
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
+	spa->range_index = 0;
+	spa->address = t->spa_set_dma[1];
+	spa->length = SPA_VCD_SIZE;
+
+	offset += sizeof(*spa) * 2;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1365,9 +1365,7 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 0+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~0;
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;
@@ -1462,20 +1460,16 @@ static int nfit_test_probe(struct platform_device *pdev)
 	nfit_test->setup(nfit_test);
 	acpi_desc = &nfit_test->acpi_desc;
 	acpi_nfit_desc_init(acpi_desc, &pdev->dev);
-	acpi_desc->nfit = nfit_test->nfit_buf;
 	acpi_desc->blk_do_io = nfit_test_blk_do_io;
 	nd_desc = &acpi_desc->nd_desc;
 	nd_desc->provider_name = NULL;
+	nd_desc->module = THIS_MODULE;
 	nd_desc->ndctl = nfit_test_ctl;
-	acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
-	if (!acpi_desc->nvdimm_bus)
-		return -ENXIO;

-	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
-	if (rc) {
-		nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-		return rc;
-	}
+	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+			nfit_test->nfit_size);
+	if (rc)
+		return rc;

 	if (nfit_test->setup != nfit_test0_setup)
 		return 0;
@@ -1483,22 +1477,16 @@ static int nfit_test_probe(struct platform_device *pdev)
 	nfit_test->setup_hotplug = 1;
 	nfit_test->setup(nfit_test);

-	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
-	if (rc) {
-		nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-		return rc;
-	}
+	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+			nfit_test->nfit_size);
+	if (rc)
+		return rc;

 	return 0;
 }

 static int nfit_test_remove(struct platform_device *pdev)
 {
-	struct nfit_test *nfit_test = to_nfit_test(&pdev->dev);
-	struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc;
-
-	nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-
 	return 0;
 }

@@ -1523,12 +1511,6 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };

-#ifdef CONFIG_CMA_SIZE_MBYTES
-#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
-#else
-#define CMA_SIZE_MBYTES 0
-#endif
-
 static __init int nfit_test_init(void)
 {
 	int rc, i;
@@ -1538,7 +1520,6 @@ static __init int nfit_test_init(void)
 	for (i = 0; i < NUM_NFITS; i++) {
 		struct nfit_test *nfit_test;
 		struct platform_device *pdev;
-		static int once;

 		nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
 		if (!nfit_test) {
@@ -1577,20 +1558,6 @@ static __init int nfit_test_init(void)
 			goto err_register;

 		instances[i] = nfit_test;
-
-		if (!once++) {
-			dma_addr_t dma;
-			void *buf;
-
-			buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma,
-					GFP_KERNEL);
-			if (!buf) {
-				rc = -ENOMEM;
-				dev_warn(&pdev->dev, "need 128M of free cma\n");
-				goto err_register;
-			}
-			dma_free_coherent(&pdev->dev, SZ_128M, buf, dma);
-		}
 	}

 	rc = platform_driver_register(&nfit_test_driver);
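
For context on what the test data above models: each hint_address
entry it populates stands in for a register the driver writes to in
order to drain a DIMM's write-pending queue (the in-kernel consumer is
nvdimm_flush() in drivers/nvdimm/region_devs.c). A simplified sketch,
assuming "hint" is one already-ioremapped hint register and that the
value written is irrelevant:

	static void flush_one_hint(void __iomem *hint)
	{
		wmb();			/* order prior stores to the dimm */
		writeq(1, hint);	/* the mmio write triggers the flush */
		wmb();			/* ensure the hint write is issued */
	}
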
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 96c5e16d7db9..9f18e2a4a862 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -12,6 +12,7 @@
  */
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
+#include <linux/list.h>

 struct nfit_test_resource {
 	struct list_head list;
@@ -26,4 +27,5 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
 void __wrap_iounmap(volatile void __iomem *addr);
 void nfit_test_setup(nfit_test_lookup_fn lookup);
 void nfit_test_teardown(void);
+struct nfit_test_resource *get_nfit_res(resource_size_t resource);
 #endif