author     Linus Torvalds <torvalds@linux-foundation.org>  2018-02-06 13:41:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-02-06 13:41:33 -0500
commit     3ff1b28caaff1d66d2be7e6eb7c56f78e9046fbb (patch)
tree       32d75a6db7f4985d37a9cfb7f1a1270963cfa404
parent     105cf3c8c6264dce4bcdab877feb8037bc4109b1 (diff)
parent     ee95f4059a833839bf52972191b2d4c3d3cec552 (diff)
Merge tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Ross Zwisler:

 - Require struct page by default for filesystem DAX to remove a number
   of surprising failure cases. This includes failures with direct I/O,
   gdb and fork(2).

 - Add support for the new Platform Capabilities Structure added to the
   NFIT in ACPI 6.2a. This new table tells us whether the platform
   supports flushing of CPU and memory controller caches on unexpected
   power loss events.

 - Revamp vmem_altmap and dev_pagemap handling to clean up code and
   better support future PCI P2P uses.

 - Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has
   become out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL
   spec, and instead rely on the generic ND_CMD_CALL approach used by
   the two other IOCTL families, NVDIMM_FAMILY_{HPE,MSFT}.

 - Enhance nfit_test so we can test some of the new things added in
   version 1.6 of the DSM specification. This includes testing firmware
   download and simulating the Last Shutdown State (LSS) status.

* tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (37 commits)
  libnvdimm, namespace: remove redundant initialization of 'nd_mapping'
  acpi, nfit: fix register dimm error handling
  libnvdimm, namespace: make min namespace size 4K
  tools/testing/nvdimm: force nfit_test to depend on instrumented modules
  libnvdimm/nfit_test: adding support for unit testing enable LSS status
  libnvdimm/nfit_test: add firmware download emulation
  nfit-test: Add platform cap support from ACPI 6.2a to test
  libnvdimm: expose platform persistence attribute for nd_region
  acpi: nfit: add persistent memory control flag for nd_region
  acpi: nfit: Add support for detect platform CPU cache flush on power loss
  device-dax: Fix trailing semicolon
  libnvdimm, btt: fix uninitialized err_lock
  dax: require 'struct page' by default for filesystem dax
  ext2: auto disable dax instead of failing mount
  ext4: auto disable dax instead of failing mount
  mm, dax: introduce pfn_t_special()
  mm: Fix devm_memremap_pages() collision handling
  mm: Fix memory size alignment in devm_memremap_pages_release()
  memremap: merge find_dev_pagemap into get_dev_pagemap
  memremap: change devm_memremap_pages interface to use struct dev_pagemap
  ...
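The devm_memremap_pages() rework called out above is the thread running through most of the hunks below: instead of passing a resource, percpu_ref, and vmem_altmap as separate arguments, callers now embed a struct dev_pagemap in their driver state and hand in that single object. A minimal before/after sketch of the calling convention, with field names taken from the driver hunks below (the drv pointer itself is illustrative, not from this diff):

    /* 4.15 and earlier: separate resource, ref, and altmap arguments */
    addr = devm_memremap_pages(dev, &res, &drv->ref, altmap);

    /* 4.16: fill an embedded struct dev_pagemap and pass only that */
    memcpy(&drv->pgmap.res, &res, sizeof(res)); /* physical range to map */
    drv->pgmap.ref = &drv->ref;                 /* percpu_ref gating teardown */
    drv->pgmap.altmap_valid = false;            /* no struct page carve-out */
    addr = devm_memremap_pages(dev, &drv->pgmap);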
-rw-r--r--  arch/arm64/mm/mmu.c                        9
-rw-r--r--  arch/ia64/mm/discontig.c                   6
-rw-r--r--  arch/ia64/mm/init.c                       18
-rw-r--r--  arch/powerpc/mm/init_64.c                 17
-rw-r--r--  arch/powerpc/mm/mem.c                     11
-rw-r--r--  arch/s390/mm/init.c                        7
-rw-r--r--  arch/s390/mm/vmem.c                        6
-rw-r--r--  arch/sh/mm/init.c                         10
-rw-r--r--  arch/sparc/mm/init_64.c                    5
-rw-r--r--  arch/x86/mm/init_32.c                      9
-rw-r--r--  arch/x86/mm/init_64.c                     94
-rw-r--r--  drivers/acpi/nfit/core.c                  26
-rw-r--r--  drivers/acpi/nfit/nfit.h                   1
-rw-r--r--  drivers/dax/device.c                       2
-rw-r--r--  drivers/dax/pmem.c                        20
-rw-r--r--  drivers/dax/super.c                       10
-rw-r--r--  drivers/nvdimm/btt.c                       2
-rw-r--r--  drivers/nvdimm/bus.c                       3
-rw-r--r--  drivers/nvdimm/namespace_devs.c            2
-rw-r--r--  drivers/nvdimm/nd.h                        9
-rw-r--r--  drivers/nvdimm/pfn_devs.c                 27
-rw-r--r--  drivers/nvdimm/pmem.c                     41
-rw-r--r--  drivers/nvdimm/pmem.h                      1
-rw-r--r--  drivers/nvdimm/region_devs.c              13
-rw-r--r--  drivers/s390/block/Kconfig                 1
-rw-r--r--  drivers/s390/block/dcssblk.c               3
-rw-r--r--  fs/Kconfig                                 7
-rw-r--r--  fs/ext2/super.c                            7
-rw-r--r--  fs/ext4/super.c                            9
-rw-r--r--  include/linux/libnvdimm.h                 11
-rw-r--r--  include/linux/memory_hotplug.h            29
-rw-r--r--  include/linux/memremap.h                  77
-rw-r--r--  include/linux/mm.h                        22
-rw-r--r--  include/linux/pfn_t.h                     13
-rw-r--r--  include/uapi/linux/ndctl.h                56
-rw-r--r--  kernel/memremap.c                        174
-rw-r--r--  mm/gup.c                                   7
-rw-r--r--  mm/hmm.c                                  13
-rw-r--r--  mm/memory.c                               16
-rw-r--r--  mm/memory_hotplug.c                       39
-rw-r--r--  mm/page_alloc.c                            6
-rw-r--r--  mm/sparse-vmemmap.c                       67
-rw-r--r--  mm/sparse.c                               43
-rw-r--r--  tools/testing/nvdimm/Kbuild                4
-rw-r--r--  tools/testing/nvdimm/acpi_nfit_test.c      8
-rw-r--r--  tools/testing/nvdimm/device_dax_test.c     8
-rw-r--r--  tools/testing/nvdimm/libnvdimm_test.c      8
-rw-r--r--  tools/testing/nvdimm/pmem_test.c           8
-rw-r--r--  tools/testing/nvdimm/test/iomap.c          7
-rw-r--r--  tools/testing/nvdimm/test/nfit.c         498
-rw-r--r--  tools/testing/nvdimm/test/nfit_test.h    134
-rw-r--r--  tools/testing/nvdimm/watermark.h          21
52 files changed, 1120 insertions, 525 deletions
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b44992ec9643..4e369dfb83b1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -685,12 +685,14 @@ int kern_addr_valid(unsigned long addr)
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 #if !ARM64_SWAPPER_USES_SECTION_MAPS
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+                struct vmem_altmap *altmap)
 {
         return vmemmap_populate_basepages(start, end, node);
 }
 #else   /* !ARM64_SWAPPER_USES_SECTION_MAPS */
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+                struct vmem_altmap *altmap)
 {
         unsigned long addr = start;
         unsigned long next;
@@ -725,7 +727,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
         return 0;
 }
 #endif  /* CONFIG_ARM64_64K_PAGES */
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+                struct vmem_altmap *altmap)
 {
 }
 #endif  /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index ac46f0d60b66..7d9bd20319ff 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -754,12 +754,14 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
 #endif

 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+                struct vmem_altmap *altmap)
 {
         return vmemmap_populate_basepages(start, end, node);
 }

-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+                struct vmem_altmap *altmap)
 {
 }
 #endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 7af4e05bb61e..18278b448530 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
         if (map_start < map_end)
                 memmap_init_zone((unsigned long)(map_end - map_start),
                                  args->nid, args->zone, page_to_pfn(map_start),
-                                 MEMMAP_EARLY);
+                                 MEMMAP_EARLY, NULL);
         return 0;
 }

@@ -509,9 +509,10 @@ void __meminit
 memmap_init (unsigned long size, int nid, unsigned long zone,
              unsigned long start_pfn)
 {
-        if (!vmem_map)
-                memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
-        else {
+        if (!vmem_map) {
+                memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY,
+                                 NULL);
+        } else {
                 struct page *start;
                 struct memmap_init_callback_data args;

@@ -647,13 +648,14 @@ mem_init (void)
 }

 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+                bool want_memblock)
 {
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;
         int ret;

-        ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+        ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
         if (ret)
                 printk("%s: Problem encountered in __add_pages() as ret=%d\n",
                        __func__, ret);
@@ -662,7 +664,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 }

 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -670,7 +672,7 @@ int arch_remove_memory(u64 start, u64 size)
         int ret;

         zone = page_zone(pfn_to_page(start_pfn));
-        ret = __remove_pages(zone, start_pfn, nr_pages);
+        ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
         if (ret)
                 pr_warn("%s: Problem encountered in __remove_pages() as"
                         " ret=%d\n", __func__, ret);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index f6eb7e8f4c93..fdb424a29f03 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
         vmemmap_list = vmem_back;
 }

-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+                struct vmem_altmap *altmap)
 {
         unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;

@@ -193,17 +194,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
         pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);

         for (; start < end; start += page_size) {
-                struct vmem_altmap *altmap;
                 void *p;
                 int rc;

                 if (vmemmap_populated(start, page_size))
                         continue;

-                /* altmap lookups only work at section boundaries */
-                altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
-
-                p = __vmemmap_alloc_block_buf(page_size, node, altmap);
+                if (altmap)
+                        p = altmap_alloc_block_buf(page_size, altmap);
+                else
+                        p = vmemmap_alloc_block_buf(page_size, node);
                 if (!p)
                         return -ENOMEM;

@@ -256,7 +256,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
         return vmem_back->phys;
 }

-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+                struct vmem_altmap *altmap)
 {
         unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
         unsigned long page_order = get_order(page_size);
@@ -267,7 +268,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)

         for (; start < end; start += page_size) {
                 unsigned long nr_pages, addr;
-                struct vmem_altmap *altmap;
                 struct page *section_base;
                 struct page *page;

@@ -287,7 +287,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
                 section_base = pfn_to_page(vmemmap_section_start(start));
                 nr_pages = 1 << page_order;

-                altmap = to_vmem_altmap((unsigned long) section_base);
                 if (altmap) {
                         vmem_altmap_free(altmap, nr_pages);
                 } else if (PageReserved(page)) {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1281c6eb3a85..fe8c61149fb8 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
         return -ENODEV;
 }

-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+                bool want_memblock)
 {
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -143,15 +144,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
                 return -EFAULT;
         }

-        return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+        return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }

 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;
-        struct vmem_altmap *altmap;
         struct page *page;
         int ret;

@@ -160,11 +160,10 @@ int arch_remove_memory(u64 start, u64 size)
          * when querying the zone.
          */
         page = pfn_to_page(start_pfn);
-        altmap = to_vmem_altmap((unsigned long) page);
         if (altmap)
                 page += vmem_altmap_offset(altmap);

-        ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
+        ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
         if (ret)
                 return ret;

diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 671535e64aba..3fa3e5323612 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init);

 #endif /* CONFIG_CMA */

-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+                bool want_memblock)
 {
         unsigned long start_pfn = PFN_DOWN(start);
         unsigned long size_pages = PFN_DOWN(size);
@@ -232,14 +233,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
         if (rc)
                 return rc;

-        rc = __add_pages(nid, start_pfn, size_pages, want_memblock);
+        rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock);
         if (rc)
                 vmem_remove_mapping(start, size);
         return rc;
 }

 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
         /*
          * There is no hardware or firmware interface which could trigger a
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 3316d463fc29..db55561c5981 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 /*
  * Add a backed mem_map array to the virtual mem_map array.
  */
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+                struct vmem_altmap *altmap)
 {
         unsigned long pgt_prot, sgt_prot;
         unsigned long address = start;
@@ -296,7 +297,8 @@ out:
         return ret;
 }

-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+                struct vmem_altmap *altmap)
 {
 }

diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index bf726af5f1a5..ce0bbaa7e404 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -485,20 +485,20 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif

 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+                bool want_memblock)
 {
         unsigned long start_pfn = PFN_DOWN(start);
         unsigned long nr_pages = size >> PAGE_SHIFT;
         int ret;

         /* We only have ZONE_NORMAL, so this is easy.. */
-        ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+        ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
         if (unlikely(ret))
                 printk("%s: Failed, __add_pages() == %d\n", __func__, ret);

         return ret;
 }
-EXPORT_SYMBOL_GPL(arch_add_memory);

 #ifdef CONFIG_NUMA
 int memory_add_physaddr_to_nid(u64 addr)
@@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif

 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
         unsigned long start_pfn = PFN_DOWN(start);
         unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size)
         int ret;

         zone = page_zone(pfn_to_page(start_pfn));
-        ret = __remove_pages(zone, start_pfn, nr_pages);
+        ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
         if (unlikely(ret))
                 pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
                         ret);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 55ba62957e64..995f9490334d 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE);

 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
-                               int node)
+                               int node, struct vmem_altmap *altmap)
 {
         unsigned long pte_base;

@@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
         return 0;
 }

-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+                struct vmem_altmap *altmap)
 {
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 135c9a7898c7..79cb066f40c0 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -829,23 +829,24 @@ void __init mem_init(void)
 }

 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+                bool want_memblock)
 {
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;

-        return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+        return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }

 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;
         struct zone *zone;

         zone = page_zone(pfn_to_page(start_pfn));
-        return __remove_pages(zone, start_pfn, nr_pages);
+        return __remove_pages(zone, start_pfn, nr_pages, altmap);
 }
 #endif
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4a837289f2ad..1ab42c852069 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size)
         }
 }

-int add_pages(int nid, unsigned long start_pfn,
-              unsigned long nr_pages, bool want_memblock)
+int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+              struct vmem_altmap *altmap, bool want_memblock)
 {
         int ret;

-        ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+        ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
         WARN_ON_ONCE(ret);

         /* update max_pfn, max_low_pfn and high_memory */
@@ -787,24 +787,24 @@ int add_pages(int nid, unsigned long start_pfn,
         return ret;
 }

-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+                bool want_memblock)
 {
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;

         init_memory_mapping(start, start + size);

-        return add_pages(nid, start_pfn, nr_pages, want_memblock);
+        return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
-EXPORT_SYMBOL_GPL(arch_add_memory);

 #define PAGE_INUSE 0xFD

-static void __meminit free_pagetable(struct page *page, int order)
+static void __meminit free_pagetable(struct page *page, int order,
+                struct vmem_altmap *altmap)
 {
         unsigned long magic;
         unsigned int nr_pages = 1 << order;
-        struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);

         if (altmap) {
                 vmem_altmap_free(altmap, nr_pages);
@@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order)
         free_pages((unsigned long)page_address(page), order);
 }

-static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
+                struct vmem_altmap *altmap)
 {
         pte_t *pte;
         int i;
@@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
         }

         /* free a pte talbe */
-        free_pagetable(pmd_page(*pmd), 0);
+        free_pagetable(pmd_page(*pmd), 0, altmap);
         spin_lock(&init_mm.page_table_lock);
         pmd_clear(pmd);
         spin_unlock(&init_mm.page_table_lock);
 }

-static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
+                struct vmem_altmap *altmap)
 {
         pmd_t *pmd;
         int i;
@@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
         }

         /* free a pmd talbe */
-        free_pagetable(pud_page(*pud), 0);
+        free_pagetable(pud_page(*pud), 0, altmap);
         spin_lock(&init_mm.page_table_lock);
         pud_clear(pud);
         spin_unlock(&init_mm.page_table_lock);
 }

-static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
+                struct vmem_altmap *altmap)
 {
         pud_t *pud;
         int i;
@@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
         }

         /* free a pud talbe */
-        free_pagetable(p4d_page(*p4d), 0);
+        free_pagetable(p4d_page(*p4d), 0, altmap);
         spin_lock(&init_mm.page_table_lock);
         p4d_clear(p4d);
         spin_unlock(&init_mm.page_table_lock);
@@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)

 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
-                 bool direct)
+                 struct vmem_altmap *altmap, bool direct)
 {
         unsigned long next, pages = 0;
         pte_t *pte;
@@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
                  * freed when offlining, or simplely not in use.
                  */
                 if (!direct)
-                        free_pagetable(pte_page(*pte), 0);
+                        free_pagetable(pte_page(*pte), 0, altmap);

                 spin_lock(&init_mm.page_table_lock);
                 pte_clear(&init_mm, addr, pte);
@@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,

                         page_addr = page_address(pte_page(*pte));
                         if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
-                                free_pagetable(pte_page(*pte), 0);
+                                free_pagetable(pte_page(*pte), 0, altmap);

                                 spin_lock(&init_mm.page_table_lock);
                                 pte_clear(&init_mm, addr, pte);
@@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,

 static void __meminit
 remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
-                 bool direct)
+                 bool direct, struct vmem_altmap *altmap)
 {
         unsigned long next, pages = 0;
         pte_t *pte_base;
@@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                             IS_ALIGNED(next, PMD_SIZE)) {
                                 if (!direct)
                                         free_pagetable(pmd_page(*pmd),
-                                                       get_order(PMD_SIZE));
+                                                       get_order(PMD_SIZE),
+                                                       altmap);

                                 spin_lock(&init_mm.page_table_lock);
                                 pmd_clear(pmd);
@@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                                 if (!memchr_inv(page_addr, PAGE_INUSE,
                                                 PMD_SIZE)) {
                                         free_pagetable(pmd_page(*pmd),
-                                                       get_order(PMD_SIZE));
+                                                       get_order(PMD_SIZE),
+                                                       altmap);

                                         spin_lock(&init_mm.page_table_lock);
                                         pmd_clear(pmd);
@@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                 }

                 pte_base = (pte_t *)pmd_page_vaddr(*pmd);
-                remove_pte_table(pte_base, addr, next, direct);
-                free_pte_table(pte_base, pmd);
+                remove_pte_table(pte_base, addr, next, altmap, direct);
+                free_pte_table(pte_base, pmd, altmap);
         }

         /* Call free_pmd_table() in remove_pud_table(). */
@@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,

 static void __meminit
 remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
-                 bool direct)
+                 struct vmem_altmap *altmap, bool direct)
 {
         unsigned long next, pages = 0;
         pmd_t *pmd_base;
@@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                             IS_ALIGNED(next, PUD_SIZE)) {
                                 if (!direct)
                                         free_pagetable(pud_page(*pud),
-                                                       get_order(PUD_SIZE));
+                                                       get_order(PUD_SIZE),
+                                                       altmap);

                                 spin_lock(&init_mm.page_table_lock);
                                 pud_clear(pud);
@@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                         if (!memchr_inv(page_addr, PAGE_INUSE,
                                         PUD_SIZE)) {
                                 free_pagetable(pud_page(*pud),
-                                               get_order(PUD_SIZE));
+                                               get_order(PUD_SIZE),
+                                               altmap);

                                 spin_lock(&init_mm.page_table_lock);
                                 pud_clear(pud);
@@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                 }

                 pmd_base = pmd_offset(pud, 0);
-                remove_pmd_table(pmd_base, addr, next, direct);
-                free_pmd_table(pmd_base, pud);
+                remove_pmd_table(pmd_base, addr, next, direct, altmap);
+                free_pmd_table(pmd_base, pud, altmap);
         }

         if (direct)
@@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,

 static void __meminit
 remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
-                 bool direct)
+                 struct vmem_altmap *altmap, bool direct)
 {
         unsigned long next, pages = 0;
         pud_t *pud_base;
@@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
                 BUILD_BUG_ON(p4d_large(*p4d));

                 pud_base = pud_offset(p4d, 0);
-                remove_pud_table(pud_base, addr, next, direct);
+                remove_pud_table(pud_base, addr, next, altmap, direct);
                 /*
                  * For 4-level page tables we do not want to free PUDs, but in the
                  * 5-level case we should free them. This code will have to change
                  * to adapt for boot-time switching between 4 and 5 level page tables.
                  */
                 if (CONFIG_PGTABLE_LEVELS == 5)
-                        free_pud_table(pud_base, p4d);
+                        free_pud_table(pud_base, p4d, altmap);
         }

         if (direct)
@@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,

 /* start and end are both virtual address. */
 static void __meminit
-remove_pagetable(unsigned long start, unsigned long end, bool direct)
+remove_pagetable(unsigned long start, unsigned long end, bool direct,
+                struct vmem_altmap *altmap)
 {
         unsigned long next;
         unsigned long addr;
@@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
                         continue;

                 p4d = p4d_offset(pgd, 0);
-                remove_p4d_table(p4d, addr, next, direct);
+                remove_p4d_table(p4d, addr, next, altmap, direct);
         }

         flush_tlb_all();
 }

-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+                struct vmem_altmap *altmap)
 {
-        remove_pagetable(start, end, false);
+        remove_pagetable(start, end, false, altmap);
 }

 #ifdef CONFIG_MEMORY_HOTREMOVE
@@ -1129,24 +1138,22 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
         start = (unsigned long)__va(start);
         end = (unsigned long)__va(end);

-        remove_pagetable(start, end, true);
+        remove_pagetable(start, end, true, NULL);
 }

-int __ref arch_remove_memory(u64 start, u64 size)
+int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;
         struct page *page = pfn_to_page(start_pfn);
-        struct vmem_altmap *altmap;
         struct zone *zone;
         int ret;

         /* With altmap the first mapped page is offset from @start */
-        altmap = to_vmem_altmap((unsigned long) page);
         if (altmap)
                 page += vmem_altmap_offset(altmap);
         zone = page_zone(page);
-        ret = __remove_pages(zone, start_pfn, nr_pages);
+        ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
         WARN_ON_ONCE(ret);
         kernel_physical_mapping_remove(start, start + size);

@@ -1378,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
                 if (pmd_none(*pmd)) {
                         void *p;

-                        p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+                        if (altmap)
+                                p = altmap_alloc_block_buf(PMD_SIZE, altmap);
+                        else
+                                p = vmemmap_alloc_block_buf(PMD_SIZE, node);
                         if (p) {
                                 pte_t entry;

@@ -1411,9 +1421,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
         return 0;
 }

-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+                struct vmem_altmap *altmap)
 {
-        struct vmem_altmap *altmap = to_vmem_altmap(start);
         int err;

         if (boot_cpu_has(X86_FEATURE_PSE))
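The x86 hunks above show the pattern repeated across the arch changes in this series: the implicit to_vmem_altmap() address lookup is gone, and the vmem_altmap is threaded down the call chain as an explicit parameter, with the allocation decision made openly at the call site. For reference, the shape of the new branch as it appears in vmemmap_populate_hugepages():

    /* allocate backing store for one PMD's worth of vmemmap */
    if (altmap)
            /* carve pages out of the device's own reserved capacity */
            p = altmap_alloc_block_buf(PMD_SIZE, altmap);
    else
            /* fall back to regular memory from the page allocator */
            p = vmemmap_alloc_block_buf(PMD_SIZE, node);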
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index abeb4df4f22e..bbe48ad20886 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -838,6 +838,18 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
         return true;
 }

+static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc,
+                struct acpi_nfit_capabilities *pcap)
+{
+        struct device *dev = acpi_desc->dev;
+        u32 mask;
+
+        mask = (1 << (pcap->highest_capability + 1)) - 1;
+        acpi_desc->platform_cap = pcap->capabilities & mask;
+        dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap);
+        return true;
+}
+
 static void *add_table(struct acpi_nfit_desc *acpi_desc,
                 struct nfit_table_prev *prev, void *table, const void *end)
 {
@@ -883,6 +895,10 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc,
         case ACPI_NFIT_TYPE_SMBIOS:
                 dev_dbg(dev, "%s: smbios\n", __func__);
                 break;
+        case ACPI_NFIT_TYPE_CAPABILITIES:
+                if (!add_platform_cap(acpi_desc, table))
+                        return err;
+                break;
         default:
                 dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
                 break;
@@ -1867,6 +1883,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
                 struct kernfs_node *nfit_kernfs;

                 nvdimm = nfit_mem->nvdimm;
+                if (!nvdimm)
+                        continue;
+
                 nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
                 if (nfit_kernfs)
                         nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
@@ -2656,6 +2675,12 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
         else
                 ndr_desc->numa_node = NUMA_NO_NODE;

+        if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH)
+                set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags);
+
+        if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH)
+                set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags);
+
         list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
                 struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
                 struct nd_mapping_desc *mapping;
@@ -3464,6 +3489,7 @@ static __init int nfit_init(void)
         BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
         BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
         BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
+        BUILD_BUG_ON(sizeof(struct acpi_nfit_capabilities) != 16);

         guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]);
         guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);
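The mask in add_platform_cap() above clips the capabilities field to the bits the Platform Capabilities Structure declares valid via highest_capability. A quick worked example of the arithmetic, using a hypothetical table value of 2 (not taken from this diff):

    u32 highest_capability = 2;                     /* hypothetical example */
    u32 mask = (1 << (highest_capability + 1)) - 1; /* (1 << 3) - 1 = 0x7 */
    u32 capabilities = 0x1f;                        /* bits 3+ are undefined */
    u32 platform_cap = capabilities & mask;         /* 0x1f & 0x7 = 0x7 */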
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index f0cf18b2da8b..50d36e166d70 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -202,6 +202,7 @@ struct acpi_nfit_desc {
         unsigned long dimm_cmd_force_en;
         unsigned long bus_cmd_force_en;
         unsigned long bus_nfit_cmd_force_en;
+        unsigned int platform_cap;
         int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
                         void *iobuf, u64 len, int rw);
 };
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 7b0bf825c4e7..2137dbc29877 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -133,7 +133,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
         dax_region->base = addr;
         if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
                 kfree(dax_region);
-                return NULL;;
+                return NULL;
         }

         kref_get(&dax_region->kref);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 8d8c852ba8f2..31b6ecce4c64 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -21,6 +21,7 @@
 struct dax_pmem {
         struct device *dev;
         struct percpu_ref ref;
+        struct dev_pagemap pgmap;
         struct completion cmp;
 };

@@ -69,20 +70,23 @@ static int dax_pmem_probe(struct device *dev)
         struct nd_namespace_common *ndns;
         struct nd_dax *nd_dax = to_nd_dax(dev);
         struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
-        struct vmem_altmap __altmap, *altmap = NULL;

         ndns = nvdimm_namespace_common_probe(dev);
         if (IS_ERR(ndns))
                 return PTR_ERR(ndns);
         nsio = to_nd_namespace_io(&ndns->dev);

+        dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
+        if (!dax_pmem)
+                return -ENOMEM;
+
         /* parse the 'pfn' info block via ->rw_bytes */
         rc = devm_nsio_enable(dev, nsio);
         if (rc)
                 return rc;
-        altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
-        if (IS_ERR(altmap))
-                return PTR_ERR(altmap);
+        rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap);
+        if (rc)
+                return rc;
         devm_nsio_disable(dev, nsio);

         pfn_sb = nd_pfn->pfn_sb;
@@ -94,10 +98,6 @@ static int dax_pmem_probe(struct device *dev)
                 return -EBUSY;
         }

-        dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
-        if (!dax_pmem)
-                return -ENOMEM;
-
         dax_pmem->dev = dev;
         init_completion(&dax_pmem->cmp);
         rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
@@ -110,7 +110,8 @@ static int dax_pmem_probe(struct device *dev)
         if (rc)
                 return rc;

-        addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
+        dax_pmem->pgmap.ref = &dax_pmem->ref;
+        addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
         if (IS_ERR(addr))
                 return PTR_ERR(addr);

@@ -120,6 +121,7 @@ static int dax_pmem_probe(struct device *dev)
                 return rc;

         /* adjust the dax_region resource to the start of data */
+        memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
         res.start += le64_to_cpu(pfn_sb->dataoff);

         rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 3ec804672601..473af694ad1c 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -15,6 +15,7 @@
 #include <linux/mount.h>
 #include <linux/magic.h>
 #include <linux/genhd.h>
+#include <linux/pfn_t.h>
 #include <linux/cdev.h>
 #include <linux/hash.h>
 #include <linux/slab.h>
@@ -123,6 +124,15 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
                 return len < 0 ? len : -EIO;
         }

+        if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
+                        || pfn_t_devmap(pfn))
+                /* pass */;
+        else {
+                pr_debug("VFS (%s): error: dax support not enabled\n",
+                                sb->s_id);
+                return -EOPNOTSUPP;
+        }
+
         return 0;
 }
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
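This hunk is the enforcement point for the "require struct page by default" item in the merge summary: __bdev_dax_supported() now rejects block devices whose DAX pfns are not devmap-backed, so mounts fail fast with -EOPNOTSUPP instead of hitting surprises later in direct I/O, gdb, or fork(2). An equivalent, positively-phrased restatement of the new check (a sketch, not code from this diff):

    /* DAX is usable only when pfns carry struct pages (devmap), or when
     * the limited mode explicitly opts in via special pfns. */
    bool supported = pfn_t_devmap(pfn) ||
            (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn));
    if (!supported)
            return -EOPNOTSUPP;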
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index c586bcdb5190..2ef544f10ec8 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -753,6 +753,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
                 return NULL;
         arena->nd_btt = btt->nd_btt;
         arena->sector_size = btt->sector_size;
+        mutex_init(&arena->err_lock);

         if (!size)
                 return arena;
@@ -891,7 +892,6 @@ static int discover_arenas(struct btt *btt)
                 goto out;
         }

-        mutex_init(&arena->err_lock);
         ret = btt_freelist_init(arena);
         if (ret)
                 goto out;
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 0a5e6cd758fe..78eabc3a1ab1 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -1142,9 +1142,6 @@ int __init nvdimm_bus_init(void)
 {
         int rc;

-        BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128);
-        BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8);
-
         rc = bus_register(&nvdimm_bus_type);
         if (rc)
                 return rc;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index bb3ba8cf24d4..658ada497be0 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -2408,7 +2408,7 @@ static struct device **scan_labels(struct nd_region *nd_region)

 static struct device **create_namespaces(struct nd_region *nd_region)
 {
-        struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+        struct nd_mapping *nd_mapping;
         struct device **devs;
         int i;

diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index e958f3724c41..8d6375ee0fda 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -368,15 +368,14 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
 void nvdimm_badblocks_populate(struct nd_region *nd_region,
                 struct badblocks *bb, const struct resource *res);
 #if IS_ENABLED(CONFIG_ND_CLAIM)
-struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
-                struct resource *res, struct vmem_altmap *altmap);
+int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
 int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
 void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
 #else
-static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
-                struct resource *res, struct vmem_altmap *altmap)
+static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
+                struct dev_pagemap *pgmap)
 {
-        return ERR_PTR(-ENXIO);
+        return -ENXIO;
 }
 static inline int devm_nsio_enable(struct device *dev,
                 struct nd_namespace_io *nsio)
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 2adada1a5855..f5c4e8c6e29d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -542,9 +542,10 @@ static unsigned long init_altmap_reserve(resource_size_t base)
         return reserve;
 }

-static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
-                struct resource *res, struct vmem_altmap *altmap)
+static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 {
+        struct resource *res = &pgmap->res;
+        struct vmem_altmap *altmap = &pgmap->altmap;
         struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
         u64 offset = le64_to_cpu(pfn_sb->dataoff);
         u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
@@ -561,11 +562,13 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
         res->start += start_pad;
         res->end -= end_trunc;

+        pgmap->type = MEMORY_DEVICE_HOST;
+
         if (nd_pfn->mode == PFN_MODE_RAM) {
                 if (offset < SZ_8K)
-                        return ERR_PTR(-EINVAL);
+                        return -EINVAL;
                 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
-                altmap = NULL;
+                pgmap->altmap_valid = false;
         } else if (nd_pfn->mode == PFN_MODE_PMEM) {
                 nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
                                         - offset) / PAGE_SIZE);
@@ -577,10 +580,11 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
                 memcpy(altmap, &__altmap, sizeof(*altmap));
                 altmap->free = PHYS_PFN(offset - SZ_8K);
                 altmap->alloc = 0;
+                pgmap->altmap_valid = true;
         } else
-                return ERR_PTR(-ENXIO);
+                return -ENXIO;

-        return altmap;
+        return 0;
 }

 static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
@@ -708,19 +712,18 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
  * Determine the effective resource range and vmem_altmap from an nd_pfn
  * instance.
  */
-struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
-                struct resource *res, struct vmem_altmap *altmap)
+int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 {
         int rc;

         if (!nd_pfn->uuid || !nd_pfn->ndns)
-                return ERR_PTR(-ENODEV);
+                return -ENODEV;

         rc = nd_pfn_init(nd_pfn);
         if (rc)
-                return ERR_PTR(rc);
+                return rc;

-        /* we need a valid pfn_sb before we can init a vmem_altmap */
-        return __nvdimm_setup_pfn(nd_pfn, res, altmap);
+        /* we need a valid pfn_sb before we can init a dev_pagemap */
+        return __nvdimm_setup_pfn(nd_pfn, pgmap);
 }
 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 7fbc5c5dc8e1..10041ac4032c 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -35,6 +35,7 @@
35#include "pmem.h" 35#include "pmem.h"
36#include "pfn.h" 36#include "pfn.h"
37#include "nd.h" 37#include "nd.h"
38#include "nd-core.h"
38 39
39static struct device *to_dev(struct pmem_device *pmem) 40static struct device *to_dev(struct pmem_device *pmem)
40{ 41{
@@ -298,34 +299,34 @@ static int pmem_attach_disk(struct device *dev,
298{ 299{
299 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 300 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
300 struct nd_region *nd_region = to_nd_region(dev->parent); 301 struct nd_region *nd_region = to_nd_region(dev->parent);
301 struct vmem_altmap __altmap, *altmap = NULL;
302 int nid = dev_to_node(dev), fua, wbc; 302 int nid = dev_to_node(dev), fua, wbc;
303 struct resource *res = &nsio->res; 303 struct resource *res = &nsio->res;
304 struct resource bb_res;
304 struct nd_pfn *nd_pfn = NULL; 305 struct nd_pfn *nd_pfn = NULL;
305 struct dax_device *dax_dev; 306 struct dax_device *dax_dev;
306 struct nd_pfn_sb *pfn_sb; 307 struct nd_pfn_sb *pfn_sb;
307 struct pmem_device *pmem; 308 struct pmem_device *pmem;
308 struct resource pfn_res;
309 struct request_queue *q; 309 struct request_queue *q;
310 struct device *gendev; 310 struct device *gendev;
311 struct gendisk *disk; 311 struct gendisk *disk;
312 void *addr; 312 void *addr;
313 int rc;
314
315 pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
316 if (!pmem)
317 return -ENOMEM;
313 318
314 /* while nsio_rw_bytes is active, parse a pfn info block if present */ 319 /* while nsio_rw_bytes is active, parse a pfn info block if present */
315 if (is_nd_pfn(dev)) { 320 if (is_nd_pfn(dev)) {
316 nd_pfn = to_nd_pfn(dev); 321 nd_pfn = to_nd_pfn(dev);
317 altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap); 322 rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap);
318 if (IS_ERR(altmap)) 323 if (rc)
319 return PTR_ERR(altmap); 324 return rc;
320 } 325 }
321 326
322 /* we're attaching a block device, disable raw namespace access */ 327 /* we're attaching a block device, disable raw namespace access */
323 devm_nsio_disable(dev, nsio); 328 devm_nsio_disable(dev, nsio);
324 329
325 pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
326 if (!pmem)
327 return -ENOMEM;
328
329 dev_set_drvdata(dev, pmem); 330 dev_set_drvdata(dev, pmem);
330 pmem->phys_addr = res->start; 331 pmem->phys_addr = res->start;
331 pmem->size = resource_size(res); 332 pmem->size = resource_size(res);
@@ -334,7 +335,8 @@ static int pmem_attach_disk(struct device *dev,
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 		fua = 0;
 	}
-	wbc = nvdimm_has_cache(nd_region);
+	wbc = nvdimm_has_cache(nd_region) &&
+		!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
 
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
 				dev_name(&ndns->dev))) {
@@ -350,19 +352,22 @@ static int pmem_attach_disk(struct device *dev,
 		return -ENOMEM;
 
 	pmem->pfn_flags = PFN_DEV;
+	pmem->pgmap.ref = &q->q_usage_counter;
 	if (is_nd_pfn(dev)) {
-		addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
-				altmap);
+		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pfn_sb = nd_pfn->pfn_sb;
 		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
-		pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
+		pmem->pfn_pad = resource_size(res) -
+			resource_size(&pmem->pgmap.res);
 		pmem->pfn_flags |= PFN_MAP;
-		res = &pfn_res; /* for badblocks populate */
-		res->start += pmem->data_offset;
+		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+		bb_res.start += pmem->data_offset;
 	} else if (pmem_should_map_pages(dev)) {
-		addr = devm_memremap_pages(dev, &nsio->res,
-				&q->q_usage_counter, NULL);
+		memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
+		pmem->pgmap.altmap_valid = false;
+		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pmem->pfn_flags |= PFN_MAP;
+		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
 	} else
 		addr = devm_memremap(dev, pmem->phys_addr,
 				pmem->size, ARCH_MEMREMAP_PMEM);
@@ -401,7 +406,7 @@ static int pmem_attach_disk(struct device *dev,
 			/ 512);
 	if (devm_init_badblocks(dev, &pmem->bb))
 		return -ENOMEM;
-	nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
+	nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
 	disk->bb = &pmem->bb;
 
 	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 6a3cd2a10db6..a64ebc78b5df 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -22,6 +22,7 @@ struct pmem_device {
 	struct badblocks bb;
 	struct dax_device *dax_dev;
 	struct gendisk *disk;
+	struct dev_pagemap pgmap;
 };
 
 long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
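
With struct dev_pagemap embedded in struct pmem_device, the driver owns the pagemap's storage and lifetime instead of devm_memremap_pages() allocating a hidden page_map wrapper. A minimal sketch of the resulting call pattern, condensed from the pmem_attach_disk() hunks above (error paths trimmed, surrounding context assumed):

	struct pmem_device *pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);

	if (!pmem)
		return -ENOMEM;
	/* describe the range and the reference that keeps its pages live */
	memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
	pmem->pgmap.ref = &q->q_usage_counter;
	pmem->pgmap.altmap_valid = false;	/* memmap from regular memory */
	addr = devm_memremap_pages(dev, &pmem->pgmap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);
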
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index abaf38c61220..e6d01911e092 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -528,6 +528,18 @@ static ssize_t resource_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(resource);
 
+static ssize_t persistence_domain_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	unsigned long flags = nd_region->flags;
+
+	return sprintf(buf, "%s%s\n",
+			flags & BIT(ND_REGION_PERSIST_CACHE) ? "cpu_cache " : "",
+			flags & BIT(ND_REGION_PERSIST_MEMCTRL) ? "memory_controller " : "");
+}
+static DEVICE_ATTR_RO(persistence_domain);
+
 static struct attribute *nd_region_attributes[] = {
 	&dev_attr_size.attr,
 	&dev_attr_nstype.attr,
@@ -543,6 +555,7 @@ static struct attribute *nd_region_attributes[] = {
 	&dev_attr_init_namespaces.attr,
 	&dev_attr_badblocks.attr,
 	&dev_attr_resource.attr,
+	&dev_attr_persistence_domain.attr,
 	NULL,
 };
 
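Userspace can then read the persistence domain straight from sysfs. A hedged userspace sketch follows; the device path and region number are assumptions for illustration, and the attribute may print "cpu_cache", "memory_controller", both, or neither:

#include <stdio.h>

int main(void)
{
	/* path assumed; the actual region number varies by platform */
	FILE *f = fopen("/sys/bus/nd/devices/region0/persistence_domain", "r");
	char buf[64] = "";

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("persistence domain: %s", buf);
	fclose(f);
	return 0;
}
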
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index bc27d716aa6b..1444333210c7 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -16,6 +16,7 @@ config BLK_DEV_XPRAM
 config DCSSBLK
 	def_tristate m
 	select DAX
+	select FS_DAX_LIMITED
 	prompt "DCSSBLK support"
 	depends on S390 && BLOCK
 	help
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 6aaefb780436..9cae08b36b80 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -916,7 +916,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff,
 
 	dev_sz = dev_info->end - dev_info->start + 1;
 	*kaddr = (void *) dev_info->start + offset;
-	*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
+	*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
+			PFN_DEV|PFN_SPECIAL);
 
 	return (dev_sz - offset) / PAGE_SIZE;
 }
diff --git a/fs/Kconfig b/fs/Kconfig
index 9774588da60e..bc821a86d965 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -58,6 +58,13 @@ config FS_DAX_PMD
 	depends on ZONE_DEVICE
 	depends on TRANSPARENT_HUGEPAGE
 
+# Selected by DAX drivers that do not expect filesystem DAX to support
+# get_user_pages() of DAX mappings. I.e. "limited" indicates no support
+# for fork() of processes with MAP_SHARED mappings or support for
+# direct-I/O to a DAX mapping.
+config FS_DAX_LIMITED
+	bool
+
 endif # BLOCK
 
 # Posix ACL utility routines
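
As a hedged sketch of what "limited" means for consumers (the call site below is an illustration keyed to pfn_t_special() from this same series, not a hunk from this merge):

	/* illustrative only: bail out of paths that need get_user_pages() */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
		/*
		 * No struct page backs this pfn, so fork() of MAP_SHARED
		 * mappings and direct-I/O cannot be supported here.
		 */
		return -EOPNOTSUPP;	/* error code is an assumption */
	}
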
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index db5f9daa7780..0a638e79bf7c 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -962,8 +962,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
 	if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
 		err = bdev_dax_supported(sb, blocksize);
-		if (err)
-			goto failed_mount;
+		if (err) {
+			ext2_msg(sb, KERN_ERR,
+				"DAX unsupported by block device. Turning off DAX.");
+			sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
+		}
 	}
 
 	/* If the blocksize doesn't match, re-read the thing.. */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 421222ec3509..39bf464c35f1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3712,11 +3712,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		if (ext4_has_feature_inline_data(sb)) {
 			ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
 					" that may contain inline data");
-			goto failed_mount;
+			sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
 		}
 		err = bdev_dax_supported(sb, blocksize);
-		if (err)
-			goto failed_mount;
+		if (err) {
+			ext4_msg(sb, KERN_ERR,
+				"DAX unsupported by block device. Turning off DAX.");
+			sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
+		}
 	}
 
 	if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index f8109ddb5ef1..ff855ed965fb 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -47,6 +47,17 @@ enum {
 
 	/* region flag indicating to direct-map persistent memory by default */
 	ND_REGION_PAGEMAP = 0,
+	/*
+	 * Platform ensures entire CPU store data path is flushed to pmem on
+	 * system power loss.
+	 */
+	ND_REGION_PERSIST_CACHE = 1,
+	/*
+	 * Platform provides mechanisms to automatically flush outstanding
+	 * write data from memory controller to pmem on system power loss.
+	 * (ADR)
+	 */
+	ND_REGION_PERSIST_MEMCTRL = 2,
 
 	/* mark newly adjusted resources as requiring a label update */
 	DPA_RESOURCE_ADJUSTED = 1 << 0,
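
A region provider translates platform capabilities into these bits, and consumers test them on nd_region->flags as the pmem.c hunk above does for write-back cache management. A sketch of both sides (the ndr_desc field and the two predicates are hypothetical stand-ins for what the nfit code derives from the ACPI 6.2a Platform Capabilities Structure):

	/* provider side: advertise the deepest persistence domain found */
	if (platform_flushes_cpu_cache)		/* hypothetical predicate */
		set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags);
	else if (platform_flushes_memctrl)	/* hypothetical predicate */
		set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags);

	/* consumer side: skip cache flushing the platform already covers */
	wbc = nvdimm_has_cache(nd_region) &&
		!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
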
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 58e110aee7ab..aba5f86eb038 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,6 +13,7 @@ struct pglist_data;
 struct mem_section;
 struct memory_block;
 struct resource;
+struct vmem_altmap;
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
@@ -125,24 +126,26 @@ static inline bool movable_node_is_enabled(void)
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
-extern int arch_remove_memory(u64 start, u64 size);
+extern int arch_remove_memory(u64 start, u64 size,
+		struct vmem_altmap *altmap);
 extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
-	unsigned long nr_pages);
+	unsigned long nr_pages, struct vmem_altmap *altmap);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 /* reasonably generic interface to expand the physical pages */
-extern int __add_pages(int nid, unsigned long start_pfn,
-	unsigned long nr_pages, bool want_memblock);
+extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+		struct vmem_altmap *altmap, bool want_memblock);
 
 #ifndef CONFIG_ARCH_HAS_ADD_PAGES
 static inline int add_pages(int nid, unsigned long start_pfn,
-		unsigned long nr_pages, bool want_memblock)
+		unsigned long nr_pages, struct vmem_altmap *altmap,
+		bool want_memblock)
 {
-	return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+	return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
 #else /* ARCH_HAS_ADD_PAGES */
-int add_pages(int nid, unsigned long start_pfn,
-	unsigned long nr_pages, bool want_memblock);
+int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+	struct vmem_altmap *altmap, bool want_memblock);
 #endif /* ARCH_HAS_ADD_PAGES */
 
 #ifdef CONFIG_NUMA
@@ -318,15 +321,17 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
 extern int add_memory_resource(int nid, struct resource *resource, bool online);
-extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
+extern int arch_add_memory(int nid, u64 start, u64 size,
+		struct vmem_altmap *altmap, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
-		unsigned long nr_pages);
+		unsigned long nr_pages, struct vmem_altmap *altmap);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn);
+extern int sparse_add_one_section(struct pglist_data *pgdat,
+		unsigned long start_pfn, struct vmem_altmap *altmap);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
-		unsigned long map_offset);
+		unsigned long map_offset, struct vmem_altmap *altmap);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
 extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 10d23c367048..7b4899c06f49 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -26,18 +26,6 @@ struct vmem_altmap {
 	unsigned long alloc;
 };
 
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-#ifdef CONFIG_ZONE_DEVICE
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
-#else
-static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
-{
-	return NULL;
-}
-#endif
-
 /*
  * Specialize ZONE_DEVICE memory into multiple types each having differents
  * usage.
@@ -125,8 +113,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
 struct dev_pagemap {
 	dev_page_fault_t page_fault;
 	dev_page_free_t page_free;
-	struct vmem_altmap *altmap;
-	const struct resource *res;
+	struct vmem_altmap altmap;
+	bool altmap_valid;
+	struct resource res;
 	struct percpu_ref *ref;
 	struct device *dev;
 	void *data;
@@ -134,15 +123,17 @@ struct dev_pagemap {
 };
 
 #ifdef CONFIG_ZONE_DEVICE
-void *devm_memremap_pages(struct device *dev, struct resource *res,
-		struct percpu_ref *ref, struct vmem_altmap *altmap);
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+		struct dev_pagemap *pgmap);
+
+unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
+void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
 
 static inline bool is_zone_device_page(const struct page *page);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
-		struct resource *res, struct percpu_ref *ref,
-		struct vmem_altmap *altmap)
+		struct dev_pagemap *pgmap)
 {
 	/*
 	 * Fail attempts to call devm_memremap_pages() without
@@ -153,11 +144,22 @@ static inline void *devm_memremap_pages(struct device *dev,
 	return ERR_PTR(-ENXIO);
 }
 
-static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+		struct dev_pagemap *pgmap)
 {
 	return NULL;
 }
-#endif
+
+static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
+{
+	return 0;
+}
+
+static inline void vmem_altmap_free(struct vmem_altmap *altmap,
+		unsigned long nr_pfns)
+{
+}
+#endif /* CONFIG_ZONE_DEVICE */
 
 #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
 static inline bool is_device_private_page(const struct page *page)
@@ -173,39 +175,6 @@ static inline bool is_device_public_page(const struct page *page)
 }
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 
-/**
- * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
- * @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
- * same mapping.
- */
-static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
-		struct dev_pagemap *pgmap)
-{
-	const struct resource *res = pgmap ? pgmap->res : NULL;
-	resource_size_t phys = PFN_PHYS(pfn);
-
-	/*
-	 * In the cached case we're already holding a live reference so
-	 * we can simply do a blind increment
-	 */
-	if (res && phys >= res->start && phys <= res->end) {
-		percpu_ref_get(pgmap->ref);
-		return pgmap;
-	}
-
-	/* fall back to slow path lookup */
-	rcu_read_lock();
-	pgmap = find_dev_pagemap(phys);
-	if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
-		pgmap = NULL;
-	rcu_read_unlock();
-
-	return pgmap;
-}
-
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
 	if (pgmap)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 173d2484f6e3..ad06d42adb1a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2075,8 +2075,8 @@ static inline void zero_resv_unavail(void) {}
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long,
-		unsigned long, enum memmap_context);
+extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
+		enum memmap_context, struct vmem_altmap *);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
@@ -2544,7 +2544,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
 			       unsigned long map_count,
 			       int nodeid);
 
-struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
+struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
+		struct vmem_altmap *altmap);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@ -2552,20 +2553,17 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
 struct vmem_altmap;
-void *__vmemmap_alloc_block_buf(unsigned long size, int node,
-		struct vmem_altmap *altmap);
-static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
-{
-	return __vmemmap_alloc_block_buf(size, node, NULL);
-}
-
+void *vmemmap_alloc_block_buf(unsigned long size, int node);
+void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(unsigned long start, unsigned long end,
 			       int node);
-int vmemmap_populate(unsigned long start, unsigned long end, int node);
+int vmemmap_populate(unsigned long start, unsigned long end, int node,
+		struct vmem_altmap *altmap);
 void vmemmap_populate_print_last(void);
 #ifdef CONFIG_MEMORY_HOTPLUG
-void vmemmap_free(unsigned long start, unsigned long end);
+void vmemmap_free(unsigned long start, unsigned long end,
+		struct vmem_altmap *altmap);
 #endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 				  unsigned long nr_pages);
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 43b1d7648e82..a03c2642a87c 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -15,8 +15,10 @@
 #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
 #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
 #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
+#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
 
 #define PFN_FLAGS_TRACE \
+	{ PFN_SPECIAL,	"SPECIAL" }, \
 	{ PFN_SG_CHAIN,	"SG_CHAIN" }, \
 	{ PFN_SG_LAST,	"SG_LAST" }, \
 	{ PFN_DEV,	"DEV" }, \
@@ -120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud);
 #endif
 #endif /* __HAVE_ARCH_PTE_DEVMAP */
 
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+static inline bool pfn_t_special(pfn_t pfn)
+{
+	return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
+}
+#else
+static inline bool pfn_t_special(pfn_t pfn)
+{
+	return false;
+}
+#endif /* __HAVE_ARCH_PTE_SPECIAL */
 #endif /* _LINUX_PFN_T_H_ */
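
Tying PFN_SPECIAL to the dcssblk and mm/memory.c changes elsewhere in this pull: a driver with no struct pages marks the pfns it hands out as special, and __vm_insert_mixed() then accepts them through pfn_t_special(). A condensed sketch, assuming a fault-handler context (variable names hypothetical):

	/* no memmap behind this range: PFN_DEV|PFN_SPECIAL, never PFN_MAP */
	pfn_t pfn = __pfn_to_pfn_t(PFN_DOWN(phys), PFN_DEV | PFN_SPECIAL);

	/* caller converts the result to a VM_FAULT_* code as appropriate */
	return vm_insert_mixed(vma, vmf->address, pfn);
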
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 3f03567631cb..7e27070b9440 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -15,54 +15,6 @@
 
 #include <linux/types.h>
 
-struct nd_cmd_smart {
-	__u32 status;
-	__u8 data[128];
-} __packed;
-
-#define ND_SMART_HEALTH_VALID	(1 << 0)
-#define ND_SMART_SPARES_VALID	(1 << 1)
-#define ND_SMART_USED_VALID	(1 << 2)
-#define ND_SMART_TEMP_VALID	(1 << 3)
-#define ND_SMART_CTEMP_VALID	(1 << 4)
-#define ND_SMART_ALARM_VALID	(1 << 9)
-#define ND_SMART_SHUTDOWN_VALID	(1 << 10)
-#define ND_SMART_VENDOR_VALID	(1 << 11)
-#define ND_SMART_SPARE_TRIP	(1 << 0)
-#define ND_SMART_TEMP_TRIP	(1 << 1)
-#define ND_SMART_CTEMP_TRIP	(1 << 2)
-#define ND_SMART_NON_CRITICAL_HEALTH	(1 << 0)
-#define ND_SMART_CRITICAL_HEALTH	(1 << 1)
-#define ND_SMART_FATAL_HEALTH	(1 << 2)
-
-struct nd_smart_payload {
-	__u32 flags;
-	__u8 reserved0[4];
-	__u8 health;
-	__u8 spares;
-	__u8 life_used;
-	__u8 alarm_flags;
-	__u16 temperature;
-	__u16 ctrl_temperature;
-	__u8 reserved1[15];
-	__u8 shutdown_state;
-	__u32 vendor_size;
-	__u8 vendor_data[92];
-} __packed;
-
-struct nd_cmd_smart_threshold {
-	__u32 status;
-	__u8 data[8];
-} __packed;
-
-struct nd_smart_threshold_payload {
-	__u8 alarm_control;
-	__u8 reserved0;
-	__u16 temperature;
-	__u8 spares;
-	__u8 reserved[3];
-} __packed;
-
 struct nd_cmd_dimm_flags {
 	__u32 status;
 	__u32 flags;
@@ -211,12 +163,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 
 #define ND_IOCTL 'N'
 
-#define ND_IOCTL_SMART			_IOWR(ND_IOCTL, ND_CMD_SMART,\
-					struct nd_cmd_smart)
-
-#define ND_IOCTL_SMART_THRESHOLD	_IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\
-					struct nd_cmd_smart_threshold)
-
 #define ND_IOCTL_DIMM_FLAGS		_IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\
 					struct nd_cmd_dimm_flags)
 
@@ -263,7 +209,7 @@ enum nd_driver_flags {
 };
 
 enum {
-	ND_MIN_NAMESPACE_SIZE = 0x00400000,
+	ND_MIN_NAMESPACE_SIZE = PAGE_SIZE,
 };
 
 enum ars_masks {
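
With the fixed-payload SMART ioctls gone, SMART data for Intel-family DIMMs travels through the same ND_CMD_CALL envelope the HPE and MSFT families already use. A hedged userspace sketch; the DSM function number and payload size below are placeholders, with real values coming from the NVDIMM_FAMILY_INTEL command spec:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/ndctl.h>

static int query_smart(int dimm_fd)
{
	size_t out = 128;	/* placeholder output payload size */
	struct nd_cmd_pkg *pkg = calloc(1, sizeof(*pkg) + out);
	int rc;

	if (!pkg)
		return -1;
	pkg->nd_family = NVDIMM_FAMILY_INTEL;
	pkg->nd_command = 1;	/* placeholder DSM function number */
	pkg->nd_size_out = out;
	rc = ioctl(dimm_fd, ND_IOCTL_CALL, pkg);	/* payload follows pkg */
	free(pkg);
	return rc;
}
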
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 403ab9cdb949..4849be5f9b3c 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
 #define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-struct page_map {
-	struct resource res;
-	struct percpu_ref *ref;
-	struct dev_pagemap pgmap;
-	struct vmem_altmap altmap;
-};
-
 static unsigned long order_at(struct resource *res, unsigned long pgoff)
 {
 	unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
@@ -248,34 +241,36 @@ int device_private_entry_fault(struct vm_area_struct *vma,
 EXPORT_SYMBOL(device_private_entry_fault);
 #endif /* CONFIG_DEVICE_PRIVATE */
 
-static void pgmap_radix_release(struct resource *res)
+static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff)
 {
 	unsigned long pgoff, order;
 
 	mutex_lock(&pgmap_lock);
-	foreach_order_pgoff(res, order, pgoff)
+	foreach_order_pgoff(res, order, pgoff) {
+		if (pgoff >= end_pgoff)
+			break;
 		radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff);
+	}
 	mutex_unlock(&pgmap_lock);
 
 	synchronize_rcu();
 }
 
-static unsigned long pfn_first(struct page_map *page_map)
+static unsigned long pfn_first(struct dev_pagemap *pgmap)
 {
-	struct dev_pagemap *pgmap = &page_map->pgmap;
-	const struct resource *res = &page_map->res;
-	struct vmem_altmap *altmap = pgmap->altmap;
+	const struct resource *res = &pgmap->res;
+	struct vmem_altmap *altmap = &pgmap->altmap;
 	unsigned long pfn;
 
 	pfn = res->start >> PAGE_SHIFT;
-	if (altmap)
+	if (pgmap->altmap_valid)
 		pfn += vmem_altmap_offset(altmap);
 	return pfn;
 }
 
-static unsigned long pfn_end(struct page_map *page_map)
+static unsigned long pfn_end(struct dev_pagemap *pgmap)
 {
-	const struct resource *res = &page_map->res;
+	const struct resource *res = &pgmap->res;
 
 	return (res->start + resource_size(res)) >> PAGE_SHIFT;
 }
@@ -283,15 +278,15 @@ static unsigned long pfn_end(struct page_map *page_map)
 #define for_each_device_pfn(pfn, map) \
 	for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
 
-static void devm_memremap_pages_release(struct device *dev, void *data)
+static void devm_memremap_pages_release(void *data)
 {
-	struct page_map *page_map = data;
-	struct resource *res = &page_map->res;
+	struct dev_pagemap *pgmap = data;
+	struct device *dev = pgmap->dev;
+	struct resource *res = &pgmap->res;
 	resource_size_t align_start, align_size;
-	struct dev_pagemap *pgmap = &page_map->pgmap;
 	unsigned long pfn;
 
-	for_each_device_pfn(pfn, page_map)
+	for_each_device_pfn(pfn, pgmap)
 		put_page(pfn_to_page(pfn));
 
 	if (percpu_ref_tryget_live(pgmap->ref)) {
@@ -301,56 +296,51 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 
 	/* pages are dead and unused, undo the arch mapping */
 	align_start = res->start & ~(SECTION_SIZE - 1);
-	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+		- align_start;
 
 	mem_hotplug_begin();
-	arch_remove_memory(align_start, align_size);
+	arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
+			&pgmap->altmap : NULL);
 	mem_hotplug_done();
 
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
-	pgmap_radix_release(res);
-	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
+	pgmap_radix_release(res, -1);
+	dev_WARN_ONCE(dev, pgmap->altmap.alloc,
 			"%s: failed to free all reserved pages\n", __func__);
 }
 
-/* assumes rcu_read_lock() held at entry */
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
-{
-	struct page_map *page_map;
-
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
-	return page_map ? &page_map->pgmap : NULL;
-}
-
 /**
  * devm_memremap_pages - remap and provide memmap backing for the given resource
  * @dev: hosting device for @res
- * @res: "host memory" address range
- * @ref: a live per-cpu reference count
- * @altmap: optional descriptor for allocating the memmap from @res
+ * @pgmap: pointer to a struct dev_pagemap
  *
  * Notes:
- * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
- *    (or devm release event). The expected order of events is that @ref has
+ * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case altmap_valid
+ *    must be set to true
+ *
+ * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
+ *    time (or devm release event). The expected order of events is that ref has
+ *    been through percpu_ref_kill() before devm_memremap_pages_release(). The
  *    wait for the completion of all references being dropped and
  *    percpu_ref_exit() must occur after devm_memremap_pages_release().
  *
- * 2/ @res is expected to be a host memory range that could feasibly be
+ * 4/ res is expected to be a host memory range that could feasibly be
  *    treated as a "System RAM" range, i.e. not a device mmio range, but
  *    this is not enforced.
  */
-void *devm_memremap_pages(struct device *dev, struct resource *res,
-		struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
 	resource_size_t align_start, align_size, align_end;
+	struct vmem_altmap *altmap = pgmap->altmap_valid ?
+			&pgmap->altmap : NULL;
 	unsigned long pfn, pgoff, order;
 	pgprot_t pgprot = PAGE_KERNEL;
-	struct dev_pagemap *pgmap;
-	struct page_map *page_map;
 	int error, nid, is_ram, i = 0;
+	struct resource *res = &pgmap->res;
 
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
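
A sketch of the caller obligations spelled out in notes 1/-3/ above; the ref_dead completion is an assumption about how a driver might observe its percpu_ref dying, not part of this interface:

	struct dev_pagemap *pgmap = &drv->pgmap;	/* caller-owned storage */

	pgmap->res = *res;		/* 1/ res, ref (and type) come first */
	pgmap->ref = &drv->ref;
	pgmap->altmap_valid = false;	/* 2/ or fill pgmap->altmap and set true */
	addr = devm_memremap_pages(dev, pgmap);

	/* 3/ teardown ordering, names hypothetical: */
	percpu_ref_kill(pgmap->ref);		/* before the devm release runs */
	wait_for_completion(&drv->ref_dead);	/* all page references dropped */
	percpu_ref_exit(pgmap->ref);		/* only after ..._pages_release() */
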
@@ -367,47 +357,18 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	if (is_ram == REGION_INTERSECTS)
 		return __va(res->start);
 
-	if (!ref)
+	if (!pgmap->ref)
 		return ERR_PTR(-EINVAL);
 
-	page_map = devres_alloc_node(devm_memremap_pages_release,
-			sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
-	if (!page_map)
-		return ERR_PTR(-ENOMEM);
-	pgmap = &page_map->pgmap;
-
-	memcpy(&page_map->res, res, sizeof(*res));
-
 	pgmap->dev = dev;
-	if (altmap) {
-		memcpy(&page_map->altmap, altmap, sizeof(*altmap));
-		pgmap->altmap = &page_map->altmap;
-	}
-	pgmap->ref = ref;
-	pgmap->res = &page_map->res;
-	pgmap->type = MEMORY_DEVICE_HOST;
-	pgmap->page_fault = NULL;
-	pgmap->page_free = NULL;
-	pgmap->data = NULL;
 
 	mutex_lock(&pgmap_lock);
 	error = 0;
 	align_end = align_start + align_size - 1;
 
 	foreach_order_pgoff(res, order, pgoff) {
-		struct dev_pagemap *dup;
-
-		rcu_read_lock();
-		dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff));
-		rcu_read_unlock();
-		if (dup) {
-			dev_err(dev, "%s: %pr collides with mapping for %s\n",
-					__func__, res, dev_name(dup->dev));
-			error = -EBUSY;
-			break;
-		}
 		error = __radix_tree_insert(&pgmap_radix,
-				PHYS_PFN(res->start) + pgoff, order, page_map);
+				PHYS_PFN(res->start) + pgoff, order, pgmap);
 		if (error) {
 			dev_err(dev, "%s: failed: %d\n", __func__, error);
 			break;
@@ -427,16 +388,16 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 		goto err_pfn_remap;
 
 	mem_hotplug_begin();
-	error = arch_add_memory(nid, align_start, align_size, false);
+	error = arch_add_memory(nid, align_start, align_size, altmap, false);
 	if (!error)
 		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
 					align_start >> PAGE_SHIFT,
-					align_size >> PAGE_SHIFT);
+					align_size >> PAGE_SHIFT, altmap);
 	mem_hotplug_done();
 	if (error)
 		goto err_add_memory;
 
-	for_each_device_pfn(pfn, page_map) {
+	for_each_device_pfn(pfn, pgmap) {
 		struct page *page = pfn_to_page(pfn);
 
 		/*
@@ -447,19 +408,21 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 		 */
 		list_del(&page->lru);
 		page->pgmap = pgmap;
-		percpu_ref_get(ref);
+		percpu_ref_get(pgmap->ref);
 		if (!(++i % 1024))
 			cond_resched();
 	}
-	devres_add(dev, page_map);
+
+	devm_add_action(dev, devm_memremap_pages_release, pgmap);
+
 	return __va(res->start);
 
  err_add_memory:
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
  err_pfn_remap:
  err_radix:
-	pgmap_radix_release(res);
-	devres_free(page_map);
+	pgmap_radix_release(res, pgoff);
+	devres_free(pgmap);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(devm_memremap_pages);
@@ -475,34 +438,39 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
 	altmap->alloc -= nr_pfns;
 }
 
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to lookup page_map
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
+ * is non-NULL but does not cover @pfn the reference to it will be released.
+ */
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+		struct dev_pagemap *pgmap)
 {
-	/*
-	 * 'memmap_start' is the virtual address for the first "struct
-	 * page" in this range of the vmemmap array. In the case of
-	 * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple
-	 * pointer arithmetic, so we can perform this to_vmem_altmap()
-	 * conversion without concern for the initialization state of
-	 * the struct page fields.
-	 */
-	struct page *page = (struct page *) memmap_start;
-	struct dev_pagemap *pgmap;
+	resource_size_t phys = PFN_PHYS(pfn);
 
 	/*
-	 * Unconditionally retrieve a dev_pagemap associated with the
-	 * given physical address, this is only for use in the
-	 * arch_{add|remove}_memory() for setting up and tearing down
-	 * the memmap.
+	 * In the cached case we're already holding a live reference.
 	 */
+	if (pgmap) {
+		if (phys >= pgmap->res.start && phys <= pgmap->res.end)
+			return pgmap;
+		put_dev_pagemap(pgmap);
+	}
+
+	/* fall back to slow path lookup */
 	rcu_read_lock();
-	pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
+	pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
+	if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+		pgmap = NULL;
 	rcu_read_unlock();
 
-	return pgmap ? pgmap->altmap : NULL;
+	return pgmap;
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
-
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 void put_zone_device_private_or_public_page(struct page *page)
 {
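
The cached-pgmap contract above is what the mm/gup.c rework below relies on: pass the previously returned pgmap back in and the common same-mapping case skips the radix tree walk entirely, with a single put after the loop. A condensed usage sketch:

	struct dev_pagemap *pgmap = NULL;
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		pgmap = get_dev_pagemap(pfn, pgmap);	/* fast path if cached */
		if (!pgmap)
			break;		/* not device memory, or ref went dead */
		/* ... operate on pfn_to_page(pfn) ... */
	}
	if (pgmap)
		put_dev_pagemap(pgmap);
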
diff --git a/mm/gup.c b/mm/gup.c
index 9e17d8db2d6b..1b46e6e74881 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1394,7 +1394,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
 
-		put_dev_pagemap(pgmap);
 		SetPageReferenced(page);
 		pages[*nr] = page;
 		(*nr)++;
@@ -1404,6 +1403,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 	ret = 1;
 
 pte_unmap:
+	if (pgmap)
+		put_dev_pagemap(pgmap);
 	pte_unmap(ptem);
 	return ret;
 }
@@ -1443,10 +1444,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 		SetPageReferenced(page);
 		pages[*nr] = page;
 		get_page(page);
-		put_dev_pagemap(pgmap);
 		(*nr)++;
 		pfn++;
 	} while (addr += PAGE_SIZE, addr != end);
+
+	if (pgmap)
+		put_dev_pagemap(pgmap);
 	return 1;
 }
 
diff --git a/mm/hmm.c b/mm/hmm.c
index 979211c7ccc8..320545b98ff5 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -836,10 +836,10 @@ static void hmm_devmem_release(struct device *dev, void *data)
 
 	mem_hotplug_begin();
 	if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY)
-		__remove_pages(zone, start_pfn, npages);
+		__remove_pages(zone, start_pfn, npages, NULL);
 	else
 		arch_remove_memory(start_pfn << PAGE_SHIFT,
-				npages << PAGE_SHIFT);
+				npages << PAGE_SHIFT, NULL);
 	mem_hotplug_done();
 
 	hmm_devmem_radix_release(resource);
@@ -880,7 +880,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 	else
 		devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 
-	devmem->pagemap.res = devmem->resource;
+	devmem->pagemap.res = *devmem->resource;
 	devmem->pagemap.page_fault = hmm_devmem_fault;
 	devmem->pagemap.page_free = hmm_devmem_free;
 	devmem->pagemap.dev = devmem->device;
@@ -929,17 +929,18 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 	 * want the linear mapping and thus use arch_add_memory().
 	 */
 	if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC)
-		ret = arch_add_memory(nid, align_start, align_size, false);
+		ret = arch_add_memory(nid, align_start, align_size, NULL,
+				false);
 	else
 		ret = add_pages(nid, align_start >> PAGE_SHIFT,
-				align_size >> PAGE_SHIFT, false);
+				align_size >> PAGE_SHIFT, NULL, false);
 	if (ret) {
 		mem_hotplug_done();
 		goto error_add_memory;
 	}
 	move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
 				align_start >> PAGE_SHIFT,
-				align_size >> PAGE_SHIFT);
+				align_size >> PAGE_SHIFT, NULL);
 	mem_hotplug_done();
 
 	for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) {
diff --git a/mm/memory.c b/mm/memory.c
index 53373b7a1512..2248529e71c1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1904,12 +1904,26 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_insert_pfn_prot);
 
+static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
+{
+	/* these checks mirror the abort conditions in vm_normal_page */
+	if (vma->vm_flags & VM_MIXEDMAP)
+		return true;
+	if (pfn_t_devmap(pfn))
+		return true;
+	if (pfn_t_special(pfn))
+		return true;
+	if (is_zero_pfn(pfn_t_to_pfn(pfn)))
+		return true;
+	return false;
+}
+
 static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn, bool mkwrite)
 {
 	pgprot_t pgprot = vma->vm_page_prot;
 
-	BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
+	BUG_ON(!vm_mixed_ok(vma, pfn));
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9bbd6982d4e4..b2bd52ff7605 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -247,7 +247,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
 static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
-		bool want_memblock)
+		struct vmem_altmap *altmap, bool want_memblock)
 {
 	int ret;
 	int i;
@@ -255,7 +255,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
 
-	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
+	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
 	if (ret < 0)
 		return ret;
 
@@ -289,18 +289,17 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
  * add the new pages.
  */
 int __ref __add_pages(int nid, unsigned long phys_start_pfn,
-		unsigned long nr_pages, bool want_memblock)
+		unsigned long nr_pages, struct vmem_altmap *altmap,
+		bool want_memblock)
 {
 	unsigned long i;
 	int err = 0;
 	int start_sec, end_sec;
-	struct vmem_altmap *altmap;
 
 	/* during initialize mem_map, align hot-added range to section */
 	start_sec = pfn_to_section_nr(phys_start_pfn);
 	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
 
-	altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
 	if (altmap) {
 		/*
 		 * Validate altmap is within bounds of the total request
@@ -315,7 +314,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 	}
 
 	for (i = start_sec; i <= end_sec; i++) {
-		err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
+		err = __add_section(nid, section_nr_to_pfn(i), altmap,
+				want_memblock);
 
 		/*
 		 * EEXIST is finally dealt with by ioresource collision
@@ -331,7 +331,6 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 out:
 	return err;
 }
-EXPORT_SYMBOL_GPL(__add_pages);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
@@ -534,7 +533,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
 }
 
 static int __remove_section(struct zone *zone, struct mem_section *ms,
-		unsigned long map_offset)
+		unsigned long map_offset, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn;
 	int scn_nr;
@@ -551,7 +550,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
 	start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
 	__remove_zone(zone, start_pfn);
 
-	sparse_remove_one_section(zone, ms, map_offset);
+	sparse_remove_one_section(zone, ms, map_offset, altmap);
 	return 0;
 }
 
@@ -567,7 +566,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
  * calling offline_pages().
  */
 int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
-		 unsigned long nr_pages)
+		 unsigned long nr_pages, struct vmem_altmap *altmap)
 {
 	unsigned long i;
 	unsigned long map_offset = 0;
@@ -575,10 +574,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 
 	/* In the ZONE_DEVICE case device driver owns the memory region */
 	if (is_dev_zone(zone)) {
-		struct page *page = pfn_to_page(phys_start_pfn);
-		struct vmem_altmap *altmap;
-
-		altmap = to_vmem_altmap((unsigned long) page);
 		if (altmap)
 			map_offset = vmem_altmap_offset(altmap);
 	} else {
@@ -609,7 +604,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
 
-		ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
+		ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
+				altmap);
 		map_offset = 0;
 		if (ret)
 			break;
@@ -799,8 +795,8 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
 	pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
 }
 
-void __ref move_pfn_range_to_zone(struct zone *zone,
-		unsigned long start_pfn, unsigned long nr_pages)
+void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+		unsigned long nr_pages, struct vmem_altmap *altmap)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	int nid = pgdat->node_id;
@@ -825,7 +821,8 @@ void __ref move_pfn_range_to_zone(struct zone *zone,
 	 * expects the zone spans the pfn range. All the pages in the range
 	 * are reserved so nobody should be touching them so we should be safe
 	 */
-	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);
+	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+			MEMMAP_HOTPLUG, altmap);
 
 	set_zone_contiguous(zone);
 }
@@ -897,7 +894,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
 	struct zone *zone;
 
 	zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
-	move_pfn_range_to_zone(zone, start_pfn, nr_pages);
+	move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL);
 	return zone;
 }
 
@@ -1146,7 +1143,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	}
 
 	/* call arch's memory hotadd */
-	ret = arch_add_memory(nid, start, size, true);
+	ret = arch_add_memory(nid, start, size, NULL, true);
 
 	if (ret < 0)
 		goto error;
@@ -1888,7 +1885,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
 	memblock_free(start, size);
 	memblock_remove(start, size);
 
-	arch_remove_memory(start, size);
+	arch_remove_memory(start, size, NULL);
 
 	try_offline_node(nid);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c7dd9c86e353..81e18ceef579 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5321,9 +5321,9 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
  * done. Non-atomic initialization, single-pass.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-		unsigned long start_pfn, enum memmap_context context)
+		unsigned long start_pfn, enum memmap_context context,
+		struct vmem_altmap *altmap)
 {
-	struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
 	unsigned long end_pfn = start_pfn + size;
 	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long pfn;
@@ -5429,7 +5429,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 
 #ifndef __HAVE_ARCH_MEMMAP_INIT
 #define memmap_init(size, nid, zone, start_pfn) \
-	memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
+	memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY, NULL)
 #endif
 
 static int zone_batchsize(struct zone *zone)
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 17acf01791fa..bd0276d5f66b 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 }
 
 /* need to make sure size is all the same during early stage */
-static void * __meminit alloc_block_buf(unsigned long size, int node)
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
 {
 	void *ptr;
 
@@ -107,33 +107,16 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
 }
 
 /**
- * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
- * @altmap - reserved page pool for the allocation
- * @nr_pfns - size (in pages) of the allocation
+ * altmap_alloc_block_buf - allocate pages from the device page map
+ * @altmap:	device page map
+ * @size:	size (in bytes) of the allocation
  *
- * Allocations are aligned to the size of the request
+ * Allocations are aligned to the size of the request.
  */
-static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
-		unsigned long nr_pfns)
-{
-	unsigned long pfn = vmem_altmap_next_pfn(altmap);
-	unsigned long nr_align;
-
-	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
-	nr_align = ALIGN(pfn, nr_align) - pfn;
-
-	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
-		return ULONG_MAX;
-	altmap->alloc += nr_pfns;
-	altmap->align += nr_align;
-	return pfn + nr_align;
-}
-
-static void * __meminit altmap_alloc_block_buf(unsigned long size,
+void * __meminit altmap_alloc_block_buf(unsigned long size,
 		struct vmem_altmap *altmap)
 {
-	unsigned long pfn, nr_pfns;
-	void *ptr;
+	unsigned long pfn, nr_pfns, nr_align;
 
 	if (size & ~PAGE_MASK) {
 		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
@@ -141,25 +124,20 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size,
141 return NULL; 124 return NULL;
142 } 125 }
143 126
127 pfn = vmem_altmap_next_pfn(altmap);
144 nr_pfns = size >> PAGE_SHIFT; 128 nr_pfns = size >> PAGE_SHIFT;
145 pfn = vmem_altmap_alloc(altmap, nr_pfns); 129 nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
146 if (pfn < ULONG_MAX) 130 nr_align = ALIGN(pfn, nr_align) - pfn;
147 ptr = __va(__pfn_to_phys(pfn)); 131 if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
148 else 132 return NULL;
149 ptr = NULL;
150 pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
151 __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
152 133
153 return ptr; 134 altmap->alloc += nr_pfns;
154} 135 altmap->align += nr_align;
136 pfn += nr_align;
155 137
156/* need to make sure size is all the same during early stage */ 138 pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
157void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node, 139 __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
158 struct vmem_altmap *altmap) 140 return __va(__pfn_to_phys(pfn));
159{
160 if (altmap)
161 return altmap_alloc_block_buf(size, altmap);
162 return alloc_block_buf(size, node);
163} 141}
164 142
165void __meminit vmemmap_verify(pte_t *pte, int node, 143void __meminit vmemmap_verify(pte_t *pte, int node,
@@ -178,7 +156,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
178 pte_t *pte = pte_offset_kernel(pmd, addr); 156 pte_t *pte = pte_offset_kernel(pmd, addr);
179 if (pte_none(*pte)) { 157 if (pte_none(*pte)) {
180 pte_t entry; 158 pte_t entry;
181 void *p = alloc_block_buf(PAGE_SIZE, node); 159 void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
182 if (!p) 160 if (!p)
183 return NULL; 161 return NULL;
184 entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); 162 entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -278,7 +256,8 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
278 return 0; 256 return 0;
279} 257}
280 258
281struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) 259struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
260 struct vmem_altmap *altmap)
282{ 261{
283 unsigned long start; 262 unsigned long start;
284 unsigned long end; 263 unsigned long end;
@@ -288,7 +267,7 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
288 start = (unsigned long)map; 267 start = (unsigned long)map;
289 end = (unsigned long)(map + PAGES_PER_SECTION); 268 end = (unsigned long)(map + PAGES_PER_SECTION);
290 269
291 if (vmemmap_populate(start, end, nid)) 270 if (vmemmap_populate(start, end, nid, altmap))
292 return NULL; 271 return NULL;
293 272
294 return map; 273 return map;
@@ -318,7 +297,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
318 if (!present_section_nr(pnum)) 297 if (!present_section_nr(pnum))
319 continue; 298 continue;
320 299
321 map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); 300 map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
322 if (map_map[pnum]) 301 if (map_map[pnum])
323 continue; 302 continue;
324 ms = __nr_to_section(pnum); 303 ms = __nr_to_section(pnum);
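
With vmem_altmap_alloc() folded in, altmap_alloc_block_buf() now does the reservation accounting inline: find_first_bit() on nr_pfns yields the lowest set bit of the request size, so the allocation is aligned to the largest power of two that divides it, and exhaustion now reports NULL instead of the old ULONG_MAX sentinel. A worked example of the alignment arithmetic, with illustrative values:

/* Worked example (values are illustrative): request 16 pages with the
 * reservation's next free pfn at 0x1234. */
unsigned long pfn = 0x1234;
unsigned long nr_pfns = 16;
unsigned long nr_align;

/* lowest set bit of 16 is bit 4, so align to 16 pages */
nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);

/* pad to the next 16-page boundary: ALIGN(0x1234, 0x10) - 0x1234 = 0xc */
nr_align = ALIGN(pfn, nr_align) - pfn;

/* 16 + 12 pages are charged to the reservation and the buffer starts at
 * pfn 0x1240, naturally aligned for the 16-page request */
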
diff --git a/mm/sparse.c b/mm/sparse.c
index 6b8b5e91ceef..7af5e7a92528 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -421,7 +421,8 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
421} 421}
422 422
423#ifndef CONFIG_SPARSEMEM_VMEMMAP 423#ifndef CONFIG_SPARSEMEM_VMEMMAP
424struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) 424struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
425 struct vmem_altmap *altmap)
425{ 426{
426 struct page *map; 427 struct page *map;
427 unsigned long size; 428 unsigned long size;
@@ -476,7 +477,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
476 477
477 if (!present_section_nr(pnum)) 478 if (!present_section_nr(pnum))
478 continue; 479 continue;
479 map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); 480 map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
480 if (map_map[pnum]) 481 if (map_map[pnum])
481 continue; 482 continue;
482 ms = __nr_to_section(pnum); 483 ms = __nr_to_section(pnum);
@@ -504,7 +505,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
504 struct mem_section *ms = __nr_to_section(pnum); 505 struct mem_section *ms = __nr_to_section(pnum);
505 int nid = sparse_early_nid(ms); 506 int nid = sparse_early_nid(ms);
506 507
507 map = sparse_mem_map_populate(pnum, nid); 508 map = sparse_mem_map_populate(pnum, nid, NULL);
508 if (map) 509 if (map)
509 return map; 510 return map;
510 511
@@ -682,17 +683,19 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
682#endif 683#endif
683 684
684#ifdef CONFIG_SPARSEMEM_VMEMMAP 685#ifdef CONFIG_SPARSEMEM_VMEMMAP
685static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) 686static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
687 struct vmem_altmap *altmap)
686{ 688{
687 /* This will make the necessary allocations eventually. */ 689 /* This will make the necessary allocations eventually. */
688 return sparse_mem_map_populate(pnum, nid); 690 return sparse_mem_map_populate(pnum, nid, altmap);
689} 691}
690static void __kfree_section_memmap(struct page *memmap) 692static void __kfree_section_memmap(struct page *memmap,
693 struct vmem_altmap *altmap)
691{ 694{
692 unsigned long start = (unsigned long)memmap; 695 unsigned long start = (unsigned long)memmap;
693 unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); 696 unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
694 697
695 vmemmap_free(start, end); 698 vmemmap_free(start, end, altmap);
696} 699}
697#ifdef CONFIG_MEMORY_HOTREMOVE 700#ifdef CONFIG_MEMORY_HOTREMOVE
698static void free_map_bootmem(struct page *memmap) 701static void free_map_bootmem(struct page *memmap)
@@ -700,7 +703,7 @@ static void free_map_bootmem(struct page *memmap)
700 unsigned long start = (unsigned long)memmap; 703 unsigned long start = (unsigned long)memmap;
701 unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); 704 unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
702 705
703 vmemmap_free(start, end); 706 vmemmap_free(start, end, NULL);
704} 707}
705#endif /* CONFIG_MEMORY_HOTREMOVE */ 708#endif /* CONFIG_MEMORY_HOTREMOVE */
706#else 709#else
@@ -725,12 +728,14 @@ got_map_ptr:
725 return ret; 728 return ret;
726} 729}
727 730
728static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) 731static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
732 struct vmem_altmap *altmap)
729{ 733{
730 return __kmalloc_section_memmap(); 734 return __kmalloc_section_memmap();
731} 735}
732 736
733static void __kfree_section_memmap(struct page *memmap) 737static void __kfree_section_memmap(struct page *memmap,
738 struct vmem_altmap *altmap)
734{ 739{
735 if (is_vmalloc_addr(memmap)) 740 if (is_vmalloc_addr(memmap))
736 vfree(memmap); 741 vfree(memmap);
@@ -777,7 +782,8 @@ static void free_map_bootmem(struct page *memmap)
777 * set. If this is <=0, then that means that the passed-in 782 * set. If this is <=0, then that means that the passed-in
778 * map was not consumed and must be freed. 783 * map was not consumed and must be freed.
779 */ 784 */
780int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn) 785int __meminit sparse_add_one_section(struct pglist_data *pgdat,
786 unsigned long start_pfn, struct vmem_altmap *altmap)
781{ 787{
782 unsigned long section_nr = pfn_to_section_nr(start_pfn); 788 unsigned long section_nr = pfn_to_section_nr(start_pfn);
783 struct mem_section *ms; 789 struct mem_section *ms;
@@ -793,12 +799,12 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long st
793 ret = sparse_index_init(section_nr, pgdat->node_id); 799 ret = sparse_index_init(section_nr, pgdat->node_id);
794 if (ret < 0 && ret != -EEXIST) 800 if (ret < 0 && ret != -EEXIST)
795 return ret; 801 return ret;
796 memmap = kmalloc_section_memmap(section_nr, pgdat->node_id); 802 memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
797 if (!memmap) 803 if (!memmap)
798 return -ENOMEM; 804 return -ENOMEM;
799 usemap = __kmalloc_section_usemap(); 805 usemap = __kmalloc_section_usemap();
800 if (!usemap) { 806 if (!usemap) {
801 __kfree_section_memmap(memmap); 807 __kfree_section_memmap(memmap, altmap);
802 return -ENOMEM; 808 return -ENOMEM;
803 } 809 }
804 810
@@ -820,7 +826,7 @@ out:
820 pgdat_resize_unlock(pgdat, &flags); 826 pgdat_resize_unlock(pgdat, &flags);
821 if (ret <= 0) { 827 if (ret <= 0) {
822 kfree(usemap); 828 kfree(usemap);
823 __kfree_section_memmap(memmap); 829 __kfree_section_memmap(memmap, altmap);
824 } 830 }
825 return ret; 831 return ret;
826} 832}
@@ -847,7 +853,8 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
847} 853}
848#endif 854#endif
849 855
850static void free_section_usemap(struct page *memmap, unsigned long *usemap) 856static void free_section_usemap(struct page *memmap, unsigned long *usemap,
857 struct vmem_altmap *altmap)
851{ 858{
852 struct page *usemap_page; 859 struct page *usemap_page;
853 860
@@ -861,7 +868,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
861 if (PageSlab(usemap_page) || PageCompound(usemap_page)) { 868 if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
862 kfree(usemap); 869 kfree(usemap);
863 if (memmap) 870 if (memmap)
864 __kfree_section_memmap(memmap); 871 __kfree_section_memmap(memmap, altmap);
865 return; 872 return;
866 } 873 }
867 874
@@ -875,7 +882,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
875} 882}
876 883
877void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, 884void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
878 unsigned long map_offset) 885 unsigned long map_offset, struct vmem_altmap *altmap)
879{ 886{
880 struct page *memmap = NULL; 887 struct page *memmap = NULL;
881 unsigned long *usemap = NULL, flags; 888 unsigned long *usemap = NULL, flags;
@@ -893,7 +900,7 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
893 900
894 clear_hwpoisoned_pages(memmap + map_offset, 901 clear_hwpoisoned_pages(memmap + map_offset,
895 PAGES_PER_SECTION - map_offset); 902 PAGES_PER_SECTION - map_offset);
896 free_section_usemap(memmap, usemap); 903 free_section_usemap(memmap, usemap, altmap);
897} 904}
898#endif /* CONFIG_MEMORY_HOTREMOVE */ 905#endif /* CONFIG_MEMORY_HOTREMOVE */
899#endif /* CONFIG_MEMORY_HOTPLUG */ 906#endif /* CONFIG_MEMORY_HOTPLUG */
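
The sparse.c changes are mechanical plumbing with one invariant worth stating: the altmap (possibly NULL) that backed a section's memmap at hot-add time must be handed back at hot-remove time, so vmemmap_free() can credit the pages to the device reservation rather than the page allocator. A sketch of that symmetry, assuming the signatures added above (the example_* wrappers are illustrative):

/* Sketch: pair each section's teardown with the altmap used at setup. */
static int example_add(struct pglist_data *pgdat, unsigned long start_pfn,
		struct vmem_altmap *altmap)
{
	return sparse_add_one_section(pgdat, start_pfn, altmap);
}

static void example_remove(struct zone *zone, struct mem_section *ms,
		unsigned long map_offset, struct vmem_altmap *altmap)
{
	/* must be the same altmap as at add time, or NULL for regular RAM */
	sparse_remove_one_section(zone, ms, map_offset, altmap);
}
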
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index db33b28c5ef3..0392153a0009 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
37 37
38nfit-y := $(ACPI_SRC)/core.o 38nfit-y := $(ACPI_SRC)/core.o
39nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o 39nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
40nfit-y += acpi_nfit_test.o
40nfit-y += config_check.o 41nfit-y += config_check.o
41 42
42nd_pmem-y := $(NVDIMM_SRC)/pmem.o 43nd_pmem-y := $(NVDIMM_SRC)/pmem.o
43nd_pmem-y += pmem-dax.o 44nd_pmem-y += pmem-dax.o
45nd_pmem-y += pmem_test.o
44nd_pmem-y += config_check.o 46nd_pmem-y += config_check.o
45 47
46nd_btt-y := $(NVDIMM_SRC)/btt.o 48nd_btt-y := $(NVDIMM_SRC)/btt.o
@@ -57,6 +59,7 @@ dax-y += config_check.o
57 59
58device_dax-y := $(DAX_SRC)/device.o 60device_dax-y := $(DAX_SRC)/device.o
59device_dax-y += dax-dev.o 61device_dax-y += dax-dev.o
62device_dax-y += device_dax_test.o
60device_dax-y += config_check.o 63device_dax-y += config_check.o
61 64
62dax_pmem-y := $(DAX_SRC)/pmem.o 65dax_pmem-y := $(DAX_SRC)/pmem.o
@@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
75libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o 78libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
76libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o 79libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
77libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o 80libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
81libnvdimm-y += libnvdimm_test.o
78libnvdimm-y += config_check.o 82libnvdimm-y += config_check.o
79 83
80obj-m += test/ 84obj-m += test/
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
new file mode 100644
index 000000000000..43521512e577
--- /dev/null
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -0,0 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright(c) 2018 Intel Corporation. All rights reserved.
3
4#include <linux/module.h>
5#include <linux/printk.h>
6#include "watermark.h"
7
8nfit_test_watermark(acpi_nfit);
diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c
new file mode 100644
index 000000000000..24b17bf42429
--- /dev/null
+++ b/tools/testing/nvdimm/device_dax_test.c
@@ -0,0 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright(c) 2018 Intel Corporation. All rights reserved.
3
4#include <linux/module.h>
5#include <linux/printk.h>
6#include "watermark.h"
7
8nfit_test_watermark(device_dax);
diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c
new file mode 100644
index 000000000000..00ca30b23932
--- /dev/null
+++ b/tools/testing/nvdimm/libnvdimm_test.c
@@ -0,0 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright(c) 2018 Intel Corporation. All rights reserved.
3
4#include <linux/module.h>
5#include <linux/printk.h>
6#include "watermark.h"
7
8nfit_test_watermark(libnvdimm);
diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c
new file mode 100644
index 000000000000..fd38f92275cf
--- /dev/null
+++ b/tools/testing/nvdimm/pmem_test.c
@@ -0,0 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright(c) 2018 Intel Corporation. All rights reserved.
3
4#include <linux/module.h>
5#include <linux/printk.h>
6#include "watermark.h"
7
8nfit_test_watermark(pmem);
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index e1f75a1914a1..ff9d3a5825e1 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
104} 104}
105EXPORT_SYMBOL(__wrap_devm_memremap); 105EXPORT_SYMBOL(__wrap_devm_memremap);
106 106
107void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, 107void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
108 struct percpu_ref *ref, struct vmem_altmap *altmap)
109{ 108{
110 resource_size_t offset = res->start; 109 resource_size_t offset = pgmap->res.start;
111 struct nfit_test_resource *nfit_res = get_nfit_res(offset); 110 struct nfit_test_resource *nfit_res = get_nfit_res(offset);
112 111
113 if (nfit_res) 112 if (nfit_res)
114 return nfit_res->buf + offset - nfit_res->res.start; 113 return nfit_res->buf + offset - nfit_res->res.start;
115 return devm_memremap_pages(dev, res, ref, altmap); 114 return devm_memremap_pages(dev, pgmap);
116} 115}
117EXPORT_SYMBOL(__wrap_devm_memremap_pages); 116EXPORT_SYMBOL(__wrap_devm_memremap_pages);
118 117
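
The wrapped signature tracks the "memremap: change devm_memremap_pages interface to use struct dev_pagemap" patch in this pull: the resource, percpu ref, and altmap that used to be separate arguments now travel inside a caller-provided struct dev_pagemap, which is why the mock only needs pgmap->res.start to find its test resource. A sketch of a new-style caller, assuming the 4.16-era dev_pagemap fields (res, ref, altmap, altmap_valid); the helper name is illustrative:

/* Sketch of a post-series caller (field names assume the 4.16 layout). */
static void *example_map_device_pages(struct device *dev,
		struct resource *res, struct percpu_ref *ref,
		struct vmem_altmap *altmap)
{
	struct dev_pagemap *pgmap;

	pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return ERR_PTR(-ENOMEM);

	pgmap->res = *res;		/* range to map, replaces the res arg */
	pgmap->ref = ref;		/* replaces the percpu_ref arg */
	if (altmap) {
		pgmap->altmap = *altmap;	/* replaces the altmap arg */
		pgmap->altmap_valid = true;
	}
	return devm_memremap_pages(dev, pgmap);
}
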
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 7217b2b953b5..620fa78b3b1b 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -27,6 +27,7 @@
27#include <nfit.h> 27#include <nfit.h>
28#include <nd.h> 28#include <nd.h>
29#include "nfit_test.h" 29#include "nfit_test.h"
30#include "../watermark.h"
30 31
31/* 32/*
32 * Generate an NFIT table to describe the following topology: 33 * Generate an NFIT table to describe the following topology:
@@ -137,6 +138,14 @@ static u32 handle[] = {
137 138
138static unsigned long dimm_fail_cmd_flags[NUM_DCR]; 139static unsigned long dimm_fail_cmd_flags[NUM_DCR];
139 140
141struct nfit_test_fw {
142 enum intel_fw_update_state state;
143 u32 context;
144 u64 version;
145 u32 size_received;
146 u64 end_time;
147};
148
140struct nfit_test { 149struct nfit_test {
141 struct acpi_nfit_desc acpi_desc; 150 struct acpi_nfit_desc acpi_desc;
142 struct platform_device pdev; 151 struct platform_device pdev;
@@ -168,8 +177,11 @@ struct nfit_test {
168 spinlock_t lock; 177 spinlock_t lock;
169 } ars_state; 178 } ars_state;
170 struct device *dimm_dev[NUM_DCR]; 179 struct device *dimm_dev[NUM_DCR];
180 struct nd_intel_smart *smart;
181 struct nd_intel_smart_threshold *smart_threshold;
171 struct badrange badrange; 182 struct badrange badrange;
172 struct work_struct work; 183 struct work_struct work;
184 struct nfit_test_fw *fw;
173}; 185};
174 186
175static struct workqueue_struct *nfit_wq; 187static struct workqueue_struct *nfit_wq;
@@ -181,6 +193,226 @@ static struct nfit_test *to_nfit_test(struct device *dev)
181 return container_of(pdev, struct nfit_test, pdev); 193 return container_of(pdev, struct nfit_test, pdev);
182} 194}
183 195
196static int nd_intel_test_get_fw_info(struct nfit_test *t,
197 struct nd_intel_fw_info *nd_cmd, unsigned int buf_len,
198 int idx)
199{
200 struct device *dev = &t->pdev.dev;
201 struct nfit_test_fw *fw = &t->fw[idx];
202
203 dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n",
204 __func__, t, nd_cmd, buf_len, idx);
205
206 if (buf_len < sizeof(*nd_cmd))
207 return -EINVAL;
208
209 nd_cmd->status = 0;
210 nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE;
211 nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN;
212 nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL;
213 nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME;
214 nd_cmd->update_cap = 0;
215 nd_cmd->fis_version = INTEL_FW_FIS_VERSION;
216 nd_cmd->run_version = 0;
217 nd_cmd->updated_version = fw->version;
218
219 return 0;
220}
221
222static int nd_intel_test_start_update(struct nfit_test *t,
223 struct nd_intel_fw_start *nd_cmd, unsigned int buf_len,
224 int idx)
225{
226 struct device *dev = &t->pdev.dev;
227 struct nfit_test_fw *fw = &t->fw[idx];
228
229 dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
230 __func__, t, nd_cmd, buf_len, idx);
231
232 if (buf_len < sizeof(*nd_cmd))
233 return -EINVAL;
234
235 if (fw->state != FW_STATE_NEW) {
236 /* extended status, FW update in progress */
237 nd_cmd->status = 0x10007;
238 return 0;
239 }
240
241 fw->state = FW_STATE_IN_PROGRESS;
242 fw->context++;
243 fw->size_received = 0;
244 nd_cmd->status = 0;
245 nd_cmd->context = fw->context;
246
247 dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context);
248
249 return 0;
250}
251
252static int nd_intel_test_send_data(struct nfit_test *t,
253 struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len,
254 int idx)
255{
256 struct device *dev = &t->pdev.dev;
257 struct nfit_test_fw *fw = &t->fw[idx];
258 u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length];
259
260 dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
261 __func__, t, nd_cmd, buf_len, idx);
262
263 if (buf_len < sizeof(*nd_cmd))
264 return -EINVAL;
265
266
267 dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status);
268 dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]);
269 dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1,
270 nd_cmd->data[nd_cmd->length-1]);
271
272 if (fw->state != FW_STATE_IN_PROGRESS) {
273 dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__);
274 *status = 0x5;
275 return 0;
276 }
277
278 if (nd_cmd->context != fw->context) {
279 dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
280 __func__, nd_cmd->context, fw->context);
281 *status = 0x10007;
282 return 0;
283 }
284
285 /*
 286	 * reject if offset + len exceeds the size of fw storage,
 287	 * or if length exceeds the max send length
288 */
289 if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE ||
290 nd_cmd->length > INTEL_FW_MAX_SEND_LEN) {
291 *status = 0x3;
292 dev_dbg(dev, "%s: buffer boundary violation\n", __func__);
293 return 0;
294 }
295
296 fw->size_received += nd_cmd->length;
297 dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n",
298 __func__, nd_cmd->length, fw->size_received);
299 *status = 0;
300 return 0;
301}
302
303static int nd_intel_test_finish_fw(struct nfit_test *t,
304 struct nd_intel_fw_finish_update *nd_cmd,
305 unsigned int buf_len, int idx)
306{
307 struct device *dev = &t->pdev.dev;
308 struct nfit_test_fw *fw = &t->fw[idx];
309
310 dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
311 __func__, t, nd_cmd, buf_len, idx);
312
313 if (fw->state == FW_STATE_UPDATED) {
314 /* update already done, need cold boot */
315 nd_cmd->status = 0x20007;
316 return 0;
317 }
318
319 dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n",
320 __func__, nd_cmd->context, nd_cmd->ctrl_flags);
321
322 switch (nd_cmd->ctrl_flags) {
323 case 0: /* finish */
324 if (nd_cmd->context != fw->context) {
325 dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
326 __func__, nd_cmd->context,
327 fw->context);
328 nd_cmd->status = 0x10007;
329 return 0;
330 }
331 nd_cmd->status = 0;
332 fw->state = FW_STATE_VERIFY;
333 /* set 1 second of time for firmware "update" */
334 fw->end_time = jiffies + HZ;
335 break;
336
337 case 1: /* abort */
338 fw->size_received = 0;
339 /* successfully aborted status */
340 nd_cmd->status = 0x40007;
341 fw->state = FW_STATE_NEW;
342 dev_dbg(dev, "%s: abort successful\n", __func__);
343 break;
344
345 default: /* bad control flag */
346 dev_warn(dev, "%s: unknown control flag: %#x\n",
347 __func__, nd_cmd->ctrl_flags);
348 return -EINVAL;
349 }
350
351 return 0;
352}
353
354static int nd_intel_test_finish_query(struct nfit_test *t,
355 struct nd_intel_fw_finish_query *nd_cmd,
356 unsigned int buf_len, int idx)
357{
358 struct device *dev = &t->pdev.dev;
359 struct nfit_test_fw *fw = &t->fw[idx];
360
361 dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
362 __func__, t, nd_cmd, buf_len, idx);
363
364 if (buf_len < sizeof(*nd_cmd))
365 return -EINVAL;
366
367 if (nd_cmd->context != fw->context) {
368 dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
369 __func__, nd_cmd->context, fw->context);
370 nd_cmd->status = 0x10007;
371 return 0;
372 }
373
374 dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context);
375
376 switch (fw->state) {
377 case FW_STATE_NEW:
378 nd_cmd->updated_fw_rev = 0;
379 nd_cmd->status = 0;
380 dev_dbg(dev, "%s: new state\n", __func__);
381 break;
382
383 case FW_STATE_IN_PROGRESS:
384 /* sequencing error */
385 nd_cmd->status = 0x40007;
386 nd_cmd->updated_fw_rev = 0;
387 dev_dbg(dev, "%s: sequence error\n", __func__);
388 break;
389
390 case FW_STATE_VERIFY:
391 if (time_is_after_jiffies64(fw->end_time)) {
392 nd_cmd->updated_fw_rev = 0;
393 nd_cmd->status = 0x20007;
394 dev_dbg(dev, "%s: still verifying\n", __func__);
395 break;
396 }
397
 398		dev_dbg(dev, "%s: transition out of verify\n", __func__);
399 fw->state = FW_STATE_UPDATED;
400 /* we are going to fall through if it's "done" */
401 case FW_STATE_UPDATED:
402 nd_cmd->status = 0;
403 /* bogus test version */
404 fw->version = nd_cmd->updated_fw_rev =
405 INTEL_FW_FAKE_VERSION;
406 dev_dbg(dev, "%s: updated\n", __func__);
407 break;
408
409 default: /* we should never get here */
410 return -EINVAL;
411 }
412
413 return 0;
414}
415
184static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, 416static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
185 unsigned int buf_len) 417 unsigned int buf_len)
186{ 418{
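
Taken together, the five handlers above emulate the Intel DSM firmware-update flow as a small state machine: NEW -> (start) IN_PROGRESS -> (finish, ctrl_flags == 0) VERIFY -> (query after ~1 second) UPDATED, with abort returning to NEW and a stale context ID rejected at every step with extended status 0x10007. A sketch of the happy-path sequence a test client drives, using the payload structs added to nfit_test.h below; send_dsm() is a hypothetical stand-in for the ND_CMD_CALL marshalling:

/* Illustrative happy path through the emulated state machine. */
static void example_fw_update(void)
{
	struct nd_intel_fw_info info = { 0 };
	struct nd_intel_fw_start start = { 0 };
	struct nd_intel_fw_finish_update finish = { 0 };
	struct nd_intel_fw_finish_query query = { 0 };

	send_dsm(ND_INTEL_FW_GET_INFO, &info, sizeof(info));
	send_dsm(ND_INTEL_FW_START_UPDATE, &start, sizeof(start));

	/* one or more ND_INTEL_FW_SEND_DATA chunks follow, each tagged
	 * with start.context, each <= info.max_send_len, and all within
	 * info.storage_size */

	finish.context = start.context;
	finish.ctrl_flags = 0;			/* 0 = finish, 1 = abort */
	send_dsm(ND_INTEL_FW_FINISH_UPDATE, &finish, sizeof(finish));

	query.context = start.context;
	do {					/* 0x20007: still verifying */
		send_dsm(ND_INTEL_FW_FINISH_QUERY, &query, sizeof(query));
	} while (query.status == 0x20007);
	/* query.updated_fw_rev now reads back INTEL_FW_FAKE_VERSION */
}
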
@@ -440,39 +672,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
440 return 0; 672 return 0;
441} 673}
442 674
443static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) 675static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
676 struct nd_intel_smart *smart_data)
444{ 677{
445 static const struct nd_smart_payload smart_data = {
446 .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID
447 | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID
448 | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID,
449 .health = ND_SMART_NON_CRITICAL_HEALTH,
450 .temperature = 23 * 16,
451 .spares = 75,
452 .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
453 .life_used = 5,
454 .shutdown_state = 0,
455 .vendor_size = 0,
456 };
457
458 if (buf_len < sizeof(*smart)) 678 if (buf_len < sizeof(*smart))
459 return -EINVAL; 679 return -EINVAL;
460 memcpy(smart->data, &smart_data, sizeof(smart_data)); 680 memcpy(smart, smart_data, sizeof(*smart));
461 return 0; 681 return 0;
462} 682}
463 683
464static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, 684static int nfit_test_cmd_smart_threshold(
465 unsigned int buf_len) 685 struct nd_intel_smart_threshold *out,
686 unsigned int buf_len,
687 struct nd_intel_smart_threshold *smart_t)
466{ 688{
467 static const struct nd_smart_threshold_payload smart_t_data = {
468 .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
469 .temperature = 40 * 16,
470 .spares = 5,
471 };
472
473 if (buf_len < sizeof(*smart_t)) 689 if (buf_len < sizeof(*smart_t))
474 return -EINVAL; 690 return -EINVAL;
475 memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data)); 691 memcpy(out, smart_t, sizeof(*smart_t));
692 return 0;
693}
694
695static void smart_notify(struct device *bus_dev,
696 struct device *dimm_dev, struct nd_intel_smart *smart,
697 struct nd_intel_smart_threshold *thresh)
698{
699 dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
700 __func__, thresh->alarm_control, thresh->spares,
701 smart->spares, thresh->media_temperature,
702 smart->media_temperature, thresh->ctrl_temperature,
703 smart->ctrl_temperature);
704 if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
705 && smart->spares
706 <= thresh->spares)
707 || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
708 && smart->media_temperature
709 >= thresh->media_temperature)
710 || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
711 && smart->ctrl_temperature
712 >= thresh->ctrl_temperature)) {
713 device_lock(bus_dev);
714 __acpi_nvdimm_notify(dimm_dev, 0x81);
715 device_unlock(bus_dev);
716 }
717}
718
719static int nfit_test_cmd_smart_set_threshold(
720 struct nd_intel_smart_set_threshold *in,
721 unsigned int buf_len,
722 struct nd_intel_smart_threshold *thresh,
723 struct nd_intel_smart *smart,
724 struct device *bus_dev, struct device *dimm_dev)
725{
726 unsigned int size;
727
728 size = sizeof(*in) - 4;
729 if (buf_len < size)
730 return -EINVAL;
731 memcpy(thresh->data, in, size);
732 in->status = 0;
733 smart_notify(bus_dev, dimm_dev, smart, thresh);
734
476 return 0; 735 return 0;
477} 736}
478 737
@@ -563,6 +822,52 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
563 return 0; 822 return 0;
564} 823}
565 824
825static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
826 struct nd_intel_lss *nd_cmd, unsigned int buf_len)
827{
828 struct device *dev = &t->pdev.dev;
829
830 if (buf_len < sizeof(*nd_cmd))
831 return -EINVAL;
832
833 switch (nd_cmd->enable) {
834 case 0:
835 nd_cmd->status = 0;
836 dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n",
837 __func__);
838 break;
839 case 1:
840 nd_cmd->status = 0;
841 dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n",
842 __func__);
843 break;
844 default:
845 dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable);
846 nd_cmd->status = 0x3;
847 break;
848 }
849
850
851 return 0;
852}
853
854static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
855{
856 int i;
857
858 /* lookup per-dimm data */
859 for (i = 0; i < ARRAY_SIZE(handle); i++)
860 if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
861 break;
862 if (i >= ARRAY_SIZE(handle))
863 return -ENXIO;
864
865 if ((1 << func) & dimm_fail_cmd_flags[i])
866 return -EIO;
867
868 return i;
869}
870
566static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, 871static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
567 struct nvdimm *nvdimm, unsigned int cmd, void *buf, 872 struct nvdimm *nvdimm, unsigned int cmd, void *buf,
568 unsigned int buf_len, int *cmd_rc) 873 unsigned int buf_len, int *cmd_rc)
@@ -591,22 +896,57 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
591 func = call_pkg->nd_command; 896 func = call_pkg->nd_command;
592 if (call_pkg->nd_family != nfit_mem->family) 897 if (call_pkg->nd_family != nfit_mem->family)
593 return -ENOTTY; 898 return -ENOTTY;
899
900 i = get_dimm(nfit_mem, func);
901 if (i < 0)
902 return i;
903
904 switch (func) {
905 case ND_INTEL_ENABLE_LSS_STATUS:
906 return nd_intel_test_cmd_set_lss_status(t,
907 buf, buf_len);
908 case ND_INTEL_FW_GET_INFO:
909 return nd_intel_test_get_fw_info(t, buf,
910 buf_len, i - t->dcr_idx);
911 case ND_INTEL_FW_START_UPDATE:
912 return nd_intel_test_start_update(t, buf,
913 buf_len, i - t->dcr_idx);
914 case ND_INTEL_FW_SEND_DATA:
915 return nd_intel_test_send_data(t, buf,
916 buf_len, i - t->dcr_idx);
917 case ND_INTEL_FW_FINISH_UPDATE:
918 return nd_intel_test_finish_fw(t, buf,
919 buf_len, i - t->dcr_idx);
920 case ND_INTEL_FW_FINISH_QUERY:
921 return nd_intel_test_finish_query(t, buf,
922 buf_len, i - t->dcr_idx);
923 case ND_INTEL_SMART:
924 return nfit_test_cmd_smart(buf, buf_len,
925 &t->smart[i - t->dcr_idx]);
926 case ND_INTEL_SMART_THRESHOLD:
927 return nfit_test_cmd_smart_threshold(buf,
928 buf_len,
929 &t->smart_threshold[i -
930 t->dcr_idx]);
931 case ND_INTEL_SMART_SET_THRESHOLD:
932 return nfit_test_cmd_smart_set_threshold(buf,
933 buf_len,
934 &t->smart_threshold[i -
935 t->dcr_idx],
936 &t->smart[i - t->dcr_idx],
937 &t->pdev.dev, t->dimm_dev[i]);
938 default:
939 return -ENOTTY;
940 }
594 } 941 }
595 942
596 if (!test_bit(cmd, &cmd_mask) 943 if (!test_bit(cmd, &cmd_mask)
597 || !test_bit(func, &nfit_mem->dsm_mask)) 944 || !test_bit(func, &nfit_mem->dsm_mask))
598 return -ENOTTY; 945 return -ENOTTY;
599 946
600 /* lookup label space for the given dimm */ 947 i = get_dimm(nfit_mem, func);
601 for (i = 0; i < ARRAY_SIZE(handle); i++) 948 if (i < 0)
602 if (__to_nfit_memdev(nfit_mem)->device_handle == 949 return i;
603 handle[i])
604 break;
605 if (i >= ARRAY_SIZE(handle))
606 return -ENXIO;
607
608 if ((1 << func) & dimm_fail_cmd_flags[i])
609 return -EIO;
610 950
611 switch (func) { 951 switch (func) {
612 case ND_CMD_GET_CONFIG_SIZE: 952 case ND_CMD_GET_CONFIG_SIZE:
@@ -620,15 +960,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
620 rc = nfit_test_cmd_set_config_data(buf, buf_len, 960 rc = nfit_test_cmd_set_config_data(buf, buf_len,
621 t->label[i - t->dcr_idx]); 961 t->label[i - t->dcr_idx]);
622 break; 962 break;
623 case ND_CMD_SMART:
624 rc = nfit_test_cmd_smart(buf, buf_len);
625 break;
626 case ND_CMD_SMART_THRESHOLD:
627 rc = nfit_test_cmd_smart_threshold(buf, buf_len);
628 device_lock(&t->pdev.dev);
629 __acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
630 device_unlock(&t->pdev.dev);
631 break;
632 default: 963 default:
633 return -ENOTTY; 964 return -ENOTTY;
634 } 965 }
@@ -872,6 +1203,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
872 NULL, 1203 NULL,
873}; 1204};
874 1205
1206static void smart_init(struct nfit_test *t)
1207{
1208 int i;
1209 const struct nd_intel_smart_threshold smart_t_data = {
1210 .alarm_control = ND_INTEL_SMART_SPARE_TRIP
1211 | ND_INTEL_SMART_TEMP_TRIP,
1212 .media_temperature = 40 * 16,
1213 .ctrl_temperature = 30 * 16,
1214 .spares = 5,
1215 };
1216 const struct nd_intel_smart smart_data = {
1217 .flags = ND_INTEL_SMART_HEALTH_VALID
1218 | ND_INTEL_SMART_SPARES_VALID
1219 | ND_INTEL_SMART_ALARM_VALID
1220 | ND_INTEL_SMART_USED_VALID
1221 | ND_INTEL_SMART_SHUTDOWN_VALID
1222 | ND_INTEL_SMART_MTEMP_VALID,
1223 .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
1224 .media_temperature = 23 * 16,
1225 .ctrl_temperature = 30 * 16,
1226 .pmic_temperature = 40 * 16,
1227 .spares = 75,
1228 .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
1229 | ND_INTEL_SMART_TEMP_TRIP,
1230 .ait_status = 1,
1231 .life_used = 5,
1232 .shutdown_state = 0,
1233 .vendor_size = 0,
1234 .shutdown_count = 100,
1235 };
1236
1237 for (i = 0; i < t->num_dcr; i++) {
1238 memcpy(&t->smart[i], &smart_data, sizeof(smart_data));
1239 memcpy(&t->smart_threshold[i], &smart_t_data,
1240 sizeof(smart_t_data));
1241 }
1242}
1243
875static int nfit_test0_alloc(struct nfit_test *t) 1244static int nfit_test0_alloc(struct nfit_test *t)
876{ 1245{
877 size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA 1246 size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -881,7 +1250,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
881 window_size) * NUM_DCR 1250 window_size) * NUM_DCR
882 + sizeof(struct acpi_nfit_data_region) * NUM_BDW 1251 + sizeof(struct acpi_nfit_data_region) * NUM_BDW
883 + (sizeof(struct acpi_nfit_flush_address) 1252 + (sizeof(struct acpi_nfit_flush_address)
884 + sizeof(u64) * NUM_HINTS) * NUM_DCR; 1253 + sizeof(u64) * NUM_HINTS) * NUM_DCR
1254 + sizeof(struct acpi_nfit_capabilities);
885 int i; 1255 int i;
886 1256
887 t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); 1257 t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@@ -939,6 +1309,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
939 return -ENOMEM; 1309 return -ENOMEM;
940 } 1310 }
941 1311
1312 smart_init(t);
942 return ars_state_init(&t->pdev.dev, &t->ars_state); 1313 return ars_state_init(&t->pdev.dev, &t->ars_state);
943} 1314}
944 1315
@@ -969,6 +1340,7 @@ static int nfit_test1_alloc(struct nfit_test *t)
969 if (!t->spa_set[1]) 1340 if (!t->spa_set[1])
970 return -ENOMEM; 1341 return -ENOMEM;
971 1342
1343 smart_init(t);
972 return ars_state_init(&t->pdev.dev, &t->ars_state); 1344 return ars_state_init(&t->pdev.dev, &t->ars_state);
973} 1345}
974 1346
@@ -993,6 +1365,7 @@ static void nfit_test0_setup(struct nfit_test *t)
993 struct acpi_nfit_control_region *dcr; 1365 struct acpi_nfit_control_region *dcr;
994 struct acpi_nfit_data_region *bdw; 1366 struct acpi_nfit_data_region *bdw;
995 struct acpi_nfit_flush_address *flush; 1367 struct acpi_nfit_flush_address *flush;
1368 struct acpi_nfit_capabilities *pcap;
996 unsigned int offset, i; 1369 unsigned int offset, i;
997 1370
998 /* 1371 /*
@@ -1500,8 +1873,16 @@ static void nfit_test0_setup(struct nfit_test *t)
1500 for (i = 0; i < NUM_HINTS; i++) 1873 for (i = 0; i < NUM_HINTS; i++)
1501 flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); 1874 flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
1502 1875
1876 /* platform capabilities */
1877 pcap = nfit_buf + offset + flush_hint_size * 4;
1878 pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
1879 pcap->header.length = sizeof(*pcap);
1880 pcap->highest_capability = 1;
1881 pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
1882 ACPI_NFIT_CAPABILITY_MEM_FLUSH;
1883
1503 if (t->setup_hotplug) { 1884 if (t->setup_hotplug) {
1504 offset = offset + flush_hint_size * 4; 1885 offset = offset + flush_hint_size * 4 + sizeof(*pcap);
1505 /* dcr-descriptor4: blk */ 1886 /* dcr-descriptor4: blk */
1506 dcr = nfit_buf + offset; 1887 dcr = nfit_buf + offset;
1507 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1888 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1642,17 +2023,24 @@ static void nfit_test0_setup(struct nfit_test *t)
1642 set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); 2023 set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
1643 set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); 2024 set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
1644 set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); 2025 set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
1645 set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en); 2026 set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
2027 set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
2028 set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
1646 set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); 2029 set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
1647 set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); 2030 set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
1648 set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); 2031 set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
1649 set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); 2032 set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
1650 set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); 2033 set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
1651 set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
1652 set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); 2034 set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
1653 set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); 2035 set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
1654 set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); 2036 set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
1655 set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); 2037 set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
2038 set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
2039 set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
2040 set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
2041 set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
2042 set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
2043 set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
1656} 2044}
1657 2045
1658static void nfit_test1_setup(struct nfit_test *t) 2046static void nfit_test1_setup(struct nfit_test *t)
@@ -1750,6 +2138,7 @@ static void nfit_test1_setup(struct nfit_test *t)
1750 set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); 2138 set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
1751 set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); 2139 set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
1752 set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); 2140 set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
2141 set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
1753} 2142}
1754 2143
1755static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, 2144static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -2054,10 +2443,18 @@ static int nfit_test_probe(struct platform_device *pdev)
2054 sizeof(struct nfit_test_dcr *), GFP_KERNEL); 2443 sizeof(struct nfit_test_dcr *), GFP_KERNEL);
2055 nfit_test->dcr_dma = devm_kcalloc(dev, num, 2444 nfit_test->dcr_dma = devm_kcalloc(dev, num,
2056 sizeof(dma_addr_t), GFP_KERNEL); 2445 sizeof(dma_addr_t), GFP_KERNEL);
2446 nfit_test->smart = devm_kcalloc(dev, num,
2447 sizeof(struct nd_intel_smart), GFP_KERNEL);
2448 nfit_test->smart_threshold = devm_kcalloc(dev, num,
2449 sizeof(struct nd_intel_smart_threshold),
2450 GFP_KERNEL);
2451 nfit_test->fw = devm_kcalloc(dev, num,
2452 sizeof(struct nfit_test_fw), GFP_KERNEL);
2057 if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label 2453 if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
2058 && nfit_test->label_dma && nfit_test->dcr 2454 && nfit_test->label_dma && nfit_test->dcr
2059 && nfit_test->dcr_dma && nfit_test->flush 2455 && nfit_test->dcr_dma && nfit_test->flush
2060 && nfit_test->flush_dma) 2456 && nfit_test->flush_dma
2457 && nfit_test->fw)
2061 /* pass */; 2458 /* pass */;
2062 else 2459 else
2063 return -ENOMEM; 2460 return -ENOMEM;
@@ -2159,6 +2556,11 @@ static __init int nfit_test_init(void)
2159{ 2556{
2160 int rc, i; 2557 int rc, i;
2161 2558
2559 pmem_test();
2560 libnvdimm_test();
2561 acpi_nfit_test();
2562 device_dax_test();
2563
2162 nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); 2564 nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
2163 2565
2164 nfit_wq = create_singlethread_workqueue("nfit"); 2566 nfit_wq = create_singlethread_workqueue("nfit");
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 113b44675a71..428344519cdf 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -84,6 +84,140 @@ struct nd_cmd_ars_err_inj_stat {
84 } __packed record[0]; 84 } __packed record[0];
85} __packed; 85} __packed;
86 86
87#define ND_INTEL_SMART 1
88#define ND_INTEL_SMART_THRESHOLD 2
89#define ND_INTEL_ENABLE_LSS_STATUS 10
90#define ND_INTEL_FW_GET_INFO 12
91#define ND_INTEL_FW_START_UPDATE 13
92#define ND_INTEL_FW_SEND_DATA 14
93#define ND_INTEL_FW_FINISH_UPDATE 15
94#define ND_INTEL_FW_FINISH_QUERY 16
95#define ND_INTEL_SMART_SET_THRESHOLD 17
96
97#define ND_INTEL_SMART_HEALTH_VALID (1 << 0)
98#define ND_INTEL_SMART_SPARES_VALID (1 << 1)
99#define ND_INTEL_SMART_USED_VALID (1 << 2)
100#define ND_INTEL_SMART_MTEMP_VALID (1 << 3)
101#define ND_INTEL_SMART_CTEMP_VALID (1 << 4)
102#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5)
103#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6)
104#define ND_INTEL_SMART_PTEMP_VALID (1 << 7)
105#define ND_INTEL_SMART_ALARM_VALID (1 << 9)
106#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10)
107#define ND_INTEL_SMART_VENDOR_VALID (1 << 11)
108#define ND_INTEL_SMART_SPARE_TRIP (1 << 0)
109#define ND_INTEL_SMART_TEMP_TRIP (1 << 1)
110#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2)
111#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0)
112#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1)
113#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2)
114
115struct nd_intel_smart {
116 __u32 status;
117 union {
118 struct {
119 __u32 flags;
120 __u8 reserved0[4];
121 __u8 health;
122 __u8 spares;
123 __u8 life_used;
124 __u8 alarm_flags;
125 __u16 media_temperature;
126 __u16 ctrl_temperature;
127 __u32 shutdown_count;
128 __u8 ait_status;
129 __u16 pmic_temperature;
130 __u8 reserved1[8];
131 __u8 shutdown_state;
132 __u32 vendor_size;
133 __u8 vendor_data[92];
134 } __packed;
135 __u8 data[128];
136 };
137} __packed;
138
139struct nd_intel_smart_threshold {
140 __u32 status;
141 union {
142 struct {
143 __u16 alarm_control;
144 __u8 spares;
145 __u16 media_temperature;
146 __u16 ctrl_temperature;
147 __u8 reserved[1];
148 } __packed;
149 __u8 data[8];
150 };
151} __packed;
152
153struct nd_intel_smart_set_threshold {
154 __u16 alarm_control;
155 __u8 spares;
156 __u16 media_temperature;
157 __u16 ctrl_temperature;
158 __u32 status;
159} __packed;
160
161#define INTEL_FW_STORAGE_SIZE 0x100000
162#define INTEL_FW_MAX_SEND_LEN 0xFFEC
163#define INTEL_FW_QUERY_INTERVAL 250000
164#define INTEL_FW_QUERY_MAX_TIME 3000000
165#define INTEL_FW_FIS_VERSION 0x0105
166#define INTEL_FW_FAKE_VERSION 0xffffffffabcd
167
168enum intel_fw_update_state {
169 FW_STATE_NEW = 0,
170 FW_STATE_IN_PROGRESS,
171 FW_STATE_VERIFY,
172 FW_STATE_UPDATED,
173};
174
175struct nd_intel_fw_info {
176 __u32 status;
177 __u32 storage_size;
178 __u32 max_send_len;
179 __u32 query_interval;
180 __u32 max_query_time;
181 __u8 update_cap;
182 __u8 reserved[3];
183 __u32 fis_version;
184 __u64 run_version;
185 __u64 updated_version;
186} __packed;
187
188struct nd_intel_fw_start {
189 __u32 status;
190 __u32 context;
191} __packed;
192
 193/* this one has the output first because of the variable input data size */
194struct nd_intel_fw_send_data {
195 __u32 context;
196 __u32 offset;
197 __u32 length;
198 __u8 data[0];
 199/* this field is not declared due to the variable input data size */
200/* __u32 status; */
201} __packed;
202
203struct nd_intel_fw_finish_update {
204 __u8 ctrl_flags;
205 __u8 reserved[3];
206 __u32 context;
207 __u32 status;
208} __packed;
209
210struct nd_intel_fw_finish_query {
211 __u32 context;
212 __u32 status;
213 __u64 updated_fw_rev;
214} __packed;
215
216struct nd_intel_lss {
217 __u8 enable;
218 __u32 status;
219} __packed;
220
87union acpi_object; 221union acpi_object;
88typedef void *acpi_handle; 222typedef void *acpi_handle;
89 223
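
One subtlety in these layouts: nd_intel_smart_set_threshold puts its status word last (it is output-only), which is why the handler above copies sizeof(*in) - 4 bytes, everything except the trailing status, into the threshold record. Compile-time checks can make those size relationships explicit; a sketch, assuming these headers (the assert helper is illustrative):

/* Sketch: compile-time guards for the sizes the handlers rely on
 * (BUILD_BUG_ON is the kernel's static assert). */
#include <linux/bug.h>

static inline void nfit_test_payload_asserts(void)
{
	/* input minus trailing status must fit the threshold data union */
	BUILD_BUG_ON(sizeof(struct nd_intel_smart_set_threshold) - 4 >
			sizeof(((struct nd_intel_smart_threshold *)0)->data));
	/* SMART payload is a fixed 4-byte status + 128-byte data union */
	BUILD_BUG_ON(sizeof(struct nd_intel_smart) != 4 + 128);
}
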
diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h
new file mode 100644
index 000000000000..ed0528757bd4
--- /dev/null
+++ b/tools/testing/nvdimm/watermark.h
@@ -0,0 +1,21 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright(c) 2018 Intel Corporation. All rights reserved.
3#ifndef _TEST_NVDIMM_WATERMARK_H_
4#define _TEST_NVDIMM_WATERMARK_H_
5int pmem_test(void);
6int libnvdimm_test(void);
7int acpi_nfit_test(void);
8int device_dax_test(void);
9
10/*
 10/*
 11 * dummy routine for nfit_test to validate that it is linking to the
 12 * properly mocked module and not the standard one from the base tree.
13 */
14#define nfit_test_watermark(x) \
15int x##_test(void) \
16{ \
17 pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \
18 return 0; \
19} \
20EXPORT_SYMBOL(x##_test)
21#endif /* _TEST_NVDIMM_WATERMARK_H_ */
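
For reference, nfit_test_watermark(pmem) expands to an exported no-op whose only job is to exist in the mocked module; the pmem_test() call in nfit_test_init() above then fails symbol resolution at module load if the in-tree, un-instrumented nd_pmem was loaded instead. The expansion, spelled out:

/* What nfit_test_watermark(pmem) expands to: */
int pmem_test(void)
{
	pr_debug("%s for nfit_test\n", KBUILD_MODNAME);
	return 0;
}
EXPORT_SYMBOL(pmem_test);
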