author     Linus Torvalds <torvalds@linux-foundation.org>  2016-09-10 12:58:52 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-09-10 12:58:52 -0400
commit     98ac9a608dc79ba8a20cee77fe959a6dfccdaa63 (patch)
tree       79f9f8a750ba6335f95abe0d3b5cec8319d86dd2
parent     b8db3714d7479881e23de20735f60f1733723241 (diff)
parent     1e8b8d9619f9476e94f32eb20cab000d50d236aa (diff)
Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm fixes from Dan Williams:
 "nvdimm fixes for v4.8, two of them are tagged for -stable:

   - Fix devm_memremap_pages() to use track_pfn_insert(). Otherwise,
     DAX pmd mappings end up with an uncached pgprot, and unusable
     performance for the device-dax interface. The device-dax interface
     appeared in 4.7 so this is tagged for -stable.

   - Fix a couple VM_BUG_ON() checks in the show_smaps() path to
     understand DAX pmd entries. This fix is tagged for -stable.

   - Fix a mis-merge of the nfit machine-check handler to flip the
     polarity of an if() to match the final version of the patch that
     Vishal sent for 4.8-rc1. Without this the nfit machine check
     handler never detects / inserts new 'badblocks' entries which
     applications use to identify lost portions of files.

   - For test purposes, fix the nvdimm_clear_poison() path to operate
     on legacy / simulated nvdimm memory ranges. Without this fix a
     test can set badblocks, but never clear them on these ranges.

   - Fix the range checking done by dax_dev_pmd_fault(). This is not
     tagged for -stable since this problem is mitigated by specifying
     aligned resources at device-dax setup time.

  These patches have appeared in a next release over the past week. The
  recent rebase you can see in the timestamps was to drop an invalid
  fix as identified by the updated device-dax unit tests [1]. The -mm
  touches have an ack from Andrew"

 [1]: "[ndctl PATCH 0/3] device-dax test for recent kernel bugs"
      https://lists.01.org/pipermail/linux-nvdimm/2016-September/006855.html

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  libnvdimm: allow legacy (e820) pmem region to clear bad blocks
  nfit, mce: Fix SPA matching logic in MCE handler
  mm: fix cache mode of dax pmd mappings
  mm: fix show_smap() for zone_device-pmd ranges
  dax: fix mapping size check
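As an illustration of the user-visible symptoms, below is a minimal userspace sketch (not part of this series) that maps a device-dax node at pmd granularity and dumps its smaps entry. It assumes a device-dax node at /dev/dax0.0 (hypothetical path) whose region alignment is 2MB, and that the kernel places the vma at a 2MB-aligned address; note the smaps VM_BUG_ON() fixed by this merge only fires on CONFIG_DEBUG_VM builds.

/*
 * Hedged illustration, not from the patch set: map a device-dax node
 * with pmd (2MB) granularity, fault it in, then read back
 * /proc/self/smaps. Assumes /dev/dax0.0 exists (hypothetical path)
 * with a 2MB region alignment and a 2MB-aligned vma placement.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#define PMD_SZ (2UL << 20) /* 2MB, matching x86 HPAGE_PMD_SIZE */

int main(void)
{
	char cmd[64];
	void *addr;
	int fd;

	fd = open("/dev/dax0.0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* a 2MB-sized, 2MB-aligned mapping is eligible for a pmd fault */
	addr = mmap(NULL, PMD_SZ, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/*
	 * The first store faults in the pmd; with the cache-mode fix the
	 * mapping should be write-back rather than uncached, and the
	 * dax_dev_pmd_fault fix makes the range check use PMD_SIZE.
	 */
	memset(addr, 0, PMD_SZ);

	/* walking smaps used to trip VM_BUG_ON() on zone_device pmds */
	snprintf(cmd, sizeof(cmd), "cat /proc/%d/smaps", (int)getpid());
	system(cmd);

	munmap(addr, PMD_SZ);
	close(fd);
	return EXIT_SUCCESS;
}

Before these fixes such a mapping could come up with an uncached pgprot (unusably slow) or trip the smaps walk on a debug kernel; afterwards the memset should run at normal memory speed and the smaps dump should account the 2MB range without complaint.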
-rw-r--r--  arch/x86/mm/pat.c        | 17
-rw-r--r--  drivers/acpi/nfit/mce.c  |  2
-rw-r--r--  drivers/dax/dax.c        |  2
-rw-r--r--  drivers/nvdimm/bus.c     |  6
-rw-r--r--  fs/proc/task_mmu.c       |  2
-rw-r--r--  kernel/memremap.c        |  9
-rw-r--r--  mm/huge_memory.c         |  4
7 files changed, 30 insertions, 12 deletions
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index ecb1b69c1651..170cc4ff057b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -927,9 +927,10 @@ int track_pfn_copy(struct vm_area_struct *vma)
 }
 
 /*
- * prot is passed in as a parameter for the new mapping. If the vma has a
- * linear pfn mapping for the entire range reserve the entire vma range with
- * single reserve_pfn_range call.
+ * prot is passed in as a parameter for the new mapping. If the vma has
+ * a linear pfn mapping for the entire range, or no vma is provided,
+ * reserve the entire pfn + size range with single reserve_pfn_range
+ * call.
  */
 int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 		    unsigned long pfn, unsigned long addr, unsigned long size)
@@ -938,11 +939,12 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 	enum page_cache_mode pcm;
 
 	/* reserve the whole chunk starting from paddr */
-	if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+	if (!vma || (addr == vma->vm_start
+				&& size == (vma->vm_end - vma->vm_start))) {
 		int ret;
 
 		ret = reserve_pfn_range(paddr, size, prot, 0);
-		if (!ret)
+		if (ret == 0 && vma)
 			vma->vm_flags |= VM_PAT;
 		return ret;
 	}
@@ -997,7 +999,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 	resource_size_t paddr;
 	unsigned long prot;
 
-	if (!(vma->vm_flags & VM_PAT))
+	if (vma && !(vma->vm_flags & VM_PAT))
 		return;
 
 	/* free the chunk starting from pfn or the whole chunk */
@@ -1011,7 +1013,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 		size = vma->vm_end - vma->vm_start;
 	}
 	free_pfn_range(paddr, size);
-	vma->vm_flags &= ~VM_PAT;
+	if (vma)
+		vma->vm_flags &= ~VM_PAT;
 }
 
 /*
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index 4c745bf389fe..161f91539ae6 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -42,7 +42,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
 		struct acpi_nfit_system_address *spa = nfit_spa->spa;
 
-		if (nfit_spa_type(spa) == NFIT_SPA_PM)
+		if (nfit_spa_type(spa) != NFIT_SPA_PM)
 			continue;
 		/* find the spa that covers the mce addr */
 		if (spa->address > mce->addr)
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 803f3953b341..29f600f2c447 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -459,7 +459,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
 	}
 
 	pgoff = linear_page_index(vma, pmd_addr);
-	phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE);
+	phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
 	if (phys == -1) {
 		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
 				pgoff);
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 458daf927336..935866fe5ec2 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -185,8 +185,12 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 		return -ENXIO;
 
 	nd_desc = nvdimm_bus->nd_desc;
+	/*
+	 * if ndctl does not exist, it's PMEM_LEGACY and
+	 * we want to just pretend everything is handled.
+	 */
 	if (!nd_desc->ndctl)
-		return -ENXIO;
+		return len;
 
 	memset(&ars_cap, 0, sizeof(ars_cap));
 	ars_cap.address = phys;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 187d84ef9de9..f6fa99eca515 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -581,6 +581,8 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 		mss->anonymous_thp += HPAGE_PMD_SIZE;
 	else if (PageSwapBacked(page))
 		mss->shmem_thp += HPAGE_PMD_SIZE;
+	else if (is_zone_device_page(page))
+		/* pass */;
 	else
 		VM_BUG_ON_PAGE(1, page);
 	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 251d16b4cb41..b501e390bb34 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -247,6 +247,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	arch_remove_memory(align_start, align_size);
+	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
 	pgmap_radix_release(res);
 	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
 			"%s: failed to free all reserved pages\n", __func__);
@@ -282,6 +283,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 		struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
 	resource_size_t key, align_start, align_size, align_end;
+	pgprot_t pgprot = PAGE_KERNEL;
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
 	int error, nid, is_ram;
@@ -351,6 +353,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	if (nid < 0)
 		nid = numa_mem_id();
 
+	error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
+			align_size);
+	if (error)
+		goto err_pfn_remap;
+
 	error = arch_add_memory(nid, align_start, align_size, true);
 	if (error)
 		goto err_add_memory;
@@ -371,6 +378,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	return __va(res->start);
 
  err_add_memory:
+	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ err_pfn_remap:
  err_radix:
 	pgmap_radix_release(res);
 	devres_free(page_map);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2db2112aa31e..a6abd76baa72 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1078,7 +1078,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 		goto out;
 
 	page = pmd_page(*pmd);
-	VM_BUG_ON_PAGE(!PageHead(page), page);
+	VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
 	if (flags & FOLL_TOUCH)
 		touch_pmd(vma, addr, pmd);
 	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
@@ -1116,7 +1116,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	}
 skip_mlock:
 	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
-	VM_BUG_ON_PAGE(!PageCompound(page), page);
+	VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
 	if (flags & FOLL_GET)
 		get_page(page);
 