aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvdimm/region_devs.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-28 20:22:07 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-28 20:38:16 -0400
commitf0c98ebc57c2d5e535bc4f9167f35650d2ba3c90 (patch)
treead584aa321c0a2dbdaa49e0754f6c9f233b79a48 /drivers/nvdimm/region_devs.c
parentd94ba9e7d8d5c821d0442f13b30b0140c1109c38 (diff)
parent0606263f24f3d64960de742c55894190b5df903b (diff)
Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams: - Replace pcommit with ADR / directed-flushing. The pcommit instruction, which has not shipped on any product, is deprecated. Instead, the requirement is that platforms implement either ADR, or provide one or more flush addresses per nvdimm. ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers to the memory controller on a power-fail event. Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware Interface Table (NFIT) sub-structure: "Flush Hint Address Structure". A flush hint is an mmio address that when written and fenced assures that all previous posted writes targeting a given dimm have been flushed to media. - On-demand ARS (address range scrub). Linux uses the results of the ACPI ARS commands to track bad blocks in pmem devices. When latent errors are detected we re-scrub the media to refresh the bad block list, userspace can also request a re-scrub at any time. - Support for the Microsoft DSM (device specific method) command format. - Support for EDK2/OVMF virtual disk device memory ranges. - Various fixes and cleanups across the subsystem. 
* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits) libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register" nfit: do an ARS scrub on hitting a latent media error nfit: move to nfit/ sub-directory nfit, libnvdimm: allow an ARS scrub to be triggered on demand libnvdimm: register nvdimm_bus devices with an nd_bus driver pmem: clarify a debug print in pmem_clear_poison x86/insn: remove pcommit Revert "KVM: x86: add pcommit support" nfit, tools/testing/nvdimm/: unify shutdown paths libnvdimm: move ->module to struct nvdimm_bus_descriptor nfit: cleanup acpi_nfit_init calling convention nfit: fix _FIT evaluation memory leak + use after free tools/testing/nvdimm: add manufacturing_{date|location} dimm properties tools/testing/nvdimm: add virtual ramdisk range acpi, nfit: treat virtual ramdisk SPA as pmem region pmem: kill __pmem address space pmem: kill wmb_pmem() libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes fs/dax: remove wmb_pmem() libnvdimm, pmem: flush posted-write queues on shutdown ...
Diffstat (limited to 'drivers/nvdimm/region_devs.c')
-rw-r--r--  drivers/nvdimm/region_devs.c | 154
1 file changed, 148 insertions(+), 6 deletions(-)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 40fcfea26fbb..e8d5ba7b29af 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -14,13 +14,97 @@
14#include <linux/highmem.h> 14#include <linux/highmem.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/hash.h>
18#include <linux/pmem.h>
17#include <linux/sort.h> 19#include <linux/sort.h>
18#include <linux/io.h> 20#include <linux/io.h>
19#include <linux/nd.h> 21#include <linux/nd.h>
20#include "nd-core.h" 22#include "nd-core.h"
21#include "nd.h" 23#include "nd.h"
22 24
25/*
26 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
27 * irrelevant.
28 */
29#include <linux/io-64-nonatomic-hi-lo.h>
30
23static DEFINE_IDA(region_ida); 31static DEFINE_IDA(region_ida);
32static DEFINE_PER_CPU(int, flush_idx);
33
34static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
35 struct nd_region_data *ndrd)
36{
37 int i, j;
38
39 dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
40 nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
41 for (i = 0; i < nvdimm->num_flush; i++) {
42 struct resource *res = &nvdimm->flush_wpq[i];
43 unsigned long pfn = PHYS_PFN(res->start);
44 void __iomem *flush_page;
45
46 /* check if flush hints share a page */
47 for (j = 0; j < i; j++) {
48 struct resource *res_j = &nvdimm->flush_wpq[j];
49 unsigned long pfn_j = PHYS_PFN(res_j->start);
50
51 if (pfn == pfn_j)
52 break;
53 }
54
55 if (j < i)
56 flush_page = (void __iomem *) ((unsigned long)
57 ndrd->flush_wpq[dimm][j] & PAGE_MASK);
58 else
59 flush_page = devm_nvdimm_ioremap(dev,
60 PHYS_PFN(pfn), PAGE_SIZE);
61 if (!flush_page)
62 return -ENXIO;
63 ndrd->flush_wpq[dimm][i] = flush_page
64 + (res->start & ~PAGE_MASK);
65 }
66
67 return 0;
68}
69
70int nd_region_activate(struct nd_region *nd_region)
71{
72 int i, num_flush = 0;
73 struct nd_region_data *ndrd;
74 struct device *dev = &nd_region->dev;
75 size_t flush_data_size = sizeof(void *);
76
77 nvdimm_bus_lock(&nd_region->dev);
78 for (i = 0; i < nd_region->ndr_mappings; i++) {
79 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
80 struct nvdimm *nvdimm = nd_mapping->nvdimm;
81
82 /* at least one null hint slot per-dimm for the "no-hint" case */
83 flush_data_size += sizeof(void *);
84 num_flush = min_not_zero(num_flush, nvdimm->num_flush);
85 if (!nvdimm->num_flush)
86 continue;
87 flush_data_size += nvdimm->num_flush * sizeof(void *);
88 }
89 nvdimm_bus_unlock(&nd_region->dev);
90
91 ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
92 if (!ndrd)
93 return -ENOMEM;
94 dev_set_drvdata(dev, ndrd);
95
96 ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
97 for (i = 0; i < nd_region->ndr_mappings; i++) {
98 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
99 struct nvdimm *nvdimm = nd_mapping->nvdimm;
100 int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
101
102 if (rc)
103 return rc;
104 }
105
106 return 0;
107}
24 108
25static void nd_region_release(struct device *dev) 109static void nd_region_release(struct device *dev)
26{ 110{
@@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size);
242static ssize_t init_namespaces_show(struct device *dev, 326static ssize_t init_namespaces_show(struct device *dev,
243 struct device_attribute *attr, char *buf) 327 struct device_attribute *attr, char *buf)
244{ 328{
245 struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); 329 struct nd_region_data *ndrd = dev_get_drvdata(dev);
246 ssize_t rc; 330 ssize_t rc;
247 331
248 nvdimm_bus_lock(dev); 332 nvdimm_bus_lock(dev);
249 if (num_ns) 333 if (ndrd)
250 rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); 334 rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
251 else 335 else
252 rc = -ENXIO; 336 rc = -ENXIO;
253 nvdimm_bus_unlock(dev); 337 nvdimm_bus_unlock(dev);
@@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
433 517
434 if (is_nd_pmem(dev)) 518 if (is_nd_pmem(dev))
435 return; 519 return;
436
437 to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
438 } 520 }
439 if (dev->parent && is_nd_blk(dev->parent) && probe) { 521 if (dev->parent && is_nd_blk(dev->parent) && probe) {
440 nd_region = to_nd_region(dev->parent); 522 nd_region = to_nd_region(dev->parent);
@@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
698 if (ndbr) { 780 if (ndbr) {
699 nd_region = &ndbr->nd_region; 781 nd_region = &ndbr->nd_region;
700 ndbr->enable = ndbr_desc->enable; 782 ndbr->enable = ndbr_desc->enable;
701 ndbr->disable = ndbr_desc->disable;
702 ndbr->do_io = ndbr_desc->do_io; 783 ndbr->do_io = ndbr_desc->do_io;
703 } 784 }
704 region_buf = ndbr; 785 region_buf = ndbr;
@@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
794} 875}
795EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); 876EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
796 877
878/**
879 * nvdimm_flush - flush any posted write queues between the cpu and pmem media
880 * @nd_region: blk or interleaved pmem region
881 */
882void nvdimm_flush(struct nd_region *nd_region)
883{
884 struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
885 int i, idx;
886
887 /*
888 * Try to encourage some diversity in flush hint addresses
889 * across cpus assuming a limited number of flush hints.
890 */
891 idx = this_cpu_read(flush_idx);
892 idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
893
894 /*
895 * The first wmb() is needed to 'sfence' all previous writes
896 * such that they are architecturally visible for the platform
897 * buffer flush. Note that we've already arranged for pmem
898 * writes to avoid the cache via arch_memcpy_to_pmem(). The
899 * final wmb() ensures ordering for the NVDIMM flush write.
900 */
901 wmb();
902 for (i = 0; i < nd_region->ndr_mappings; i++)
903 if (ndrd->flush_wpq[i][0])
904 writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
905 wmb();
906}
907EXPORT_SYMBOL_GPL(nvdimm_flush);
908
909/**
910 * nvdimm_has_flush - determine write flushing requirements
911 * @nd_region: blk or interleaved pmem region
912 *
913 * Returns 1 if writes require flushing
914 * Returns 0 if writes do not require flushing
915 * Returns -ENXIO if flushing capability can not be determined
916 */
917int nvdimm_has_flush(struct nd_region *nd_region)
918{
919 struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
920 int i;
921
922 /* no nvdimm == flushing capability unknown */
923 if (nd_region->ndr_mappings == 0)
924 return -ENXIO;
925
926 for (i = 0; i < nd_region->ndr_mappings; i++)
927 /* flush hints present, flushing required */
928 if (ndrd->flush_wpq[i][0])
929 return 1;
930
931 /*
932 * The platform defines dimm devices without hints, assume
933 * platform persistence mechanism like ADR
934 */
935 return 0;
936}
937EXPORT_SYMBOL_GPL(nvdimm_has_flush);
938
797void __exit nd_region_devs_exit(void) 939void __exit nd_region_devs_exit(void)
798{ 940{
799 ida_destroy(&region_ida); 941 ida_destroy(&region_ida);