diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-28 20:22:07 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-28 20:38:16 -0400 |
| commit | f0c98ebc57c2d5e535bc4f9167f35650d2ba3c90 (patch) | |
| tree | ad584aa321c0a2dbdaa49e0754f6c9f233b79a48 /drivers/nvdimm/region_devs.c | |
| parent | d94ba9e7d8d5c821d0442f13b30b0140c1109c38 (diff) | |
| parent | 0606263f24f3d64960de742c55894190b5df903b (diff) | |
Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
- Replace pcommit with ADR / directed-flushing.
The pcommit instruction, which has not shipped on any product, is
deprecated. Instead, the requirement is that platforms implement
either ADR, or provide one or more flush addresses per nvdimm.
ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers
to the memory controller on a power-fail event.
Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
Interface Table (NFIT) sub-structure: "Flush Hint Address Structure".
A flush hint is an mmio address that when written and fenced assures
that all previous posted writes targeting a given dimm have been
flushed to media.
- On-demand ARS (address range scrub).
Linux uses the results of the ACPI ARS commands to track bad blocks
in pmem devices. When latent errors are detected we re-scrub the
media to refresh the bad block list, userspace can also request a
re-scrub at any time.
- Support for the Microsoft DSM (device specific method) command
format.
- Support for EDK2/OVMF virtual disk device memory ranges.
- Various fixes and cleanups across the subsystem.
* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
nfit: do an ARS scrub on hitting a latent media error
nfit: move to nfit/ sub-directory
nfit, libnvdimm: allow an ARS scrub to be triggered on demand
libnvdimm: register nvdimm_bus devices with an nd_bus driver
pmem: clarify a debug print in pmem_clear_poison
x86/insn: remove pcommit
Revert "KVM: x86: add pcommit support"
nfit, tools/testing/nvdimm/: unify shutdown paths
libnvdimm: move ->module to struct nvdimm_bus_descriptor
nfit: cleanup acpi_nfit_init calling convention
nfit: fix _FIT evaluation memory leak + use after free
tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
tools/testing/nvdimm: add virtual ramdisk range
acpi, nfit: treat virtual ramdisk SPA as pmem region
pmem: kill __pmem address space
pmem: kill wmb_pmem()
libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
fs/dax: remove wmb_pmem()
libnvdimm, pmem: flush posted-write queues on shutdown
...
Diffstat (limited to 'drivers/nvdimm/region_devs.c')
| -rw-r--r-- | drivers/nvdimm/region_devs.c | 154 |
1 file changed, 148 insertions, 6 deletions
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 40fcfea26fbb..e8d5ba7b29af 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c | |||
| @@ -14,13 +14,97 @@ | |||
| 14 | #include <linux/highmem.h> | 14 | #include <linux/highmem.h> |
| 15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
| 16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
| 17 | #include <linux/hash.h> | ||
| 18 | #include <linux/pmem.h> | ||
| 17 | #include <linux/sort.h> | 19 | #include <linux/sort.h> |
| 18 | #include <linux/io.h> | 20 | #include <linux/io.h> |
| 19 | #include <linux/nd.h> | 21 | #include <linux/nd.h> |
| 20 | #include "nd-core.h" | 22 | #include "nd-core.h" |
| 21 | #include "nd.h" | 23 | #include "nd.h" |
| 22 | 24 | ||
| 25 | /* | ||
| 26 | * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is | ||
| 27 | * irrelevant. | ||
| 28 | */ | ||
| 29 | #include <linux/io-64-nonatomic-hi-lo.h> | ||
| 30 | |||
| 23 | static DEFINE_IDA(region_ida); | 31 | static DEFINE_IDA(region_ida); |
| 32 | static DEFINE_PER_CPU(int, flush_idx); | ||
| 33 | |||
| 34 | static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, | ||
| 35 | struct nd_region_data *ndrd) | ||
| 36 | { | ||
| 37 | int i, j; | ||
| 38 | |||
| 39 | dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm), | ||
| 40 | nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es"); | ||
| 41 | for (i = 0; i < nvdimm->num_flush; i++) { | ||
| 42 | struct resource *res = &nvdimm->flush_wpq[i]; | ||
| 43 | unsigned long pfn = PHYS_PFN(res->start); | ||
| 44 | void __iomem *flush_page; | ||
| 45 | |||
| 46 | /* check if flush hints share a page */ | ||
| 47 | for (j = 0; j < i; j++) { | ||
| 48 | struct resource *res_j = &nvdimm->flush_wpq[j]; | ||
| 49 | unsigned long pfn_j = PHYS_PFN(res_j->start); | ||
| 50 | |||
| 51 | if (pfn == pfn_j) | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | |||
| 55 | if (j < i) | ||
| 56 | flush_page = (void __iomem *) ((unsigned long) | ||
| 57 | ndrd->flush_wpq[dimm][j] & PAGE_MASK); | ||
| 58 | else | ||
| 59 | flush_page = devm_nvdimm_ioremap(dev, | ||
| 60 | PHYS_PFN(pfn), PAGE_SIZE); | ||
| 61 | if (!flush_page) | ||
| 62 | return -ENXIO; | ||
| 63 | ndrd->flush_wpq[dimm][i] = flush_page | ||
| 64 | + (res->start & ~PAGE_MASK); | ||
| 65 | } | ||
| 66 | |||
| 67 | return 0; | ||
| 68 | } | ||
| 69 | |||
| 70 | int nd_region_activate(struct nd_region *nd_region) | ||
| 71 | { | ||
| 72 | int i, num_flush = 0; | ||
| 73 | struct nd_region_data *ndrd; | ||
| 74 | struct device *dev = &nd_region->dev; | ||
| 75 | size_t flush_data_size = sizeof(void *); | ||
| 76 | |||
| 77 | nvdimm_bus_lock(&nd_region->dev); | ||
| 78 | for (i = 0; i < nd_region->ndr_mappings; i++) { | ||
| 79 | struct nd_mapping *nd_mapping = &nd_region->mapping[i]; | ||
| 80 | struct nvdimm *nvdimm = nd_mapping->nvdimm; | ||
| 81 | |||
| 82 | /* at least one null hint slot per-dimm for the "no-hint" case */ | ||
| 83 | flush_data_size += sizeof(void *); | ||
| 84 | num_flush = min_not_zero(num_flush, nvdimm->num_flush); | ||
| 85 | if (!nvdimm->num_flush) | ||
| 86 | continue; | ||
| 87 | flush_data_size += nvdimm->num_flush * sizeof(void *); | ||
| 88 | } | ||
| 89 | nvdimm_bus_unlock(&nd_region->dev); | ||
| 90 | |||
| 91 | ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL); | ||
| 92 | if (!ndrd) | ||
| 93 | return -ENOMEM; | ||
| 94 | dev_set_drvdata(dev, ndrd); | ||
| 95 | |||
| 96 | ndrd->flush_mask = (1 << ilog2(num_flush)) - 1; | ||
| 97 | for (i = 0; i < nd_region->ndr_mappings; i++) { | ||
| 98 | struct nd_mapping *nd_mapping = &nd_region->mapping[i]; | ||
| 99 | struct nvdimm *nvdimm = nd_mapping->nvdimm; | ||
| 100 | int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd); | ||
| 101 | |||
| 102 | if (rc) | ||
| 103 | return rc; | ||
| 104 | } | ||
| 105 | |||
| 106 | return 0; | ||
| 107 | } | ||
| 24 | 108 | ||
| 25 | static void nd_region_release(struct device *dev) | 109 | static void nd_region_release(struct device *dev) |
| 26 | { | 110 | { |
| @@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size); | |||
| 242 | static ssize_t init_namespaces_show(struct device *dev, | 326 | static ssize_t init_namespaces_show(struct device *dev, |
| 243 | struct device_attribute *attr, char *buf) | 327 | struct device_attribute *attr, char *buf) |
| 244 | { | 328 | { |
| 245 | struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); | 329 | struct nd_region_data *ndrd = dev_get_drvdata(dev); |
| 246 | ssize_t rc; | 330 | ssize_t rc; |
| 247 | 331 | ||
| 248 | nvdimm_bus_lock(dev); | 332 | nvdimm_bus_lock(dev); |
| 249 | if (num_ns) | 333 | if (ndrd) |
| 250 | rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); | 334 | rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count); |
| 251 | else | 335 | else |
| 252 | rc = -ENXIO; | 336 | rc = -ENXIO; |
| 253 | nvdimm_bus_unlock(dev); | 337 | nvdimm_bus_unlock(dev); |
| @@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, | |||
| 433 | 517 | ||
| 434 | if (is_nd_pmem(dev)) | 518 | if (is_nd_pmem(dev)) |
| 435 | return; | 519 | return; |
| 436 | |||
| 437 | to_nd_blk_region(dev)->disable(nvdimm_bus, dev); | ||
| 438 | } | 520 | } |
| 439 | if (dev->parent && is_nd_blk(dev->parent) && probe) { | 521 | if (dev->parent && is_nd_blk(dev->parent) && probe) { |
| 440 | nd_region = to_nd_region(dev->parent); | 522 | nd_region = to_nd_region(dev->parent); |
| @@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, | |||
| 698 | if (ndbr) { | 780 | if (ndbr) { |
| 699 | nd_region = &ndbr->nd_region; | 781 | nd_region = &ndbr->nd_region; |
| 700 | ndbr->enable = ndbr_desc->enable; | 782 | ndbr->enable = ndbr_desc->enable; |
| 701 | ndbr->disable = ndbr_desc->disable; | ||
| 702 | ndbr->do_io = ndbr_desc->do_io; | 783 | ndbr->do_io = ndbr_desc->do_io; |
| 703 | } | 784 | } |
| 704 | region_buf = ndbr; | 785 | region_buf = ndbr; |
| @@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, | |||
| 794 | } | 875 | } |
| 795 | EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); | 876 | EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); |
| 796 | 877 | ||
| 878 | /** | ||
| 879 | * nvdimm_flush - flush any posted write queues between the cpu and pmem media | ||
| 880 | * @nd_region: blk or interleaved pmem region | ||
| 881 | */ | ||
| 882 | void nvdimm_flush(struct nd_region *nd_region) | ||
| 883 | { | ||
| 884 | struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); | ||
| 885 | int i, idx; | ||
| 886 | |||
| 887 | /* | ||
| 888 | * Try to encourage some diversity in flush hint addresses | ||
| 889 | * across cpus assuming a limited number of flush hints. | ||
| 890 | */ | ||
| 891 | idx = this_cpu_read(flush_idx); | ||
| 892 | idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8)); | ||
| 893 | |||
| 894 | /* | ||
| 895 | * The first wmb() is needed to 'sfence' all previous writes | ||
| 896 | * such that they are architecturally visible for the platform | ||
| 897 | * buffer flush. Note that we've already arranged for pmem | ||
| 898 | * writes to avoid the cache via arch_memcpy_to_pmem(). The | ||
| 899 | * final wmb() ensures ordering for the NVDIMM flush write. | ||
| 900 | */ | ||
| 901 | wmb(); | ||
| 902 | for (i = 0; i < nd_region->ndr_mappings; i++) | ||
| 903 | if (ndrd->flush_wpq[i][0]) | ||
| 904 | writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]); | ||
| 905 | wmb(); | ||
| 906 | } | ||
| 907 | EXPORT_SYMBOL_GPL(nvdimm_flush); | ||
| 908 | |||
| 909 | /** | ||
| 910 | * nvdimm_has_flush - determine write flushing requirements | ||
| 911 | * @nd_region: blk or interleaved pmem region | ||
| 912 | * | ||
| 913 | * Returns 1 if writes require flushing | ||
| 914 | * Returns 0 if writes do not require flushing | ||
| 915 | * Returns -ENXIO if flushing capability can not be determined | ||
| 916 | */ | ||
| 917 | int nvdimm_has_flush(struct nd_region *nd_region) | ||
| 918 | { | ||
| 919 | struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); | ||
| 920 | int i; | ||
| 921 | |||
| 922 | /* no nvdimm == flushing capability unknown */ | ||
| 923 | if (nd_region->ndr_mappings == 0) | ||
| 924 | return -ENXIO; | ||
| 925 | |||
| 926 | for (i = 0; i < nd_region->ndr_mappings; i++) | ||
| 927 | /* flush hints present, flushing required */ | ||
| 928 | if (ndrd->flush_wpq[i][0]) | ||
| 929 | return 1; | ||
| 930 | |||
| 931 | /* | ||
| 932 | * The platform defines dimm devices without hints, assume | ||
| 933 | * platform persistence mechanism like ADR | ||
| 934 | */ | ||
| 935 | return 0; | ||
| 936 | } | ||
| 937 | EXPORT_SYMBOL_GPL(nvdimm_has_flush); | ||
| 938 | |||
| 797 | void __exit nd_region_devs_exit(void) | 939 | void __exit nd_region_devs_exit(void) |
| 798 | { | 940 | { |
| 799 | ida_destroy(®ion_ida); | 941 | ida_destroy(®ion_ida); |
