author     Ross Zwisler <ross.zwisler@linux.intel.com>  2015-06-25 03:08:39 -0400
committer  Dan Williams <dan.j.williams@intel.com>      2015-06-26 11:23:38 -0400
commit     61031952f4c89dba1065f7a5b9419badb112554c (patch)
tree       70a8b29fa96b6222bd19bb604d364bce404f14ae
parent     74ae66c3b14ffa94c8d2dea201cdf8e6203d13d5 (diff)
arch, x86: pmem api for ensuring durability of persistent memory updates
Based on an original patch by Ross Zwisler [1].

Writes to persistent memory have the potential to be posted to cpu cache,
cpu write buffers, and platform write buffers (memory controller) before
being committed to persistent media.  Provide APIs, memcpy_to_pmem(),
wmb_pmem(), and memremap_pmem(), to write data to pmem and assert that it
is durable in PMEM (a persistent linear address range).  A '__pmem'
attribute is added so sparse can track proper usage of pointers to pmem.

This continues the status quo of pmem being x86-only for 4.2, but the
reworks to ioremap and a wider implementation of memremap() will enable
other archs in 4.3.

[1]: https://lists.01.org/pipermail/linux-nvdimm/2015-May/000932.html

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
[djbw: various reworks]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
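A minimal consumer of the new API looks like the following sketch; the
function and parameter names (example_durable_write, phys, size, buf, len)
are placeholders, not part of this patch, and the canonical in-tree user is
drivers/nvdimm/pmem.c below:

#include <linux/pmem.h>

/*
 * Sketch only: map a persistent memory range, perform a durable write,
 * and tear the mapping down.
 */
static int example_durable_write(resource_size_t phys, unsigned long size,
		const void *buf, size_t len)
{
	void __pmem *vaddr = memremap_pmem(phys, size);

	if (!vaddr)
		return -ENXIO;

	memcpy_to_pmem(vaddr, buf, len);	/* posted, not yet durable */
	wmb_pmem();				/* drain cpu + platform buffers */

	memunmap_pmem(vaddr);
	return 0;
}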
-rw-r--r--  arch/x86/Kconfig                   |   1
-rw-r--r--  arch/x86/include/asm/cacheflush.h  |  72
-rw-r--r--  arch/x86/include/asm/io.h          |   6
-rw-r--r--  drivers/nvdimm/pmem.c              |  33
-rw-r--r--  include/linux/compiler.h           |   2
-rw-r--r--  include/linux/pmem.h               | 153
-rw-r--r--  lib/Kconfig                        |   3
7 files changed, 257 insertions, 13 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a2cbf641667..62564ddf7f78 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -27,6 +27,7 @@ config X86
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_PMEM_API
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select HAVE_AOUT if X86_32
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 47c8e32f621a..ec23bb753a3e 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -4,6 +4,7 @@
 /* Caches aren't brain-dead on the intel. */
 #include <asm-generic/cacheflush.h>
 #include <asm/special_insns.h>
+#include <asm/uaccess.h>
 
 /*
  * The set_memory_* API can be used to change various attributes of a virtual
@@ -104,4 +105,75 @@ static inline int rodata_test(void)
 }
 #endif
 
+#ifdef ARCH_HAS_NOCACHE_UACCESS
+
+/**
+ * arch_memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Copy data to persistent memory media via non-temporal stores so that
+ * a subsequent arch_wmb_pmem() can flush cpu and memory controller
+ * write buffers to guarantee durability.
+ */
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t n)
+{
+	int unwritten;
+
+	/*
+	 * We are copying between two kernel buffers, if
+	 * __copy_from_user_inatomic_nocache() returns an error (page
+	 * fault) we would have already reported a general protection fault
+	 * before the WARN+BUG.
+	 */
+	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
+			(void __user *) src, n);
+	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
+				__func__, dst, src, unwritten))
+		BUG();
+}
+
+/**
+ * arch_wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of arch_memcpy_to_pmem() operations this drains data
+ * from cpu write buffers and any platform (memory controller) buffers
+ * to ensure that written data is durable on persistent memory media.
+ */
+static inline void arch_wmb_pmem(void)
+{
+	/*
+	 * wmb() to 'sfence' all previous writes such that they are
+	 * architecturally visible to 'pcommit'.  Note, that we've
+	 * already arranged for pmem writes to avoid the cache via
+	 * arch_memcpy_to_pmem().
+	 */
+	wmb();
+	pcommit_sfence();
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+#ifdef CONFIG_X86_64
+	/*
+	 * We require that wmb() be an 'sfence', that is only guaranteed on
+	 * 64-bit builds
+	 */
+	return static_cpu_has(X86_FEATURE_PCOMMIT);
+#else
+	return false;
+#endif
+}
+#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
+extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
+extern void arch_wmb_pmem(void);
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+	return false;
+}
+#endif
+
 #endif /* _ASM_X86_CACHEFLUSH_H */
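For context, pcommit_sfence() used above is supplied by asm/special_insns.h
rather than this patch; at the time its shape is roughly the following
(a sketch, not a quote of the header):

/* Rough shape of pcommit_sfence(): patch in 'pcommit' (opcode bytes
 * 0x66 0x0f 0xae 0xf8) followed by 'sfence' when the cpu advertises
 * X86_FEATURE_PCOMMIT, else leave a nop in its place. */
static inline void pcommit_sfence(void)
{
	alternative(ASM_NOP7,
		    ".byte 0x66, 0x0f, 0xae, 0xf8\n\t"	/* pcommit */
		    "sfence",
		    X86_FEATURE_PCOMMIT);
}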
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 34a5b93704d3..c60c3f3b0183 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -247,6 +247,12 @@ static inline void flush_write_buffers(void)
 #endif
 }
 
+static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
+	unsigned long size)
+{
+	return (void __force __pmem *) ioremap_cache(offset, size);
+}
+
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 42b766f33e59..ade9eb917a4d 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
+#include <linux/pmem.h>
 #include <linux/nd.h>
 #include "nd.h"
 
@@ -32,7 +33,7 @@ struct pmem_device {
 
 	/* One contiguous memory region per device */
 	phys_addr_t phys_addr;
-	void *virt_addr;
+	void __pmem *virt_addr;
 	size_t size;
 };
 
@@ -44,13 +45,14 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 {
 	void *mem = kmap_atomic(page);
 	size_t pmem_off = sector << 9;
+	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
 
 	if (rw == READ) {
-		memcpy(mem + off, pmem->virt_addr + pmem_off, len);
+		memcpy_from_pmem(mem + off, pmem_addr, len);
 		flush_dcache_page(page);
 	} else {
 		flush_dcache_page(page);
-		memcpy(pmem->virt_addr + pmem_off, mem + off, len);
+		memcpy_to_pmem(pmem_addr, mem + off, len);
 	}
 
 	kunmap_atomic(mem);
@@ -71,6 +73,10 @@ static void pmem_make_request(struct request_queue *q, struct bio *bio)
 				bio_data_dir(bio), iter.bi_sector);
 	if (do_acct)
 		nd_iostat_end(bio, start);
+
+	if (bio_data_dir(bio))
+		wmb_pmem();
+
 	bio_endio(bio, 0);
 }
 
@@ -94,7 +100,8 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector,
 	if (!pmem)
 		return -ENODEV;
 
-	*kaddr = pmem->virt_addr + offset;
+	/* FIXME convert DAX to comprehend that this mapping has a lifetime */
+	*kaddr = (void __force *) pmem->virt_addr + offset;
 	*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
 
 	return pmem->size - offset;
@@ -118,6 +125,8 @@ static struct pmem_device *pmem_alloc(struct device *dev,
 
 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
+	if (!arch_has_pmem_api())
+		dev_warn(dev, "unable to guarantee persistence of writes\n");
 
 	if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
 		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
@@ -126,11 +135,7 @@ static struct pmem_device *pmem_alloc(struct device *dev,
 		return ERR_PTR(-EBUSY);
 	}
 
-	/*
-	 * Map the memory as non-cachable, as we can't write back the contents
-	 * of the CPU caches in case of a crash.
-	 */
-	pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
+	pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size);
 	if (!pmem->virt_addr) {
 		release_mem_region(pmem->phys_addr, pmem->size);
 		kfree(pmem);
@@ -195,16 +200,18 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
 	}
 
 	if (rw == READ)
-		memcpy(buf, pmem->virt_addr + offset, size);
-	else
-		memcpy(pmem->virt_addr + offset, buf, size);
+		memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
+	else {
+		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
+		wmb_pmem();
+	}
 
 	return 0;
 }
 
 static void pmem_free(struct pmem_device *pmem)
 {
-	iounmap(pmem->virt_addr);
+	memunmap_pmem(pmem->virt_addr);
 	release_mem_region(pmem->phys_addr, pmem->size);
 	kfree(pmem);
 }
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 867722591be2..9a528d945498 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -21,6 +21,7 @@
 # define __rcu		__attribute__((noderef, address_space(4)))
 #else
 # define __rcu
+# define __pmem	__attribute__((noderef, address_space(5)))
 #endif
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
@@ -42,6 +43,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __cond_lock(x,c) (c)
 # define __percpu
 # define __rcu
+# define __pmem
 #endif
 
 /* Indirect macros required for expanded argument pasting, eg. __LINE__. */
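The payoff of the annotation shows up under a sparse run (make C=1): since
__pmem is noderef in address_space(5), an unannotated cast or dereference is
flagged while the typed accessors stay clean. A hypothetical fragment, with
placeholder variables and approximate warning text:

void __pmem *dst = memremap_pmem(offset, size);

memcpy((void *) dst, src, n);	/* sparse: warning: cast removes address
				   space of expression */
memcpy_to_pmem(dst, src, n);	/* clean: the accessor applies __force */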
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
new file mode 100644
index 000000000000..f6481a0b1d4f
--- /dev/null
+++ b/include/linux/pmem.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __PMEM_H__
+#define __PMEM_H__
+
+#include <linux/io.h>
+
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+#include <asm/cacheflush.h>
+#else
+static inline void arch_wmb_pmem(void)
+{
+	BUG();
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+	return false;
+}
+
+static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	return NULL;
+}
+
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t n)
+{
+	BUG();
+}
+#endif
+
+/*
+ * Architectures that define ARCH_HAS_PMEM_API must provide
+ * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(),
+ * arch_wmb_pmem(), and __arch_has_wmb_pmem().
+ */
+
+static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
+{
+	memcpy(dst, (void __force const *) src, size);
+}
+
+static inline void memunmap_pmem(void __pmem *addr)
+{
+	iounmap((void __force __iomem *) addr);
+}
+
+/**
+ * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
+ *
+ * For a given cpu implementation within an architecture it is possible
+ * that wmb_pmem() resolves to a nop.  In the case this returns
+ * false, pmem api users are unable to ensure durability and may want to
+ * fall back to a different data consistency model, or otherwise notify
+ * the user.
+ */
+static inline bool arch_has_wmb_pmem(void)
+{
+	if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
+		return __arch_has_wmb_pmem();
+	return false;
+}
+
+static inline bool arch_has_pmem_api(void)
+{
+	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem();
+}
+
+/*
+ * These defaults seek to offer decent performance and minimize the
+ * window between i/o completion and writes being durable on media.
+ * However, it is undefined / architecture specific whether
+ * default_memremap_pmem + default_memcpy_to_pmem is sufficient for
+ * making data durable relative to i/o completion.
+ */
+static void default_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t size)
+{
+	memcpy((void __force *) dst, src, size);
+}
+
+static void __pmem *default_memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	/* TODO: convert to ioremap_wt() */
+	return (void __pmem __force *)ioremap_nocache(offset, size);
+}
+
+/**
+ * memremap_pmem - map physical persistent memory for pmem api
+ * @offset: physical address of persistent memory
+ * @size: size of the mapping
+ *
+ * Establish a mapping of the architecture specific memory type expected
+ * by memcpy_to_pmem() and wmb_pmem().  For example, it may be
+ * the case that an uncacheable or writethrough mapping is sufficient,
+ * or a writeback mapping provided memcpy_to_pmem() and
+ * wmb_pmem() arrange for the data to be written through the
+ * cache to persistent media.
+ */
+static inline void __pmem *memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	if (arch_has_pmem_api())
+		return arch_memremap_pmem(offset, size);
+	return default_memremap_pmem(offset, size);
+}
+
+/**
+ * memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Perform a memory copy that results in the destination of the copy
+ * being effectively evicted from, or never written to, the processor
+ * cache hierarchy after the copy completes.  After memcpy_to_pmem()
+ * data may still reside in cpu or platform buffers, so this operation
+ * must be followed by a wmb_pmem().
+ */
+static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
+{
+	if (arch_has_pmem_api())
+		arch_memcpy_to_pmem(dst, src, n);
+	else
+		default_memcpy_to_pmem(dst, src, n);
+}
+
+/**
+ * wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of memcpy_to_pmem() operations this drains data from
+ * cpu write buffers and any platform (memory controller) buffers to
+ * ensure that written data is durable on persistent memory media.
+ */
+static inline void wmb_pmem(void)
+{
+	if (arch_has_pmem_api())
+		arch_wmb_pmem();
+}
+#endif /* __PMEM_H__ */
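The arch_has_wmb_pmem() contract suggests the consumer pattern sketched
below, with a hypothetical example_dev structure (not from this patch); it
mirrors the dev_warn() that pmem_alloc() gains above: probe once at setup
and record whether durability can actually be promised.

/* Hypothetical consumer sketch: note up front whether wmb_pmem() can
 * really drain writes to media, and surface the result to the user. */
struct example_dev {
	void __pmem *vaddr;
	bool writes_durable;
};

static int example_map(struct device *dev, struct example_dev *edev,
		resource_size_t phys, unsigned long size)
{
	edev->vaddr = memremap_pmem(phys, size);
	if (!edev->vaddr)
		return -ENXIO;

	edev->writes_durable = arch_has_pmem_api();
	if (!edev->writes_durable)
		dev_warn(dev, "unable to guarantee persistence of writes\n");
	return 0;
}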
diff --git a/lib/Kconfig b/lib/Kconfig
index 601965a948e8..d27c13a91c28 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -522,4 +522,7 @@ source "lib/fonts/Kconfig"
 config ARCH_HAS_SG_CHAIN
 	def_bool n
 
+config ARCH_HAS_PMEM_API
+	bool
+
 endmenu