summaryrefslogtreecommitdiffstats
path: root/drivers/nvdimm
diff options
context:
space:
mode:
author Dan Williams <dan.j.williams@intel.com> 2016-05-18 12:59:34 -0400
committer Dan Williams <dan.j.williams@intel.com> 2016-05-18 12:59:34 -0400
commit 594d6d96ea042366878aa7dc7f5711b8c245db5a (patch)
tree 1b7333bf5b5c1147e136f050d686ab4f888ab85f /drivers/nvdimm
parent 1b8d2afde54fade94339f573c4e05644f9ae9866 (diff)
parent 45a0dac0451136fa7ae34a6fea53ef6a136287ce (diff)
Merge branch 'for-4.7/dax' into libnvdimm-for-next
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r--drivers/nvdimm/Kconfig13
-rw-r--r--drivers/nvdimm/Makefile1
-rw-r--r--drivers/nvdimm/blk.c208
-rw-r--r--drivers/nvdimm/btt.c20
-rw-r--r--drivers/nvdimm/btt_devs.c24
-rw-r--r--drivers/nvdimm/bus.c4
-rw-r--r--drivers/nvdimm/claim.c63
-rw-r--r--drivers/nvdimm/dax_devs.c99
-rw-r--r--drivers/nvdimm/namespace_devs.c38
-rw-r--r--drivers/nvdimm/nd-core.h1
-rw-r--r--drivers/nvdimm/nd.h72
-rw-r--r--drivers/nvdimm/pfn.h4
-rw-r--r--drivers/nvdimm/pfn_devs.c319
-rw-r--r--drivers/nvdimm/pmem.c492
-rw-r--r--drivers/nvdimm/region.c2
-rw-r--r--drivers/nvdimm/region_devs.c29
16 files changed, 803 insertions, 586 deletions
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 53c11621d5b1..7c8a3bf07884 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -88,4 +88,17 @@ config NVDIMM_PFN
88 88
89 Select Y if unsure 89 Select Y if unsure
90 90
91config NVDIMM_DAX
92 bool "NVDIMM DAX: Raw access to persistent memory"
93 default LIBNVDIMM
94 depends on NVDIMM_PFN
95 help
96 Support raw device dax access to a persistent memory
97 namespace. For environments that want to hard partition
98 persistent memory, this capability provides a mechanism to
99 sub-divide a namespace into character devices that can only be
100 accessed via DAX (mmap(2)).
101
102 Select Y if unsure
103
91endif 104endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index ea84d3c4e8e5..909554c3f955 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -23,3 +23,4 @@ libnvdimm-y += label.o
23libnvdimm-$(CONFIG_ND_CLAIM) += claim.o 23libnvdimm-$(CONFIG_ND_CLAIM) += claim.o
24libnvdimm-$(CONFIG_BTT) += btt_devs.o 24libnvdimm-$(CONFIG_BTT) += btt_devs.o
25libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o 25libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
26libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index e9ff9229d942..495e06d9f7e7 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -21,19 +21,19 @@
21#include <linux/sizes.h> 21#include <linux/sizes.h>
22#include "nd.h" 22#include "nd.h"
23 23
24struct nd_blk_device { 24static u32 nsblk_meta_size(struct nd_namespace_blk *nsblk)
25 struct request_queue *queue; 25{
26 struct gendisk *disk; 26 return nsblk->lbasize - ((nsblk->lbasize >= 4096) ? 4096 : 512);
27 struct nd_namespace_blk *nsblk; 27}
28 struct nd_blk_region *ndbr; 28
29 size_t disk_size; 29static u32 nsblk_internal_lbasize(struct nd_namespace_blk *nsblk)
30 u32 sector_size; 30{
31 u32 internal_lbasize; 31 return roundup(nsblk->lbasize, INT_LBASIZE_ALIGNMENT);
32}; 32}
33 33
34static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) 34static u32 nsblk_sector_size(struct nd_namespace_blk *nsblk)
35{ 35{
36 return blk_dev->nsblk->lbasize - blk_dev->sector_size; 36 return nsblk->lbasize - nsblk_meta_size(nsblk);
37} 37}
38 38
39static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, 39static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
@@ -57,20 +57,29 @@ static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
57 return SIZE_MAX; 57 return SIZE_MAX;
58} 58}
59 59
60static struct nd_blk_region *to_ndbr(struct nd_namespace_blk *nsblk)
61{
62 struct nd_region *nd_region;
63 struct device *parent;
64
65 parent = nsblk->common.dev.parent;
66 nd_region = container_of(parent, struct nd_region, dev);
67 return container_of(nd_region, struct nd_blk_region, nd_region);
68}
69
60#ifdef CONFIG_BLK_DEV_INTEGRITY 70#ifdef CONFIG_BLK_DEV_INTEGRITY
61static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, 71static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
62 struct bio_integrity_payload *bip, u64 lba, 72 struct bio_integrity_payload *bip, u64 lba, int rw)
63 int rw)
64{ 73{
65 unsigned int len = nd_blk_meta_size(blk_dev); 74 struct nd_blk_region *ndbr = to_ndbr(nsblk);
75 unsigned int len = nsblk_meta_size(nsblk);
66 resource_size_t dev_offset, ns_offset; 76 resource_size_t dev_offset, ns_offset;
67 struct nd_namespace_blk *nsblk; 77 u32 internal_lbasize, sector_size;
68 struct nd_blk_region *ndbr;
69 int err = 0; 78 int err = 0;
70 79
71 nsblk = blk_dev->nsblk; 80 internal_lbasize = nsblk_internal_lbasize(nsblk);
72 ndbr = blk_dev->ndbr; 81 sector_size = nsblk_sector_size(nsblk);
73 ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; 82 ns_offset = lba * internal_lbasize + sector_size;
74 dev_offset = to_dev_offset(nsblk, ns_offset, len); 83 dev_offset = to_dev_offset(nsblk, ns_offset, len);
75 if (dev_offset == SIZE_MAX) 84 if (dev_offset == SIZE_MAX)
76 return -EIO; 85 return -EIO;
@@ -104,25 +113,26 @@ static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
104} 113}
105 114
106#else /* CONFIG_BLK_DEV_INTEGRITY */ 115#else /* CONFIG_BLK_DEV_INTEGRITY */
107static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, 116static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
108 struct bio_integrity_payload *bip, u64 lba, 117 struct bio_integrity_payload *bip, u64 lba, int rw)
109 int rw)
110{ 118{
111 return 0; 119 return 0;
112} 120}
113#endif 121#endif
114 122
115static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, 123static int nsblk_do_bvec(struct nd_namespace_blk *nsblk,
116 struct bio_integrity_payload *bip, struct page *page, 124 struct bio_integrity_payload *bip, struct page *page,
117 unsigned int len, unsigned int off, int rw, 125 unsigned int len, unsigned int off, int rw, sector_t sector)
118 sector_t sector)
119{ 126{
120 struct nd_blk_region *ndbr = blk_dev->ndbr; 127 struct nd_blk_region *ndbr = to_ndbr(nsblk);
121 resource_size_t dev_offset, ns_offset; 128 resource_size_t dev_offset, ns_offset;
129 u32 internal_lbasize, sector_size;
122 int err = 0; 130 int err = 0;
123 void *iobuf; 131 void *iobuf;
124 u64 lba; 132 u64 lba;
125 133
134 internal_lbasize = nsblk_internal_lbasize(nsblk);
135 sector_size = nsblk_sector_size(nsblk);
126 while (len) { 136 while (len) {
127 unsigned int cur_len; 137 unsigned int cur_len;
128 138
@@ -132,11 +142,11 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
132 * Block Window setup/move steps. the do_io routine is capable 142 * Block Window setup/move steps. the do_io routine is capable
133 * of handling len <= PAGE_SIZE. 143 * of handling len <= PAGE_SIZE.
134 */ 144 */
135 cur_len = bip ? min(len, blk_dev->sector_size) : len; 145 cur_len = bip ? min(len, sector_size) : len;
136 146
137 lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); 147 lba = div_u64(sector << SECTOR_SHIFT, sector_size);
138 ns_offset = lba * blk_dev->internal_lbasize; 148 ns_offset = lba * internal_lbasize;
139 dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); 149 dev_offset = to_dev_offset(nsblk, ns_offset, cur_len);
140 if (dev_offset == SIZE_MAX) 150 if (dev_offset == SIZE_MAX)
141 return -EIO; 151 return -EIO;
142 152
@@ -147,13 +157,13 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
147 return err; 157 return err;
148 158
149 if (bip) { 159 if (bip) {
150 err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); 160 err = nd_blk_rw_integrity(nsblk, bip, lba, rw);
151 if (err) 161 if (err)
152 return err; 162 return err;
153 } 163 }
154 len -= cur_len; 164 len -= cur_len;
155 off += cur_len; 165 off += cur_len;
156 sector += blk_dev->sector_size >> SECTOR_SHIFT; 166 sector += sector_size >> SECTOR_SHIFT;
157 } 167 }
158 168
159 return err; 169 return err;
@@ -161,10 +171,8 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
161 171
162static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) 172static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
163{ 173{
164 struct block_device *bdev = bio->bi_bdev;
165 struct gendisk *disk = bdev->bd_disk;
166 struct bio_integrity_payload *bip; 174 struct bio_integrity_payload *bip;
167 struct nd_blk_device *blk_dev; 175 struct nd_namespace_blk *nsblk;
168 struct bvec_iter iter; 176 struct bvec_iter iter;
169 unsigned long start; 177 unsigned long start;
170 struct bio_vec bvec; 178 struct bio_vec bvec;
@@ -183,17 +191,17 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
183 } 191 }
184 192
185 bip = bio_integrity(bio); 193 bip = bio_integrity(bio);
186 blk_dev = disk->private_data; 194 nsblk = q->queuedata;
187 rw = bio_data_dir(bio); 195 rw = bio_data_dir(bio);
188 do_acct = nd_iostat_start(bio, &start); 196 do_acct = nd_iostat_start(bio, &start);
189 bio_for_each_segment(bvec, bio, iter) { 197 bio_for_each_segment(bvec, bio, iter) {
190 unsigned int len = bvec.bv_len; 198 unsigned int len = bvec.bv_len;
191 199
192 BUG_ON(len > PAGE_SIZE); 200 BUG_ON(len > PAGE_SIZE);
193 err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, 201 err = nsblk_do_bvec(nsblk, bip, bvec.bv_page, len,
194 bvec.bv_offset, rw, iter.bi_sector); 202 bvec.bv_offset, rw, iter.bi_sector);
195 if (err) { 203 if (err) {
196 dev_info(&blk_dev->nsblk->common.dev, 204 dev_dbg(&nsblk->common.dev,
197 "io error in %s sector %lld, len %d,\n", 205 "io error in %s sector %lld, len %d,\n",
198 (rw == READ) ? "READ" : "WRITE", 206 (rw == READ) ? "READ" : "WRITE",
199 (unsigned long long) iter.bi_sector, len); 207 (unsigned long long) iter.bi_sector, len);
@@ -209,17 +217,16 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
209 return BLK_QC_T_NONE; 217 return BLK_QC_T_NONE;
210} 218}
211 219
212static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, 220static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
213 resource_size_t offset, void *iobuf, size_t n, int rw) 221 resource_size_t offset, void *iobuf, size_t n, int rw)
214{ 222{
215 struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); 223 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
216 struct nd_namespace_blk *nsblk = blk_dev->nsblk; 224 struct nd_blk_region *ndbr = to_ndbr(nsblk);
217 struct nd_blk_region *ndbr = blk_dev->ndbr;
218 resource_size_t dev_offset; 225 resource_size_t dev_offset;
219 226
220 dev_offset = to_dev_offset(nsblk, offset, n); 227 dev_offset = to_dev_offset(nsblk, offset, n);
221 228
222 if (unlikely(offset + n > blk_dev->disk_size)) { 229 if (unlikely(offset + n > nsblk->size)) {
223 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); 230 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
224 return -EFAULT; 231 return -EFAULT;
225 } 232 }
@@ -235,51 +242,65 @@ static const struct block_device_operations nd_blk_fops = {
235 .revalidate_disk = nvdimm_revalidate_disk, 242 .revalidate_disk = nvdimm_revalidate_disk,
236}; 243};
237 244
238static int nd_blk_attach_disk(struct nd_namespace_common *ndns, 245static void nd_blk_release_queue(void *q)
239 struct nd_blk_device *blk_dev) 246{
247 blk_cleanup_queue(q);
248}
249
250static void nd_blk_release_disk(void *disk)
251{
252 del_gendisk(disk);
253 put_disk(disk);
254}
255
256static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
240{ 257{
258 struct device *dev = &nsblk->common.dev;
241 resource_size_t available_disk_size; 259 resource_size_t available_disk_size;
260 struct request_queue *q;
242 struct gendisk *disk; 261 struct gendisk *disk;
243 u64 internal_nlba; 262 u64 internal_nlba;
244 263
245 internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); 264 internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
246 available_disk_size = internal_nlba * blk_dev->sector_size; 265 available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
247 266
248 blk_dev->queue = blk_alloc_queue(GFP_KERNEL); 267 q = blk_alloc_queue(GFP_KERNEL);
249 if (!blk_dev->queue) 268 if (!q)
250 return -ENOMEM; 269 return -ENOMEM;
270 if (devm_add_action(dev, nd_blk_release_queue, q)) {
271 blk_cleanup_queue(q);
272 return -ENOMEM;
273 }
251 274
252 blk_queue_make_request(blk_dev->queue, nd_blk_make_request); 275 blk_queue_make_request(q, nd_blk_make_request);
253 blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); 276 blk_queue_max_hw_sectors(q, UINT_MAX);
254 blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); 277 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
255 blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); 278 blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
256 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); 279 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
280 q->queuedata = nsblk;
257 281
258 disk = blk_dev->disk = alloc_disk(0); 282 disk = alloc_disk(0);
259 if (!disk) { 283 if (!disk)
260 blk_cleanup_queue(blk_dev->queue); 284 return -ENOMEM;
285 if (devm_add_action(dev, nd_blk_release_disk, disk)) {
286 put_disk(disk);
261 return -ENOMEM; 287 return -ENOMEM;
262 } 288 }
263 289
264 disk->driverfs_dev = &ndns->dev; 290 disk->driverfs_dev = dev;
265 disk->first_minor = 0; 291 disk->first_minor = 0;
266 disk->fops = &nd_blk_fops; 292 disk->fops = &nd_blk_fops;
267 disk->private_data = blk_dev; 293 disk->queue = q;
268 disk->queue = blk_dev->queue;
269 disk->flags = GENHD_FL_EXT_DEVT; 294 disk->flags = GENHD_FL_EXT_DEVT;
270 nvdimm_namespace_disk_name(ndns, disk->disk_name); 295 nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
271 set_capacity(disk, 0); 296 set_capacity(disk, 0);
272 add_disk(disk); 297 add_disk(disk);
273 298
274 if (nd_blk_meta_size(blk_dev)) { 299 if (nsblk_meta_size(nsblk)) {
275 int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); 300 int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
276 301
277 if (rc) { 302 if (rc)
278 del_gendisk(disk);
279 put_disk(disk);
280 blk_cleanup_queue(blk_dev->queue);
281 return rc; 303 return rc;
282 }
283 } 304 }
284 305
285 set_capacity(disk, available_disk_size >> SECTOR_SHIFT); 306 set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
@@ -291,56 +312,29 @@ static int nd_blk_probe(struct device *dev)
291{ 312{
292 struct nd_namespace_common *ndns; 313 struct nd_namespace_common *ndns;
293 struct nd_namespace_blk *nsblk; 314 struct nd_namespace_blk *nsblk;
294 struct nd_blk_device *blk_dev;
295 int rc;
296 315
297 ndns = nvdimm_namespace_common_probe(dev); 316 ndns = nvdimm_namespace_common_probe(dev);
298 if (IS_ERR(ndns)) 317 if (IS_ERR(ndns))
299 return PTR_ERR(ndns); 318 return PTR_ERR(ndns);
300 319
301 blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
302 if (!blk_dev)
303 return -ENOMEM;
304
305 nsblk = to_nd_namespace_blk(&ndns->dev); 320 nsblk = to_nd_namespace_blk(&ndns->dev);
306 blk_dev->disk_size = nvdimm_namespace_capacity(ndns); 321 nsblk->size = nvdimm_namespace_capacity(ndns);
307 blk_dev->ndbr = to_nd_blk_region(dev->parent); 322 dev_set_drvdata(dev, nsblk);
308 blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); 323
309 blk_dev->internal_lbasize = roundup(nsblk->lbasize, 324 ndns->rw_bytes = nsblk_rw_bytes;
310 INT_LBASIZE_ALIGNMENT);
311 blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512);
312 dev_set_drvdata(dev, blk_dev);
313
314 ndns->rw_bytes = nd_blk_rw_bytes;
315 if (is_nd_btt(dev)) 325 if (is_nd_btt(dev))
316 rc = nvdimm_namespace_attach_btt(ndns); 326 return nvdimm_namespace_attach_btt(ndns);
317 else if (nd_btt_probe(ndns, blk_dev) == 0) { 327 else if (nd_btt_probe(dev, ndns) == 0) {
318 /* we'll come back as btt-blk */ 328 /* we'll come back as btt-blk */
319 rc = -ENXIO; 329 return -ENXIO;
320 } else 330 } else
321 rc = nd_blk_attach_disk(ndns, blk_dev); 331 return nsblk_attach_disk(nsblk);
322 if (rc)
323 kfree(blk_dev);
324 return rc;
325}
326
327static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
328{
329 del_gendisk(blk_dev->disk);
330 put_disk(blk_dev->disk);
331 blk_cleanup_queue(blk_dev->queue);
332} 332}
333 333
334static int nd_blk_remove(struct device *dev) 334static int nd_blk_remove(struct device *dev)
335{ 335{
336 struct nd_blk_device *blk_dev = dev_get_drvdata(dev);
337
338 if (is_nd_btt(dev)) 336 if (is_nd_btt(dev))
339 nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); 337 nvdimm_namespace_detach_btt(to_nd_btt(dev));
340 else
341 nd_blk_detach_disk(blk_dev);
342 kfree(blk_dev);
343
344 return 0; 338 return 0;
345} 339}
346 340
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index f068b6513cd2..cc9fafed9362 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1306,7 +1306,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
1306 struct btt *btt; 1306 struct btt *btt;
1307 struct device *dev = &nd_btt->dev; 1307 struct device *dev = &nd_btt->dev;
1308 1308
1309 btt = kzalloc(sizeof(struct btt), GFP_KERNEL); 1309 btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
1310 if (!btt) 1310 if (!btt)
1311 return NULL; 1311 return NULL;
1312 1312
@@ -1321,13 +1321,13 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
1321 ret = discover_arenas(btt); 1321 ret = discover_arenas(btt);
1322 if (ret) { 1322 if (ret) {
1323 dev_err(dev, "init: error in arena_discover: %d\n", ret); 1323 dev_err(dev, "init: error in arena_discover: %d\n", ret);
1324 goto out_free; 1324 return NULL;
1325 } 1325 }
1326 1326
1327 if (btt->init_state != INIT_READY && nd_region->ro) { 1327 if (btt->init_state != INIT_READY && nd_region->ro) {
1328 dev_info(dev, "%s is read-only, unable to init btt metadata\n", 1328 dev_info(dev, "%s is read-only, unable to init btt metadata\n",
1329 dev_name(&nd_region->dev)); 1329 dev_name(&nd_region->dev));
1330 goto out_free; 1330 return NULL;
1331 } else if (btt->init_state != INIT_READY) { 1331 } else if (btt->init_state != INIT_READY) {
1332 btt->num_arenas = (rawsize / ARENA_MAX_SIZE) + 1332 btt->num_arenas = (rawsize / ARENA_MAX_SIZE) +
1333 ((rawsize % ARENA_MAX_SIZE) ? 1 : 0); 1333 ((rawsize % ARENA_MAX_SIZE) ? 1 : 0);
@@ -1337,29 +1337,25 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
1337 ret = create_arenas(btt); 1337 ret = create_arenas(btt);
1338 if (ret) { 1338 if (ret) {
1339 dev_info(dev, "init: create_arenas: %d\n", ret); 1339 dev_info(dev, "init: create_arenas: %d\n", ret);
1340 goto out_free; 1340 return NULL;
1341 } 1341 }
1342 1342
1343 ret = btt_meta_init(btt); 1343 ret = btt_meta_init(btt);
1344 if (ret) { 1344 if (ret) {
1345 dev_err(dev, "init: error in meta_init: %d\n", ret); 1345 dev_err(dev, "init: error in meta_init: %d\n", ret);
1346 goto out_free; 1346 return NULL;
1347 } 1347 }
1348 } 1348 }
1349 1349
1350 ret = btt_blk_init(btt); 1350 ret = btt_blk_init(btt);
1351 if (ret) { 1351 if (ret) {
1352 dev_err(dev, "init: error in blk_init: %d\n", ret); 1352 dev_err(dev, "init: error in blk_init: %d\n", ret);
1353 goto out_free; 1353 return NULL;
1354 } 1354 }
1355 1355
1356 btt_debugfs_init(btt); 1356 btt_debugfs_init(btt);
1357 1357
1358 return btt; 1358 return btt;
1359
1360 out_free:
1361 kfree(btt);
1362 return NULL;
1363} 1359}
1364 1360
1365/** 1361/**
@@ -1377,7 +1373,6 @@ static void btt_fini(struct btt *btt)
1377 btt_blk_cleanup(btt); 1373 btt_blk_cleanup(btt);
1378 free_arenas(btt); 1374 free_arenas(btt);
1379 debugfs_remove_recursive(btt->debugfs_dir); 1375 debugfs_remove_recursive(btt->debugfs_dir);
1380 kfree(btt);
1381 } 1376 }
1382} 1377}
1383 1378
@@ -1406,9 +1401,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
1406} 1401}
1407EXPORT_SYMBOL(nvdimm_namespace_attach_btt); 1402EXPORT_SYMBOL(nvdimm_namespace_attach_btt);
1408 1403
1409int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns) 1404int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt)
1410{ 1405{
1411 struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
1412 struct btt *btt = nd_btt->btt; 1406 struct btt *btt = nd_btt->btt;
1413 1407
1414 btt_fini(btt); 1408 btt_fini(btt);
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index cb477518dd0e..816d0dae6398 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -273,10 +273,10 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
273 return 0; 273 return 0;
274} 274}
275 275
276int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) 276int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
277{ 277{
278 int rc; 278 int rc;
279 struct device *dev; 279 struct device *btt_dev;
280 struct btt_sb *btt_sb; 280 struct btt_sb *btt_sb;
281 struct nd_region *nd_region = to_nd_region(ndns->dev.parent); 281 struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
282 282
@@ -284,21 +284,19 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
284 return -ENODEV; 284 return -ENODEV;
285 285
286 nvdimm_bus_lock(&ndns->dev); 286 nvdimm_bus_lock(&ndns->dev);
287 dev = __nd_btt_create(nd_region, 0, NULL, ndns); 287 btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns);
288 nvdimm_bus_unlock(&ndns->dev); 288 nvdimm_bus_unlock(&ndns->dev);
289 if (!dev) 289 if (!btt_dev)
290 return -ENOMEM; 290 return -ENOMEM;
291 dev_set_drvdata(dev, drvdata); 291 btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL);
292 btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL); 292 rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb);
293 rc = __nd_btt_probe(to_nd_btt(dev), ndns, btt_sb); 293 dev_dbg(dev, "%s: btt: %s\n", __func__,
294 kfree(btt_sb); 294 rc == 0 ? dev_name(btt_dev) : "<none>");
295 dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__,
296 rc == 0 ? dev_name(dev) : "<none>");
297 if (rc < 0) { 295 if (rc < 0) {
298 struct nd_btt *nd_btt = to_nd_btt(dev); 296 struct nd_btt *nd_btt = to_nd_btt(btt_dev);
299 297
300 __nd_detach_ndns(dev, &nd_btt->ndns); 298 __nd_detach_ndns(btt_dev, &nd_btt->ndns);
301 put_device(dev); 299 put_device(btt_dev);
302 } 300 }
303 301
304 return rc; 302 return rc;
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 19f822d7f652..97589e3cb852 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -40,6 +40,8 @@ static int to_nd_device_type(struct device *dev)
40 return ND_DEVICE_REGION_PMEM; 40 return ND_DEVICE_REGION_PMEM;
41 else if (is_nd_blk(dev)) 41 else if (is_nd_blk(dev))
42 return ND_DEVICE_REGION_BLK; 42 return ND_DEVICE_REGION_BLK;
43 else if (is_nd_dax(dev))
44 return ND_DEVICE_DAX_PMEM;
43 else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) 45 else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
44 return nd_region_to_nstype(to_nd_region(dev->parent)); 46 return nd_region_to_nstype(to_nd_region(dev->parent));
45 47
@@ -246,6 +248,8 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
246 248
247void __nd_device_register(struct device *dev) 249void __nd_device_register(struct device *dev)
248{ 250{
251 if (!dev)
252 return;
249 dev->bus = &nvdimm_bus_type; 253 dev->bus = &nvdimm_bus_type;
250 get_device(dev); 254 get_device(dev);
251 async_schedule_domain(nd_async_device_register, dev, 255 async_schedule_domain(nd_async_device_register, dev,
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index e8f03b0e95e4..5f53db59a058 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -12,6 +12,7 @@
12 */ 12 */
13#include <linux/device.h> 13#include <linux/device.h>
14#include <linux/sizes.h> 14#include <linux/sizes.h>
15#include <linux/pmem.h>
15#include "nd-core.h" 16#include "nd-core.h"
16#include "pfn.h" 17#include "pfn.h"
17#include "btt.h" 18#include "btt.h"
@@ -84,6 +85,8 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns)
84 seed = nd_region->btt_seed; 85 seed = nd_region->btt_seed;
85 else if (is_nd_pfn(dev)) 86 else if (is_nd_pfn(dev))
86 seed = nd_region->pfn_seed; 87 seed = nd_region->pfn_seed;
88 else if (is_nd_dax(dev))
89 seed = nd_region->dax_seed;
87 90
88 if (seed == dev || ndns || dev->driver) 91 if (seed == dev || ndns || dev->driver)
89 return false; 92 return false;
@@ -199,3 +202,63 @@ u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb)
199 return sum; 202 return sum;
200} 203}
201EXPORT_SYMBOL(nd_sb_checksum); 204EXPORT_SYMBOL(nd_sb_checksum);
205
206static int nsio_rw_bytes(struct nd_namespace_common *ndns,
207 resource_size_t offset, void *buf, size_t size, int rw)
208{
209 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
210
211 if (unlikely(offset + size > nsio->size)) {
212 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
213 return -EFAULT;
214 }
215
216 if (rw == READ) {
217 unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
218
219 if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align)))
220 return -EIO;
221 return memcpy_from_pmem(buf, nsio->addr + offset, size);
222 } else {
223 memcpy_to_pmem(nsio->addr + offset, buf, size);
224 wmb_pmem();
225 }
226
227 return 0;
228}
229
230int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
231{
232 struct resource *res = &nsio->res;
233 struct nd_namespace_common *ndns = &nsio->common;
234
235 nsio->size = resource_size(res);
236 if (!devm_request_mem_region(dev, res->start, resource_size(res),
237 dev_name(dev))) {
238 dev_warn(dev, "could not reserve region %pR\n", res);
239 return -EBUSY;
240 }
241
242 ndns->rw_bytes = nsio_rw_bytes;
243 if (devm_init_badblocks(dev, &nsio->bb))
244 return -ENOMEM;
245 nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb,
246 &nsio->res);
247
248 nsio->addr = devm_memremap(dev, res->start, resource_size(res),
249 ARCH_MEMREMAP_PMEM);
250 if (IS_ERR(nsio->addr))
251 return PTR_ERR(nsio->addr);
252 return 0;
253}
254EXPORT_SYMBOL_GPL(devm_nsio_enable);
255
256void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio)
257{
258 struct resource *res = &nsio->res;
259
260 devm_memunmap(dev, nsio->addr);
261 devm_exit_badblocks(dev, &nsio->bb);
262 devm_release_mem_region(dev, res->start, resource_size(res));
263}
264EXPORT_SYMBOL_GPL(devm_nsio_disable);
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
new file mode 100644
index 000000000000..f90f7549e7f4
--- /dev/null
+++ b/drivers/nvdimm/dax_devs.c
@@ -0,0 +1,99 @@
1/*
2 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/device.h>
14#include <linux/sizes.h>
15#include <linux/slab.h>
16#include <linux/mm.h>
17#include "nd-core.h"
18#include "nd.h"
19
20static void nd_dax_release(struct device *dev)
21{
22 struct nd_region *nd_region = to_nd_region(dev->parent);
23 struct nd_dax *nd_dax = to_nd_dax(dev);
24 struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
25
26 dev_dbg(dev, "%s\n", __func__);
27 nd_detach_ndns(dev, &nd_pfn->ndns);
28 ida_simple_remove(&nd_region->dax_ida, nd_pfn->id);
29 kfree(nd_pfn->uuid);
30 kfree(nd_dax);
31}
32
33static struct device_type nd_dax_device_type = {
34 .name = "nd_dax",
35 .release = nd_dax_release,
36};
37
38bool is_nd_dax(struct device *dev)
39{
40 return dev ? dev->type == &nd_dax_device_type : false;
41}
42EXPORT_SYMBOL(is_nd_dax);
43
44struct nd_dax *to_nd_dax(struct device *dev)
45{
46 struct nd_dax *nd_dax = container_of(dev, struct nd_dax, nd_pfn.dev);
47
48 WARN_ON(!is_nd_dax(dev));
49 return nd_dax;
50}
51EXPORT_SYMBOL(to_nd_dax);
52
53static const struct attribute_group *nd_dax_attribute_groups[] = {
54 &nd_pfn_attribute_group,
55 &nd_device_attribute_group,
56 &nd_numa_attribute_group,
57 NULL,
58};
59
60static struct nd_dax *nd_dax_alloc(struct nd_region *nd_region)
61{
62 struct nd_pfn *nd_pfn;
63 struct nd_dax *nd_dax;
64 struct device *dev;
65
66 nd_dax = kzalloc(sizeof(*nd_dax), GFP_KERNEL);
67 if (!nd_dax)
68 return NULL;
69
70 nd_pfn = &nd_dax->nd_pfn;
71 nd_pfn->id = ida_simple_get(&nd_region->dax_ida, 0, 0, GFP_KERNEL);
72 if (nd_pfn->id < 0) {
73 kfree(nd_dax);
74 return NULL;
75 }
76
77 dev = &nd_pfn->dev;
78 dev_set_name(dev, "dax%d.%d", nd_region->id, nd_pfn->id);
79 dev->groups = nd_dax_attribute_groups;
80 dev->type = &nd_dax_device_type;
81 dev->parent = &nd_region->dev;
82
83 return nd_dax;
84}
85
86struct device *nd_dax_create(struct nd_region *nd_region)
87{
88 struct device *dev = NULL;
89 struct nd_dax *nd_dax;
90
91 if (!is_nd_pmem(&nd_region->dev))
92 return NULL;
93
94 nd_dax = nd_dax_alloc(nd_region);
95 if (nd_dax)
96 dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL);
97 __nd_device_register(dev);
98 return dev;
99}
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index f5cb88601359..c5e3196c45b0 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1288,6 +1288,8 @@ static ssize_t mode_show(struct device *dev,
1288 mode = "safe"; 1288 mode = "safe";
1289 else if (claim && is_nd_pfn(claim)) 1289 else if (claim && is_nd_pfn(claim))
1290 mode = "memory"; 1290 mode = "memory";
1291 else if (claim && is_nd_dax(claim))
1292 mode = "dax";
1291 else if (!claim && pmem_should_map_pages(dev)) 1293 else if (!claim && pmem_should_map_pages(dev))
1292 mode = "memory"; 1294 mode = "memory";
1293 else 1295 else
@@ -1379,21 +1381,19 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
1379{ 1381{
1380 struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; 1382 struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
1381 struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL; 1383 struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
1382 struct nd_namespace_common *ndns; 1384 struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL;
1385 struct nd_namespace_common *ndns = NULL;
1383 resource_size_t size; 1386 resource_size_t size;
1384 1387
1385 if (nd_btt || nd_pfn) { 1388 if (nd_btt || nd_pfn || nd_dax) {
1386 struct device *host = NULL; 1389 if (nd_btt)
1387
1388 if (nd_btt) {
1389 host = &nd_btt->dev;
1390 ndns = nd_btt->ndns; 1390 ndns = nd_btt->ndns;
1391 } else if (nd_pfn) { 1391 else if (nd_pfn)
1392 host = &nd_pfn->dev;
1393 ndns = nd_pfn->ndns; 1392 ndns = nd_pfn->ndns;
1394 } 1393 else if (nd_dax)
1394 ndns = nd_dax->nd_pfn.ndns;
1395 1395
1396 if (!ndns || !host) 1396 if (!ndns)
1397 return ERR_PTR(-ENODEV); 1397 return ERR_PTR(-ENODEV);
1398 1398
1399 /* 1399 /*
@@ -1404,12 +1404,12 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
1404 device_unlock(&ndns->dev); 1404 device_unlock(&ndns->dev);
1405 if (ndns->dev.driver) { 1405 if (ndns->dev.driver) {
1406 dev_dbg(&ndns->dev, "is active, can't bind %s\n", 1406 dev_dbg(&ndns->dev, "is active, can't bind %s\n",
1407 dev_name(host)); 1407 dev_name(dev));
1408 return ERR_PTR(-EBUSY); 1408 return ERR_PTR(-EBUSY);
1409 } 1409 }
1410 if (dev_WARN_ONCE(&ndns->dev, ndns->claim != host, 1410 if (dev_WARN_ONCE(&ndns->dev, ndns->claim != dev,
1411 "host (%s) vs claim (%s) mismatch\n", 1411 "host (%s) vs claim (%s) mismatch\n",
1412 dev_name(host), 1412 dev_name(dev),
1413 dev_name(ndns->claim))) 1413 dev_name(ndns->claim)))
1414 return ERR_PTR(-ENXIO); 1414 return ERR_PTR(-ENXIO);
1415 } else { 1415 } else {
@@ -1784,6 +1784,18 @@ void nd_region_create_blk_seed(struct nd_region *nd_region)
1784 nd_device_register(nd_region->ns_seed); 1784 nd_device_register(nd_region->ns_seed);
1785} 1785}
1786 1786
1787void nd_region_create_dax_seed(struct nd_region *nd_region)
1788{
1789 WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
1790 nd_region->dax_seed = nd_dax_create(nd_region);
1791 /*
1792 * Seed creation failures are not fatal, provisioning is simply
1793 * disabled until memory becomes available
1794 */
1795 if (!nd_region->dax_seed)
1796 dev_err(&nd_region->dev, "failed to create dax namespace\n");
1797}
1798
1787void nd_region_create_pfn_seed(struct nd_region *nd_region) 1799void nd_region_create_pfn_seed(struct nd_region *nd_region)
1788{ 1800{
1789 WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); 1801 WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 1d1500f3d8b5..cb65308c0329 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -54,6 +54,7 @@ struct nd_region;
54void nd_region_create_blk_seed(struct nd_region *nd_region); 54void nd_region_create_blk_seed(struct nd_region *nd_region);
55void nd_region_create_btt_seed(struct nd_region *nd_region); 55void nd_region_create_btt_seed(struct nd_region *nd_region);
56void nd_region_create_pfn_seed(struct nd_region *nd_region); 56void nd_region_create_pfn_seed(struct nd_region *nd_region);
57void nd_region_create_dax_seed(struct nd_region *nd_region);
57void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev); 58void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev);
58int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); 59int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
59void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); 60void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 875c524fafb0..46910b8f32b1 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -13,6 +13,7 @@
13#ifndef __ND_H__ 13#ifndef __ND_H__
14#define __ND_H__ 14#define __ND_H__
15#include <linux/libnvdimm.h> 15#include <linux/libnvdimm.h>
16#include <linux/badblocks.h>
16#include <linux/blkdev.h> 17#include <linux/blkdev.h>
17#include <linux/device.h> 18#include <linux/device.h>
18#include <linux/mutex.h> 19#include <linux/mutex.h>
@@ -100,10 +101,12 @@ struct nd_region {
100 struct ida ns_ida; 101 struct ida ns_ida;
101 struct ida btt_ida; 102 struct ida btt_ida;
102 struct ida pfn_ida; 103 struct ida pfn_ida;
104 struct ida dax_ida;
103 unsigned long flags; 105 unsigned long flags;
104 struct device *ns_seed; 106 struct device *ns_seed;
105 struct device *btt_seed; 107 struct device *btt_seed;
106 struct device *pfn_seed; 108 struct device *pfn_seed;
109 struct device *dax_seed;
107 u16 ndr_mappings; 110 u16 ndr_mappings;
108 u64 ndr_size; 111 u64 ndr_size;
109 u64 ndr_start; 112 u64 ndr_start;
@@ -160,6 +163,10 @@ struct nd_pfn {
160 struct nd_namespace_common *ndns; 163 struct nd_namespace_common *ndns;
161}; 164};
162 165
166struct nd_dax {
167 struct nd_pfn nd_pfn;
168};
169
163enum nd_async_mode { 170enum nd_async_mode {
164 ND_SYNC, 171 ND_SYNC,
165 ND_ASYNC, 172 ND_ASYNC,
@@ -197,11 +204,12 @@ struct nd_gen_sb {
197 204
198u64 nd_sb_checksum(struct nd_gen_sb *sb); 205u64 nd_sb_checksum(struct nd_gen_sb *sb);
199#if IS_ENABLED(CONFIG_BTT) 206#if IS_ENABLED(CONFIG_BTT)
200int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata); 207int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns);
201bool is_nd_btt(struct device *dev); 208bool is_nd_btt(struct device *dev);
202struct device *nd_btt_create(struct nd_region *nd_region); 209struct device *nd_btt_create(struct nd_region *nd_region);
203#else 210#else
204static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) 211static inline int nd_btt_probe(struct device *dev,
212 struct nd_namespace_common *ndns)
205{ 213{
206 return -ENODEV; 214 return -ENODEV;
207} 215}
@@ -219,12 +227,16 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
219 227
220struct nd_pfn *to_nd_pfn(struct device *dev); 228struct nd_pfn *to_nd_pfn(struct device *dev);
221#if IS_ENABLED(CONFIG_NVDIMM_PFN) 229#if IS_ENABLED(CONFIG_NVDIMM_PFN)
222int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata); 230int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
223bool is_nd_pfn(struct device *dev); 231bool is_nd_pfn(struct device *dev);
224struct device *nd_pfn_create(struct nd_region *nd_region); 232struct device *nd_pfn_create(struct nd_region *nd_region);
233struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
234 struct nd_namespace_common *ndns);
225int nd_pfn_validate(struct nd_pfn *nd_pfn); 235int nd_pfn_validate(struct nd_pfn *nd_pfn);
236extern struct attribute_group nd_pfn_attribute_group;
226#else 237#else
227static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) 238static inline int nd_pfn_probe(struct device *dev,
239 struct nd_namespace_common *ndns)
228{ 240{
229 return -ENODEV; 241 return -ENODEV;
230} 242}
@@ -245,6 +257,22 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
245} 257}
246#endif 258#endif
247 259
260struct nd_dax *to_nd_dax(struct device *dev);
261#if IS_ENABLED(CONFIG_NVDIMM_DAX)
262bool is_nd_dax(struct device *dev);
263struct device *nd_dax_create(struct nd_region *nd_region);
264#else
265static inline bool is_nd_dax(struct device *dev)
266{
267 return false;
268}
269
270static inline struct device *nd_dax_create(struct nd_region *nd_region)
271{
272 return NULL;
273}
274#endif
275
248struct nd_region *to_nd_region(struct device *dev); 276struct nd_region *to_nd_region(struct device *dev);
249int nd_region_to_nstype(struct nd_region *nd_region); 277int nd_region_to_nstype(struct nd_region *nd_region);
250int nd_region_register_namespaces(struct nd_region *nd_region, int *err); 278int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
@@ -263,11 +291,32 @@ struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
263resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns); 291resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
264struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev); 292struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev);
265int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns); 293int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
266int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns); 294int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt);
267const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, 295const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
268 char *name); 296 char *name);
269void nvdimm_badblocks_populate(struct nd_region *nd_region, 297void nvdimm_badblocks_populate(struct nd_region *nd_region,
270 struct badblocks *bb, const struct resource *res); 298 struct badblocks *bb, const struct resource *res);
299#if IS_ENABLED(CONFIG_ND_CLAIM)
300struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
301 struct resource *res, struct vmem_altmap *altmap);
302int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
303void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
304#else
305static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
306 struct resource *res, struct vmem_altmap *altmap)
307{
308 return ERR_PTR(-ENXIO);
309}
310static inline int devm_nsio_enable(struct device *dev,
311 struct nd_namespace_io *nsio)
312{
313 return -ENXIO;
314}
315static inline void devm_nsio_disable(struct device *dev,
316 struct nd_namespace_io *nsio)
317{
318}
319#endif
271int nd_blk_region_init(struct nd_region *nd_region); 320int nd_blk_region_init(struct nd_region *nd_region);
272void __nd_iostat_start(struct bio *bio, unsigned long *start); 321void __nd_iostat_start(struct bio *bio, unsigned long *start);
273static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) 322static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
@@ -281,6 +330,19 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
281 return true; 330 return true;
282} 331}
283void nd_iostat_end(struct bio *bio, unsigned long start); 332void nd_iostat_end(struct bio *bio, unsigned long start);
333static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
334 unsigned int len)
335{
336 if (bb->count) {
337 sector_t first_bad;
338 int num_bad;
339
340 return !!badblocks_check(bb, sector, len / 512, &first_bad,
341 &num_bad);
342 }
343
344 return false;
345}
284resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); 346resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
285const u8 *nd_dev_to_uuid(struct device *dev); 347const u8 *nd_dev_to_uuid(struct device *dev);
286bool pmem_should_map_pages(struct device *dev); 348bool pmem_should_map_pages(struct device *dev);
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index 8e343a3ca873..9d2704c83fa7 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -33,7 +33,9 @@ struct nd_pfn_sb {
33 /* minor-version-1 additions for section alignment */ 33 /* minor-version-1 additions for section alignment */
34 __le32 start_pad; 34 __le32 start_pad;
35 __le32 end_trunc; 35 __le32 end_trunc;
36 u8 padding[4004]; 36 /* minor-version-2 record the base alignment of the mapping */
37 __le32 align;
38 u8 padding[4000];
37 __le64 checksum; 39 __le64 checksum;
38}; 40};
39 41
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index e071e214feba..2248056d29e7 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. 2 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as 5 * it under the terms of version 2 of the GNU General Public License as
@@ -10,6 +10,7 @@
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details. 11 * General Public License for more details.
12 */ 12 */
13#include <linux/memremap.h>
13#include <linux/blkdev.h> 14#include <linux/blkdev.h>
14#include <linux/device.h> 15#include <linux/device.h>
15#include <linux/genhd.h> 16#include <linux/genhd.h>
@@ -53,10 +54,29 @@ struct nd_pfn *to_nd_pfn(struct device *dev)
53} 54}
54EXPORT_SYMBOL(to_nd_pfn); 55EXPORT_SYMBOL(to_nd_pfn);
55 56
57static struct nd_pfn *to_nd_pfn_safe(struct device *dev)
58{
59 /*
60 * pfn device attributes are re-used by dax device instances, so we
61 * need to be careful to correct device-to-nd_pfn conversion.
62 */
63 if (is_nd_pfn(dev))
64 return to_nd_pfn(dev);
65
66 if (is_nd_dax(dev)) {
67 struct nd_dax *nd_dax = to_nd_dax(dev);
68
69 return &nd_dax->nd_pfn;
70 }
71
72 WARN_ON(1);
73 return NULL;
74}
75
56static ssize_t mode_show(struct device *dev, 76static ssize_t mode_show(struct device *dev,
57 struct device_attribute *attr, char *buf) 77 struct device_attribute *attr, char *buf)
58{ 78{
59 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 79 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
60 80
61 switch (nd_pfn->mode) { 81 switch (nd_pfn->mode) {
62 case PFN_MODE_RAM: 82 case PFN_MODE_RAM:
@@ -71,7 +91,7 @@ static ssize_t mode_show(struct device *dev,
71static ssize_t mode_store(struct device *dev, 91static ssize_t mode_store(struct device *dev,
72 struct device_attribute *attr, const char *buf, size_t len) 92 struct device_attribute *attr, const char *buf, size_t len)
73{ 93{
74 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 94 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
75 ssize_t rc = 0; 95 ssize_t rc = 0;
76 96
77 device_lock(dev); 97 device_lock(dev);
@@ -105,7 +125,7 @@ static DEVICE_ATTR_RW(mode);
105static ssize_t align_show(struct device *dev, 125static ssize_t align_show(struct device *dev,
106 struct device_attribute *attr, char *buf) 126 struct device_attribute *attr, char *buf)
107{ 127{
108 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 128 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
109 129
110 return sprintf(buf, "%lx\n", nd_pfn->align); 130 return sprintf(buf, "%lx\n", nd_pfn->align);
111} 131}
@@ -133,7 +153,7 @@ static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf)
133static ssize_t align_store(struct device *dev, 153static ssize_t align_store(struct device *dev,
134 struct device_attribute *attr, const char *buf, size_t len) 154 struct device_attribute *attr, const char *buf, size_t len)
135{ 155{
136 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 156 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
137 ssize_t rc; 157 ssize_t rc;
138 158
139 device_lock(dev); 159 device_lock(dev);
@@ -151,7 +171,7 @@ static DEVICE_ATTR_RW(align);
151static ssize_t uuid_show(struct device *dev, 171static ssize_t uuid_show(struct device *dev,
152 struct device_attribute *attr, char *buf) 172 struct device_attribute *attr, char *buf)
153{ 173{
154 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 174 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
155 175
156 if (nd_pfn->uuid) 176 if (nd_pfn->uuid)
157 return sprintf(buf, "%pUb\n", nd_pfn->uuid); 177 return sprintf(buf, "%pUb\n", nd_pfn->uuid);
@@ -161,7 +181,7 @@ static ssize_t uuid_show(struct device *dev,
161static ssize_t uuid_store(struct device *dev, 181static ssize_t uuid_store(struct device *dev,
162 struct device_attribute *attr, const char *buf, size_t len) 182 struct device_attribute *attr, const char *buf, size_t len)
163{ 183{
164 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 184 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
165 ssize_t rc; 185 ssize_t rc;
166 186
167 device_lock(dev); 187 device_lock(dev);
@@ -177,7 +197,7 @@ static DEVICE_ATTR_RW(uuid);
177static ssize_t namespace_show(struct device *dev, 197static ssize_t namespace_show(struct device *dev,
178 struct device_attribute *attr, char *buf) 198 struct device_attribute *attr, char *buf)
179{ 199{
180 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 200 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
181 ssize_t rc; 201 ssize_t rc;
182 202
183 nvdimm_bus_lock(dev); 203 nvdimm_bus_lock(dev);
@@ -190,7 +210,7 @@ static ssize_t namespace_show(struct device *dev,
190static ssize_t namespace_store(struct device *dev, 210static ssize_t namespace_store(struct device *dev,
191 struct device_attribute *attr, const char *buf, size_t len) 211 struct device_attribute *attr, const char *buf, size_t len)
192{ 212{
193 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 213 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
194 ssize_t rc; 214 ssize_t rc;
195 215
196 device_lock(dev); 216 device_lock(dev);
@@ -208,7 +228,7 @@ static DEVICE_ATTR_RW(namespace);
208static ssize_t resource_show(struct device *dev, 228static ssize_t resource_show(struct device *dev,
209 struct device_attribute *attr, char *buf) 229 struct device_attribute *attr, char *buf)
210{ 230{
211 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 231 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
212 ssize_t rc; 232 ssize_t rc;
213 233
214 device_lock(dev); 234 device_lock(dev);
@@ -234,7 +254,7 @@ static DEVICE_ATTR_RO(resource);
234static ssize_t size_show(struct device *dev, 254static ssize_t size_show(struct device *dev,
235 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
236{ 256{
237 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 257 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
238 ssize_t rc; 258 ssize_t rc;
239 259
240 device_lock(dev); 260 device_lock(dev);
@@ -269,7 +289,7 @@ static struct attribute *nd_pfn_attributes[] = {
269 NULL, 289 NULL,
270}; 290};
271 291
272static struct attribute_group nd_pfn_attribute_group = { 292struct attribute_group nd_pfn_attribute_group = {
273 .attrs = nd_pfn_attributes, 293 .attrs = nd_pfn_attributes,
274}; 294};
275 295
@@ -280,16 +300,32 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = {
280 NULL, 300 NULL,
281}; 301};
282 302
283static struct device *__nd_pfn_create(struct nd_region *nd_region, 303struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
284 struct nd_namespace_common *ndns) 304 struct nd_namespace_common *ndns)
285{ 305{
286 struct nd_pfn *nd_pfn; 306 struct device *dev = &nd_pfn->dev;
287 struct device *dev;
288 307
289 /* we can only create pages for contiguous ranged of pmem */ 308 if (!nd_pfn)
290 if (!is_nd_pmem(&nd_region->dev))
291 return NULL; 309 return NULL;
292 310
311 nd_pfn->mode = PFN_MODE_NONE;
312 nd_pfn->align = HPAGE_SIZE;
313 dev = &nd_pfn->dev;
314 device_initialize(&nd_pfn->dev);
315 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
316 dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
317 __func__, dev_name(ndns->claim));
318 put_device(dev);
319 return NULL;
320 }
321 return dev;
322}
323
324static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region)
325{
326 struct nd_pfn *nd_pfn;
327 struct device *dev;
328
293 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); 329 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL);
294 if (!nd_pfn) 330 if (!nd_pfn)
295 return NULL; 331 return NULL;
@@ -300,29 +336,27 @@ static struct device *__nd_pfn_create(struct nd_region *nd_region,
300 return NULL; 336 return NULL;
301 } 337 }
302 338
303 nd_pfn->mode = PFN_MODE_NONE;
304 nd_pfn->align = HPAGE_SIZE;
305 dev = &nd_pfn->dev; 339 dev = &nd_pfn->dev;
306 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); 340 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
307 dev->parent = &nd_region->dev;
308 dev->type = &nd_pfn_device_type;
309 dev->groups = nd_pfn_attribute_groups; 341 dev->groups = nd_pfn_attribute_groups;
310 device_initialize(&nd_pfn->dev); 342 dev->type = &nd_pfn_device_type;
311 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { 343 dev->parent = &nd_region->dev;
312 dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", 344
313 __func__, dev_name(ndns->claim)); 345 return nd_pfn;
314 put_device(dev);
315 return NULL;
316 }
317 return dev;
318} 346}
319 347
320struct device *nd_pfn_create(struct nd_region *nd_region) 348struct device *nd_pfn_create(struct nd_region *nd_region)
321{ 349{
322 struct device *dev = __nd_pfn_create(nd_region, NULL); 350 struct nd_pfn *nd_pfn;
351 struct device *dev;
352
353 if (!is_nd_pmem(&nd_region->dev))
354 return NULL;
323 355
324 if (dev) 356 nd_pfn = nd_pfn_alloc(nd_region);
325 __nd_device_register(dev); 357 dev = nd_pfn_devinit(nd_pfn, NULL);
358
359 __nd_device_register(dev);
326 return dev; 360 return dev;
327} 361}
328 362
@@ -360,6 +394,9 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
360 pfn_sb->end_trunc = 0; 394 pfn_sb->end_trunc = 0;
361 } 395 }
362 396
397 if (__le16_to_cpu(pfn_sb->version_minor) < 2)
398 pfn_sb->align = 0;
399
363 switch (le32_to_cpu(pfn_sb->mode)) { 400 switch (le32_to_cpu(pfn_sb->mode)) {
364 case PFN_MODE_RAM: 401 case PFN_MODE_RAM:
365 case PFN_MODE_PMEM: 402 case PFN_MODE_PMEM:
@@ -399,7 +436,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
399 return -EBUSY; 436 return -EBUSY;
400 } 437 }
401 438
402 nd_pfn->align = 1UL << ilog2(offset); 439 nd_pfn->align = le32_to_cpu(pfn_sb->align);
403 if (!is_power_of_2(offset) || offset < PAGE_SIZE) { 440 if (!is_power_of_2(offset) || offset < PAGE_SIZE) {
404 dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n", 441 dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
405 offset); 442 offset);
@@ -410,11 +447,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
410} 447}
411EXPORT_SYMBOL(nd_pfn_validate); 448EXPORT_SYMBOL(nd_pfn_validate);
412 449
413int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) 450int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
414{ 451{
415 int rc; 452 int rc;
416 struct device *dev;
417 struct nd_pfn *nd_pfn; 453 struct nd_pfn *nd_pfn;
454 struct device *pfn_dev;
418 struct nd_pfn_sb *pfn_sb; 455 struct nd_pfn_sb *pfn_sb;
419 struct nd_region *nd_region = to_nd_region(ndns->dev.parent); 456 struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
420 457
@@ -422,25 +459,213 @@ int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
422 return -ENODEV; 459 return -ENODEV;
423 460
424 nvdimm_bus_lock(&ndns->dev); 461 nvdimm_bus_lock(&ndns->dev);
425 dev = __nd_pfn_create(nd_region, ndns); 462 nd_pfn = nd_pfn_alloc(nd_region);
463 pfn_dev = nd_pfn_devinit(nd_pfn, ndns);
426 nvdimm_bus_unlock(&ndns->dev); 464 nvdimm_bus_unlock(&ndns->dev);
427 if (!dev) 465 if (!pfn_dev)
428 return -ENOMEM; 466 return -ENOMEM;
429 dev_set_drvdata(dev, drvdata); 467 pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
430 pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); 468 nd_pfn = to_nd_pfn(pfn_dev);
431 nd_pfn = to_nd_pfn(dev);
432 nd_pfn->pfn_sb = pfn_sb; 469 nd_pfn->pfn_sb = pfn_sb;
433 rc = nd_pfn_validate(nd_pfn); 470 rc = nd_pfn_validate(nd_pfn);
434 nd_pfn->pfn_sb = NULL; 471 dev_dbg(dev, "%s: pfn: %s\n", __func__,
435 kfree(pfn_sb); 472 rc == 0 ? dev_name(pfn_dev) : "<none>");
436 dev_dbg(&ndns->dev, "%s: pfn: %s\n", __func__,
437 rc == 0 ? dev_name(dev) : "<none>");
438 if (rc < 0) { 473 if (rc < 0) {
439 __nd_detach_ndns(dev, &nd_pfn->ndns); 474 __nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
440 put_device(dev); 475 put_device(pfn_dev);
441 } else 476 } else
442 __nd_device_register(&nd_pfn->dev); 477 __nd_device_register(pfn_dev);
443 478
444 return rc; 479 return rc;
445} 480}
446EXPORT_SYMBOL(nd_pfn_probe); 481EXPORT_SYMBOL(nd_pfn_probe);
482
483/*
484 * We hotplug memory at section granularity, pad the reserved area from
485 * the previous section base to the namespace base address.
486 */
487static unsigned long init_altmap_base(resource_size_t base)
488{
489 unsigned long base_pfn = PHYS_PFN(base);
490
491 return PFN_SECTION_ALIGN_DOWN(base_pfn);
492}
493
494static unsigned long init_altmap_reserve(resource_size_t base)
495{
496 unsigned long reserve = PHYS_PFN(SZ_8K);
497 unsigned long base_pfn = PHYS_PFN(base);
498
499 reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
500 return reserve;
501}
502
503static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
504 struct resource *res, struct vmem_altmap *altmap)
505{
506 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
507 u64 offset = le64_to_cpu(pfn_sb->dataoff);
508 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
509 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
510 struct nd_namespace_common *ndns = nd_pfn->ndns;
511 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
512 resource_size_t base = nsio->res.start + start_pad;
513 struct vmem_altmap __altmap = {
514 .base_pfn = init_altmap_base(base),
515 .reserve = init_altmap_reserve(base),
516 };
517
518 memcpy(res, &nsio->res, sizeof(*res));
519 res->start += start_pad;
520 res->end -= end_trunc;
521
522 nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
523 if (nd_pfn->mode == PFN_MODE_RAM) {
524 if (offset < SZ_8K)
525 return ERR_PTR(-EINVAL);
526 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
527 altmap = NULL;
528 } else if (nd_pfn->mode == PFN_MODE_PMEM) {
529 nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE;
530 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
531 dev_info(&nd_pfn->dev,
532 "number of pfns truncated from %lld to %ld\n",
533 le64_to_cpu(nd_pfn->pfn_sb->npfns),
534 nd_pfn->npfns);
535 memcpy(altmap, &__altmap, sizeof(*altmap));
536 altmap->free = PHYS_PFN(offset - SZ_8K);
537 altmap->alloc = 0;
538 } else
539 return ERR_PTR(-ENXIO);
540
541 return altmap;
542}
543
544static int nd_pfn_init(struct nd_pfn *nd_pfn)
545{
546 u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
547 struct nd_namespace_common *ndns = nd_pfn->ndns;
548 u32 start_pad = 0, end_trunc = 0;
549 resource_size_t start, size;
550 struct nd_namespace_io *nsio;
551 struct nd_region *nd_region;
552 struct nd_pfn_sb *pfn_sb;
553 unsigned long npfns;
554 phys_addr_t offset;
555 u64 checksum;
556 int rc;
557
558 pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
559 if (!pfn_sb)
560 return -ENOMEM;
561
562 nd_pfn->pfn_sb = pfn_sb;
563 rc = nd_pfn_validate(nd_pfn);
564 if (rc != -ENODEV)
565 return rc;
566
567 /* no info block, do init */;
568 nd_region = to_nd_region(nd_pfn->dev.parent);
569 if (nd_region->ro) {
570 dev_info(&nd_pfn->dev,
571 "%s is read-only, unable to init metadata\n",
572 dev_name(&nd_region->dev));
573 return -ENXIO;
574 }
575
576 memset(pfn_sb, 0, sizeof(*pfn_sb));
577
578 /*
579 * Check if pmem collides with 'System RAM' when section aligned and
580 * trim it accordingly
581 */
582 nsio = to_nd_namespace_io(&ndns->dev);
583 start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
584 size = resource_size(&nsio->res);
585 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
586 IORES_DESC_NONE) == REGION_MIXED) {
587 start = nsio->res.start;
588 start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
589 }
590
591 start = nsio->res.start;
592 size = PHYS_SECTION_ALIGN_UP(start + size) - start;
593 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
594 IORES_DESC_NONE) == REGION_MIXED) {
595 size = resource_size(&nsio->res);
596 end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
597 }
598
599 if (start_pad + end_trunc)
600 dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
601 dev_name(&ndns->dev), start_pad + end_trunc);
602
603 /*
604 * Note, we use 64 here for the standard size of struct page,
605 * debugging options may cause it to be larger in which case the
606 * implementation will limit the pfns advertised through
607 * ->direct_access() to those that are included in the memmap.
608 */
609 start += start_pad;
610 size = resource_size(&nsio->res);
611 npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K;
612 if (nd_pfn->mode == PFN_MODE_PMEM) {
613 unsigned long memmap_size;
614
615 /*
616 * vmemmap_populate_hugepages() allocates the memmap array in
617 * HPAGE_SIZE chunks.
618 */
619 memmap_size = ALIGN(64 * npfns, HPAGE_SIZE);
620 offset = ALIGN(start + SZ_8K + memmap_size + dax_label_reserve,
621 nd_pfn->align) - start;
622 } else if (nd_pfn->mode == PFN_MODE_RAM)
623 offset = ALIGN(start + SZ_8K + dax_label_reserve,
624 nd_pfn->align) - start;
625 else
626 return -ENXIO;
627
628 if (offset + start_pad + end_trunc >= size) {
629 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
630 dev_name(&ndns->dev));
631 return -ENXIO;
632 }
633
634 npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
635 pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
636 pfn_sb->dataoff = cpu_to_le64(offset);
637 pfn_sb->npfns = cpu_to_le64(npfns);
638 memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
639 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
640 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
641 pfn_sb->version_major = cpu_to_le16(1);
642 pfn_sb->version_minor = cpu_to_le16(2);
643 pfn_sb->start_pad = cpu_to_le32(start_pad);
644 pfn_sb->end_trunc = cpu_to_le32(end_trunc);
645 pfn_sb->align = cpu_to_le32(nd_pfn->align);
646 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
647 pfn_sb->checksum = cpu_to_le64(checksum);
648
649 return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
650}
651
652/*
653 * Determine the effective resource range and vmem_altmap from an nd_pfn
654 * instance.
655 */
656struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
657 struct resource *res, struct vmem_altmap *altmap)
658{
659 int rc;
660
661 if (!nd_pfn->uuid || !nd_pfn->ndns)
662 return ERR_PTR(-ENODEV);
663
664 rc = nd_pfn_init(nd_pfn);
665 if (rc)
666 return ERR_PTR(rc);
667
668 /* we need a valid pfn_sb before we can init a vmem_altmap */
669 return __nvdimm_setup_pfn(nd_pfn, res, altmap);
670}
671EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 92f536596b24..d9a0dbc2d023 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -33,10 +33,6 @@
33#include "nd.h" 33#include "nd.h"
34 34
35struct pmem_device { 35struct pmem_device {
36 struct request_queue *pmem_queue;
37 struct gendisk *pmem_disk;
38 struct nd_namespace_common *ndns;
39
40 /* One contiguous memory region per device */ 36 /* One contiguous memory region per device */
41 phys_addr_t phys_addr; 37 phys_addr_t phys_addr;
42 /* when non-zero this device is hosting a 'pfn' instance */ 38 /* when non-zero this device is hosting a 'pfn' instance */
@@ -50,23 +46,10 @@ struct pmem_device {
50 struct badblocks bb; 46 struct badblocks bb;
51}; 47};
52 48
53static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
54{
55 if (bb->count) {
56 sector_t first_bad;
57 int num_bad;
58
59 return !!badblocks_check(bb, sector, len / 512, &first_bad,
60 &num_bad);
61 }
62
63 return false;
64}
65
66static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, 49static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
67 unsigned int len) 50 unsigned int len)
68{ 51{
69 struct device *dev = disk_to_dev(pmem->pmem_disk); 52 struct device *dev = pmem->bb.dev;
70 sector_t sector; 53 sector_t sector;
71 long cleared; 54 long cleared;
72 55
@@ -136,8 +119,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
136 unsigned long start; 119 unsigned long start;
137 struct bio_vec bvec; 120 struct bio_vec bvec;
138 struct bvec_iter iter; 121 struct bvec_iter iter;
139 struct block_device *bdev = bio->bi_bdev; 122 struct pmem_device *pmem = q->queuedata;
140 struct pmem_device *pmem = bdev->bd_disk->private_data;
141 123
142 do_acct = nd_iostat_start(bio, &start); 124 do_acct = nd_iostat_start(bio, &start);
143 bio_for_each_segment(bvec, bio, iter) { 125 bio_for_each_segment(bvec, bio, iter) {
@@ -162,7 +144,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
162static int pmem_rw_page(struct block_device *bdev, sector_t sector, 144static int pmem_rw_page(struct block_device *bdev, sector_t sector,
163 struct page *page, int rw) 145 struct page *page, int rw)
164{ 146{
165 struct pmem_device *pmem = bdev->bd_disk->private_data; 147 struct pmem_device *pmem = bdev->bd_queue->queuedata;
166 int rc; 148 int rc;
167 149
168 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); 150 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
@@ -184,7 +166,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
184static long pmem_direct_access(struct block_device *bdev, sector_t sector, 166static long pmem_direct_access(struct block_device *bdev, sector_t sector,
185 void __pmem **kaddr, pfn_t *pfn) 167 void __pmem **kaddr, pfn_t *pfn)
186{ 168{
187 struct pmem_device *pmem = bdev->bd_disk->private_data; 169 struct pmem_device *pmem = bdev->bd_queue->queuedata;
188 resource_size_t offset = sector * 512 + pmem->data_offset; 170 resource_size_t offset = sector * 512 + pmem->data_offset;
189 171
190 *kaddr = pmem->virt_addr + offset; 172 *kaddr = pmem->virt_addr + offset;
@@ -200,104 +182,119 @@ static const struct block_device_operations pmem_fops = {
200 .revalidate_disk = nvdimm_revalidate_disk, 182 .revalidate_disk = nvdimm_revalidate_disk,
201}; 183};
202 184
203static struct pmem_device *pmem_alloc(struct device *dev, 185static void pmem_release_queue(void *q)
204 struct resource *res, int id) 186{
187 blk_cleanup_queue(q);
188}
189
190void pmem_release_disk(void *disk)
205{ 191{
192 del_gendisk(disk);
193 put_disk(disk);
194}
195
196static int pmem_attach_disk(struct device *dev,
197 struct nd_namespace_common *ndns)
198{
199 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
200 struct vmem_altmap __altmap, *altmap = NULL;
201 struct resource *res = &nsio->res;
202 struct nd_pfn *nd_pfn = NULL;
203 int nid = dev_to_node(dev);
204 struct nd_pfn_sb *pfn_sb;
206 struct pmem_device *pmem; 205 struct pmem_device *pmem;
206 struct resource pfn_res;
207 struct request_queue *q; 207 struct request_queue *q;
208 struct gendisk *disk;
209 void *addr;
210
211 /* while nsio_rw_bytes is active, parse a pfn info block if present */
212 if (is_nd_pfn(dev)) {
213 nd_pfn = to_nd_pfn(dev);
214 altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap);
215 if (IS_ERR(altmap))
216 return PTR_ERR(altmap);
217 }
218
219 /* we're attaching a block device, disable raw namespace access */
220 devm_nsio_disable(dev, nsio);
208 221
209 pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); 222 pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
210 if (!pmem) 223 if (!pmem)
211 return ERR_PTR(-ENOMEM); 224 return -ENOMEM;
212 225
226 dev_set_drvdata(dev, pmem);
213 pmem->phys_addr = res->start; 227 pmem->phys_addr = res->start;
214 pmem->size = resource_size(res); 228 pmem->size = resource_size(res);
215 if (!arch_has_wmb_pmem()) 229 if (!arch_has_wmb_pmem())
216 dev_warn(dev, "unable to guarantee persistence of writes\n"); 230 dev_warn(dev, "unable to guarantee persistence of writes\n");
217 231
218 if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, 232 if (!devm_request_mem_region(dev, res->start, resource_size(res),
219 dev_name(dev))) { 233 dev_name(dev))) {
220 dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", 234 dev_warn(dev, "could not reserve region %pR\n", res);
221 &pmem->phys_addr, pmem->size); 235 return -EBUSY;
222 return ERR_PTR(-EBUSY);
223 } 236 }
224 237
225 q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); 238 q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
226 if (!q) 239 if (!q)
227 return ERR_PTR(-ENOMEM); 240 return -ENOMEM;
228 241
229 pmem->pfn_flags = PFN_DEV; 242 pmem->pfn_flags = PFN_DEV;
230 if (pmem_should_map_pages(dev)) { 243 if (is_nd_pfn(dev)) {
231 pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, 244 addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
245 altmap);
246 pfn_sb = nd_pfn->pfn_sb;
247 pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
248 pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
249 pmem->pfn_flags |= PFN_MAP;
250 res = &pfn_res; /* for badblocks populate */
251 res->start += pmem->data_offset;
252 } else if (pmem_should_map_pages(dev)) {
253 addr = devm_memremap_pages(dev, &nsio->res,
232 &q->q_usage_counter, NULL); 254 &q->q_usage_counter, NULL);
233 pmem->pfn_flags |= PFN_MAP; 255 pmem->pfn_flags |= PFN_MAP;
234 } else 256 } else
235 pmem->virt_addr = (void __pmem *) devm_memremap(dev, 257 addr = devm_memremap(dev, pmem->phys_addr,
236 pmem->phys_addr, pmem->size, 258 pmem->size, ARCH_MEMREMAP_PMEM);
237 ARCH_MEMREMAP_PMEM);
238 259
239 if (IS_ERR(pmem->virt_addr)) { 260 /*
261 * At release time the queue must be dead before
262 * devm_memremap_pages is unwound
263 */
264 if (devm_add_action(dev, pmem_release_queue, q)) {
240 blk_cleanup_queue(q); 265 blk_cleanup_queue(q);
241 return (void __force *) pmem->virt_addr; 266 return -ENOMEM;
242 } 267 }
243 268
244 pmem->pmem_queue = q; 269 if (IS_ERR(addr))
245 return pmem; 270 return PTR_ERR(addr);
246} 271 pmem->virt_addr = (void __pmem *) addr;
247
248static void pmem_detach_disk(struct pmem_device *pmem)
249{
250 if (!pmem->pmem_disk)
251 return;
252
253 del_gendisk(pmem->pmem_disk);
254 put_disk(pmem->pmem_disk);
255 blk_cleanup_queue(pmem->pmem_queue);
256}
257 272
258static int pmem_attach_disk(struct device *dev, 273 blk_queue_make_request(q, pmem_make_request);
259 struct nd_namespace_common *ndns, struct pmem_device *pmem) 274 blk_queue_physical_block_size(q, PAGE_SIZE);
260{ 275 blk_queue_max_hw_sectors(q, UINT_MAX);
261 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 276 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
262 int nid = dev_to_node(dev); 277 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
263 struct resource bb_res; 278 q->queuedata = pmem;
264 struct gendisk *disk;
265
266 blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
267 blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
268 blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
269 blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
270 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
271 279
272 disk = alloc_disk_node(0, nid); 280 disk = alloc_disk_node(0, nid);
273 if (!disk) { 281 if (!disk)
274 blk_cleanup_queue(pmem->pmem_queue); 282 return -ENOMEM;
283 if (devm_add_action(dev, pmem_release_disk, disk)) {
284 put_disk(disk);
275 return -ENOMEM; 285 return -ENOMEM;
276 } 286 }
277 287
278 disk->fops = &pmem_fops; 288 disk->fops = &pmem_fops;
279 disk->private_data = pmem; 289 disk->queue = q;
280 disk->queue = pmem->pmem_queue;
281 disk->flags = GENHD_FL_EXT_DEVT; 290 disk->flags = GENHD_FL_EXT_DEVT;
282 nvdimm_namespace_disk_name(ndns, disk->disk_name); 291 nvdimm_namespace_disk_name(ndns, disk->disk_name);
283 disk->driverfs_dev = dev; 292 disk->driverfs_dev = dev;
284 set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) 293 set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
285 / 512); 294 / 512);
286 pmem->pmem_disk = disk;
287 devm_exit_badblocks(dev, &pmem->bb);
288 if (devm_init_badblocks(dev, &pmem->bb)) 295 if (devm_init_badblocks(dev, &pmem->bb))
289 return -ENOMEM; 296 return -ENOMEM;
290 bb_res.start = nsio->res.start + pmem->data_offset; 297 nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
291 bb_res.end = nsio->res.end;
292 if (is_nd_pfn(dev)) {
293 struct nd_pfn *nd_pfn = to_nd_pfn(dev);
294 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
295
296 bb_res.start += __le32_to_cpu(pfn_sb->start_pad);
297 bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc);
298 }
299 nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb,
300 &bb_res);
301 disk->bb = &pmem->bb; 298 disk->bb = &pmem->bb;
302 add_disk(disk); 299 add_disk(disk);
303 revalidate_disk(disk); 300 revalidate_disk(disk);
@@ -305,346 +302,67 @@ static int pmem_attach_disk(struct device *dev,
305 return 0; 302 return 0;
306} 303}
307 304
308static int pmem_rw_bytes(struct nd_namespace_common *ndns,
309 resource_size_t offset, void *buf, size_t size, int rw)
310{
311 struct pmem_device *pmem = dev_get_drvdata(ndns->claim);
312
313 if (unlikely(offset + size > pmem->size)) {
314 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
315 return -EFAULT;
316 }
317
318 if (rw == READ) {
319 unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
320
321 if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
322 return -EIO;
323 return memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
324 } else {
325 memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
326 wmb_pmem();
327 }
328
329 return 0;
330}
331
332static int nd_pfn_init(struct nd_pfn *nd_pfn)
333{
334 struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
335 struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
336 struct nd_namespace_common *ndns = nd_pfn->ndns;
337 u32 start_pad = 0, end_trunc = 0;
338 resource_size_t start, size;
339 struct nd_namespace_io *nsio;
340 struct nd_region *nd_region;
341 unsigned long npfns;
342 phys_addr_t offset;
343 u64 checksum;
344 int rc;
345
346 if (!pfn_sb)
347 return -ENOMEM;
348
349 nd_pfn->pfn_sb = pfn_sb;
350 rc = nd_pfn_validate(nd_pfn);
351 if (rc == -ENODEV)
352 /* no info block, do init */;
353 else
354 return rc;
355
356 nd_region = to_nd_region(nd_pfn->dev.parent);
357 if (nd_region->ro) {
358 dev_info(&nd_pfn->dev,
359 "%s is read-only, unable to init metadata\n",
360 dev_name(&nd_region->dev));
361 goto err;
362 }
363
364 memset(pfn_sb, 0, sizeof(*pfn_sb));
365
366 /*
367 * Check if pmem collides with 'System RAM' when section aligned and
368 * trim it accordingly
369 */
370 nsio = to_nd_namespace_io(&ndns->dev);
371 start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
372 size = resource_size(&nsio->res);
373 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
374 IORES_DESC_NONE) == REGION_MIXED) {
375
376 start = nsio->res.start;
377 start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
378 }
379
380 start = nsio->res.start;
381 size = PHYS_SECTION_ALIGN_UP(start + size) - start;
382 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
383 IORES_DESC_NONE) == REGION_MIXED) {
384 size = resource_size(&nsio->res);
385 end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
386 }
387
388 if (start_pad + end_trunc)
389 dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
390 dev_name(&ndns->dev), start_pad + end_trunc);
391
392 /*
393 * Note, we use 64 here for the standard size of struct page,
394 * debugging options may cause it to be larger in which case the
395 * implementation will limit the pfns advertised through
396 * ->direct_access() to those that are included in the memmap.
397 */
398 start += start_pad;
399 npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K;
400 if (nd_pfn->mode == PFN_MODE_PMEM) {
401 unsigned long memmap_size;
402
403 /*
404 * vmemmap_populate_hugepages() allocates the memmap array in
405 * PMD_SIZE chunks.
406 */
407 memmap_size = ALIGN(64 * npfns, PMD_SIZE);
408 offset = ALIGN(start + SZ_8K + memmap_size, nd_pfn->align)
409 - start;
410 } else if (nd_pfn->mode == PFN_MODE_RAM)
411 offset = ALIGN(start + SZ_8K, nd_pfn->align) - start;
412 else
413 goto err;
414
415 if (offset + start_pad + end_trunc >= pmem->size) {
416 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
417 dev_name(&ndns->dev));
418 goto err;
419 }
420
421 npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K;
422 pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
423 pfn_sb->dataoff = cpu_to_le64(offset);
424 pfn_sb->npfns = cpu_to_le64(npfns);
425 memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
426 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
427 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
428 pfn_sb->version_major = cpu_to_le16(1);
429 pfn_sb->version_minor = cpu_to_le16(1);
430 pfn_sb->start_pad = cpu_to_le32(start_pad);
431 pfn_sb->end_trunc = cpu_to_le32(end_trunc);
432 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
433 pfn_sb->checksum = cpu_to_le64(checksum);
434
435 rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
436 if (rc)
437 goto err;
438
439 return 0;
440 err:
441 nd_pfn->pfn_sb = NULL;
442 kfree(pfn_sb);
443 return -ENXIO;
444}
445
446static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
447{
448 struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
449 struct pmem_device *pmem;
450
451 /* free pmem disk */
452 pmem = dev_get_drvdata(&nd_pfn->dev);
453 pmem_detach_disk(pmem);
454
455 /* release nd_pfn resources */
456 kfree(nd_pfn->pfn_sb);
457 nd_pfn->pfn_sb = NULL;
458
459 return 0;
460}
461
462/*
463 * We hotplug memory at section granularity, pad the reserved area from
464 * the previous section base to the namespace base address.
465 */
466static unsigned long init_altmap_base(resource_size_t base)
467{
468 unsigned long base_pfn = PHYS_PFN(base);
469
470 return PFN_SECTION_ALIGN_DOWN(base_pfn);
471}
472
473static unsigned long init_altmap_reserve(resource_size_t base)
474{
475 unsigned long reserve = PHYS_PFN(SZ_8K);
476 unsigned long base_pfn = PHYS_PFN(base);
477
478 reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
479 return reserve;
480}
481
482static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn)
483{
484 int rc;
485 struct resource res;
486 struct request_queue *q;
487 struct pmem_device *pmem;
488 struct vmem_altmap *altmap;
489 struct device *dev = &nd_pfn->dev;
490 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
491 struct nd_namespace_common *ndns = nd_pfn->ndns;
492 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
493 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
494 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
495 resource_size_t base = nsio->res.start + start_pad;
496 struct vmem_altmap __altmap = {
497 .base_pfn = init_altmap_base(base),
498 .reserve = init_altmap_reserve(base),
499 };
500
501 pmem = dev_get_drvdata(dev);
502 pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
503 pmem->pfn_pad = start_pad + end_trunc;
504 nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
505 if (nd_pfn->mode == PFN_MODE_RAM) {
506 if (pmem->data_offset < SZ_8K)
507 return -EINVAL;
508 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
509 altmap = NULL;
510 } else if (nd_pfn->mode == PFN_MODE_PMEM) {
511 nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset)
512 / PAGE_SIZE;
513 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
514 dev_info(&nd_pfn->dev,
515 "number of pfns truncated from %lld to %ld\n",
516 le64_to_cpu(nd_pfn->pfn_sb->npfns),
517 nd_pfn->npfns);
518 altmap = & __altmap;
519 altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K);
520 altmap->alloc = 0;
521 } else {
522 rc = -ENXIO;
523 goto err;
524 }
525
526 /* establish pfn range for lookup, and switch to direct map */
527 q = pmem->pmem_queue;
528 memcpy(&res, &nsio->res, sizeof(res));
529 res.start += start_pad;
530 res.end -= end_trunc;
531 devm_memunmap(dev, (void __force *) pmem->virt_addr);
532 pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res,
533 &q->q_usage_counter, altmap);
534 pmem->pfn_flags |= PFN_MAP;
535 if (IS_ERR(pmem->virt_addr)) {
536 rc = PTR_ERR(pmem->virt_addr);
537 goto err;
538 }
539
540 /* attach pmem disk in "pfn-mode" */
541 rc = pmem_attach_disk(dev, ndns, pmem);
542 if (rc)
543 goto err;
544
545 return rc;
546 err:
547 nvdimm_namespace_detach_pfn(ndns);
548 return rc;
549
550}
551
552static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
553{
554 struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
555 int rc;
556
557 if (!nd_pfn->uuid || !nd_pfn->ndns)
558 return -ENODEV;
559
560 rc = nd_pfn_init(nd_pfn);
561 if (rc)
562 return rc;
563 /* we need a valid pfn_sb before we can init a vmem_altmap */
564 return __nvdimm_namespace_attach_pfn(nd_pfn);
565}
566
567static int nd_pmem_probe(struct device *dev) 305static int nd_pmem_probe(struct device *dev)
568{ 306{
569 struct nd_region *nd_region = to_nd_region(dev->parent);
570 struct nd_namespace_common *ndns; 307 struct nd_namespace_common *ndns;
571 struct nd_namespace_io *nsio;
572 struct pmem_device *pmem;
573 308
574 ndns = nvdimm_namespace_common_probe(dev); 309 ndns = nvdimm_namespace_common_probe(dev);
575 if (IS_ERR(ndns)) 310 if (IS_ERR(ndns))
576 return PTR_ERR(ndns); 311 return PTR_ERR(ndns);
577 312
578 nsio = to_nd_namespace_io(&ndns->dev); 313 if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev)))
579 pmem = pmem_alloc(dev, &nsio->res, nd_region->id); 314 return -ENXIO;
580 if (IS_ERR(pmem))
581 return PTR_ERR(pmem);
582
583 pmem->ndns = ndns;
584 dev_set_drvdata(dev, pmem);
585 ndns->rw_bytes = pmem_rw_bytes;
586 if (devm_init_badblocks(dev, &pmem->bb))
587 return -ENOMEM;
588 nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res);
589 315
590 if (is_nd_btt(dev)) { 316 if (is_nd_btt(dev))
591 /* btt allocates its own request_queue */
592 blk_cleanup_queue(pmem->pmem_queue);
593 pmem->pmem_queue = NULL;
594 return nvdimm_namespace_attach_btt(ndns); 317 return nvdimm_namespace_attach_btt(ndns);
595 }
596 318
597 if (is_nd_pfn(dev)) 319 if (is_nd_pfn(dev))
598 return nvdimm_namespace_attach_pfn(ndns); 320 return pmem_attach_disk(dev, ndns);
599 321
600 if (nd_btt_probe(ndns, pmem) == 0 || nd_pfn_probe(ndns, pmem) == 0) { 322 /* if we find a valid info-block we'll come back as that personality */
601 /* 323 if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0)
602 * We'll come back as either btt-pmem, or pfn-pmem, so
603 * drop the queue allocation for now.
604 */
605 blk_cleanup_queue(pmem->pmem_queue);
606 return -ENXIO; 324 return -ENXIO;
607 }
608 325
609 return pmem_attach_disk(dev, ndns, pmem); 326 /* ...otherwise we're just a raw pmem device */
327 return pmem_attach_disk(dev, ndns);
610} 328}
611 329
612static int nd_pmem_remove(struct device *dev) 330static int nd_pmem_remove(struct device *dev)
613{ 331{
614 struct pmem_device *pmem = dev_get_drvdata(dev);
615
616 if (is_nd_btt(dev)) 332 if (is_nd_btt(dev))
617 nvdimm_namespace_detach_btt(pmem->ndns); 333 nvdimm_namespace_detach_btt(to_nd_btt(dev));
618 else if (is_nd_pfn(dev))
619 nvdimm_namespace_detach_pfn(pmem->ndns);
620 else
621 pmem_detach_disk(pmem);
622
623 return 0; 334 return 0;
624} 335}
625 336
626static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) 337static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
627{ 338{
628 struct pmem_device *pmem = dev_get_drvdata(dev);
629 struct nd_namespace_common *ndns = pmem->ndns;
630 struct nd_region *nd_region = to_nd_region(dev->parent); 339 struct nd_region *nd_region = to_nd_region(dev->parent);
631 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 340 struct pmem_device *pmem = dev_get_drvdata(dev);
632 struct resource res = { 341 resource_size_t offset = 0, end_trunc = 0;
633 .start = nsio->res.start + pmem->data_offset, 342 struct nd_namespace_common *ndns;
634 .end = nsio->res.end, 343 struct nd_namespace_io *nsio;
635 }; 344 struct resource res;
636 345
637 if (event != NVDIMM_REVALIDATE_POISON) 346 if (event != NVDIMM_REVALIDATE_POISON)
638 return; 347 return;
639 348
640 if (is_nd_pfn(dev)) { 349 if (is_nd_btt(dev)) {
350 struct nd_btt *nd_btt = to_nd_btt(dev);
351
352 ndns = nd_btt->ndns;
353 } else if (is_nd_pfn(dev)) {
641 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 354 struct nd_pfn *nd_pfn = to_nd_pfn(dev);
642 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 355 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
643 356
644 res.start += __le32_to_cpu(pfn_sb->start_pad); 357 ndns = nd_pfn->ndns;
645 res.end -= __le32_to_cpu(pfn_sb->end_trunc); 358 offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad);
646 } 359 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
360 } else
361 ndns = to_ndns(dev);
647 362
363 nsio = to_nd_namespace_io(&ndns->dev);
364 res.start = nsio->res.start + offset;
365 res.end = nsio->res.end - end_trunc;
648 nvdimm_badblocks_populate(nd_region, &pmem->bb, &res); 366 nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
649} 367}
650 368
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 4b7715e29cff..05a912359939 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -54,6 +54,7 @@ static int nd_region_probe(struct device *dev)
54 54
55 nd_region->btt_seed = nd_btt_create(nd_region); 55 nd_region->btt_seed = nd_btt_create(nd_region);
56 nd_region->pfn_seed = nd_pfn_create(nd_region); 56 nd_region->pfn_seed = nd_pfn_create(nd_region);
57 nd_region->dax_seed = nd_dax_create(nd_region);
57 if (err == 0) 58 if (err == 0)
58 return 0; 59 return 0;
59 60
@@ -86,6 +87,7 @@ static int nd_region_remove(struct device *dev)
86 nd_region->ns_seed = NULL; 87 nd_region->ns_seed = NULL;
87 nd_region->btt_seed = NULL; 88 nd_region->btt_seed = NULL;
88 nd_region->pfn_seed = NULL; 89 nd_region->pfn_seed = NULL;
90 nd_region->dax_seed = NULL;
89 dev_set_drvdata(dev, NULL); 91 dev_set_drvdata(dev, NULL);
90 nvdimm_bus_unlock(dev); 92 nvdimm_bus_unlock(dev);
91 93
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 139bf71ca549..9e1b054e0e61 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -306,6 +306,23 @@ static ssize_t pfn_seed_show(struct device *dev,
306} 306}
307static DEVICE_ATTR_RO(pfn_seed); 307static DEVICE_ATTR_RO(pfn_seed);
308 308
309static ssize_t dax_seed_show(struct device *dev,
310 struct device_attribute *attr, char *buf)
311{
312 struct nd_region *nd_region = to_nd_region(dev);
313 ssize_t rc;
314
315 nvdimm_bus_lock(dev);
316 if (nd_region->dax_seed)
317 rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed));
318 else
319 rc = sprintf(buf, "\n");
320 nvdimm_bus_unlock(dev);
321
322 return rc;
323}
324static DEVICE_ATTR_RO(dax_seed);
325
309static ssize_t read_only_show(struct device *dev, 326static ssize_t read_only_show(struct device *dev,
310 struct device_attribute *attr, char *buf) 327 struct device_attribute *attr, char *buf)
311{ 328{
@@ -335,6 +352,7 @@ static struct attribute *nd_region_attributes[] = {
335 &dev_attr_mappings.attr, 352 &dev_attr_mappings.attr,
336 &dev_attr_btt_seed.attr, 353 &dev_attr_btt_seed.attr,
337 &dev_attr_pfn_seed.attr, 354 &dev_attr_pfn_seed.attr,
355 &dev_attr_dax_seed.attr,
338 &dev_attr_read_only.attr, 356 &dev_attr_read_only.attr,
339 &dev_attr_set_cookie.attr, 357 &dev_attr_set_cookie.attr,
340 &dev_attr_available_size.attr, 358 &dev_attr_available_size.attr,
@@ -353,6 +371,9 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
353 if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr) 371 if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr)
354 return 0; 372 return 0;
355 373
374 if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr)
375 return 0;
376
356 if (a != &dev_attr_set_cookie.attr 377 if (a != &dev_attr_set_cookie.attr
357 && a != &dev_attr_available_size.attr) 378 && a != &dev_attr_available_size.attr)
358 return a->mode; 379 return a->mode;
@@ -441,6 +462,13 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
441 nd_region_create_pfn_seed(nd_region); 462 nd_region_create_pfn_seed(nd_region);
442 nvdimm_bus_unlock(dev); 463 nvdimm_bus_unlock(dev);
443 } 464 }
465 if (is_nd_dax(dev) && probe) {
466 nd_region = to_nd_region(dev->parent);
467 nvdimm_bus_lock(dev);
468 if (nd_region->dax_seed == dev)
469 nd_region_create_dax_seed(nd_region);
470 nvdimm_bus_unlock(dev);
471 }
444} 472}
445 473
446void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) 474void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev)
@@ -718,6 +746,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
718 ida_init(&nd_region->ns_ida); 746 ida_init(&nd_region->ns_ida);
719 ida_init(&nd_region->btt_ida); 747 ida_init(&nd_region->btt_ida);
720 ida_init(&nd_region->pfn_ida); 748 ida_init(&nd_region->pfn_ida);
749 ida_init(&nd_region->dax_ida);
721 dev = &nd_region->dev; 750 dev = &nd_region->dev;
722 dev_set_name(dev, "region%d", nd_region->id); 751 dev_set_name(dev, "region%d", nd_region->id);
723 dev->parent = &nvdimm_bus->dev; 752 dev->parent = &nvdimm_bus->dev;