aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2016-05-21 15:33:04 -0400
committerDan Williams <dan.j.williams@intel.com>2016-05-21 15:33:04 -0400
commit36092ee8ba695fce023b2118ececa6c2a56b1331 (patch)
treeb9579893cdd559e7b72fa569003b19792de58fad
parent1b982baf75e7d9585967fcfccd05b77bf9054010 (diff)
parent03dca343afe080968d90c4d9196404b5bbbc8461 (diff)
Merge branch 'for-4.7/dax' into libnvdimm-for-next
-rw-r--r--block/ioctl.c32
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/dax/Kconfig26
-rw-r--r--drivers/dax/Makefile4
-rw-r--r--drivers/dax/dax.c575
-rw-r--r--drivers/dax/dax.h24
-rw-r--r--drivers/dax/pmem.c158
-rw-r--r--drivers/nvdimm/bus.c9
-rw-r--r--drivers/nvdimm/claim.c23
-rw-r--r--drivers/nvdimm/core.c3
-rw-r--r--drivers/nvdimm/dax_devs.c35
-rw-r--r--drivers/nvdimm/dimm_devs.c5
-rw-r--r--drivers/nvdimm/nd-core.h3
-rw-r--r--drivers/nvdimm/nd.h11
-rw-r--r--drivers/nvdimm/pfn.h1
-rw-r--r--drivers/nvdimm/pfn_devs.c40
-rw-r--r--drivers/nvdimm/pmem.c3
-rw-r--r--drivers/nvdimm/region_devs.c5
-rw-r--r--fs/block_dev.c96
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/uapi/linux/fs.h1
-rw-r--r--mm/huge_memory.c1
-rw-r--r--mm/hugetlb.c1
-rw-r--r--tools/testing/nvdimm/Kbuild9
-rw-r--r--tools/testing/nvdimm/config_check.c2
26 files changed, 935 insertions, 143 deletions
diff --git a/block/ioctl.c b/block/ioctl.c
index 4ff1f92f89ca..698c7933d582 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -407,35 +407,6 @@ static inline int is_unrecognized_ioctl(int ret)
407 ret == -ENOIOCTLCMD; 407 ret == -ENOIOCTLCMD;
408} 408}
409 409
410#ifdef CONFIG_FS_DAX
411bool blkdev_dax_capable(struct block_device *bdev)
412{
413 struct gendisk *disk = bdev->bd_disk;
414
415 if (!disk->fops->direct_access)
416 return false;
417
418 /*
419 * If the partition is not aligned on a page boundary, we can't
420 * do dax I/O to it.
421 */
422 if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
423 || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
424 return false;
425
426 /*
427 * If the device has known bad blocks, force all I/O through the
428 * driver / page cache.
429 *
430 * TODO: support finer grained dax error handling
431 */
432 if (disk->bb && disk->bb->count)
433 return false;
434
435 return true;
436}
437#endif
438
439static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, 410static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
440 unsigned cmd, unsigned long arg) 411 unsigned cmd, unsigned long arg)
441{ 412{
@@ -598,9 +569,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
598 case BLKTRACESETUP: 569 case BLKTRACESETUP:
599 case BLKTRACETEARDOWN: 570 case BLKTRACETEARDOWN:
600 return blk_trace_ioctl(bdev, cmd, argp); 571 return blk_trace_ioctl(bdev, cmd, argp);
601 case BLKDAXGET:
602 return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
603 break;
604 case IOC_PR_REGISTER: 572 case IOC_PR_REGISTER:
605 return blkdev_pr_register(bdev, argp); 573 return blkdev_pr_register(bdev, argp);
606 case IOC_PR_RESERVE: 574 case IOC_PR_RESERVE:
diff --git a/drivers/Kconfig b/drivers/Kconfig
index d2ac339de85f..8298eab84a6f 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -190,6 +190,8 @@ source "drivers/android/Kconfig"
190 190
191source "drivers/nvdimm/Kconfig" 191source "drivers/nvdimm/Kconfig"
192 192
193source "drivers/dax/Kconfig"
194
193source "drivers/nvmem/Kconfig" 195source "drivers/nvmem/Kconfig"
194 196
195source "drivers/hwtracing/stm/Kconfig" 197source "drivers/hwtracing/stm/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 8f5d076baeb0..0b6f3d60193d 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/
66obj-$(CONFIG_NVM) += lightnvm/ 66obj-$(CONFIG_NVM) += lightnvm/
67obj-y += base/ block/ misc/ mfd/ nfc/ 67obj-y += base/ block/ misc/ mfd/ nfc/
68obj-$(CONFIG_LIBNVDIMM) += nvdimm/ 68obj-$(CONFIG_LIBNVDIMM) += nvdimm/
69obj-$(CONFIG_DEV_DAX) += dax/
69obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ 70obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
70obj-$(CONFIG_NUBUS) += nubus/ 71obj-$(CONFIG_NUBUS) += nubus/
71obj-y += macintosh/ 72obj-y += macintosh/
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
new file mode 100644
index 000000000000..cedab7572de3
--- /dev/null
+++ b/drivers/dax/Kconfig
@@ -0,0 +1,26 @@
1menuconfig DEV_DAX
2 tristate "DAX: direct access to differentiated memory"
3 default m if NVDIMM_DAX
4 depends on TRANSPARENT_HUGEPAGE
5 help
6 Support raw access to differentiated (persistence, bandwidth,
7 latency...) memory via an mmap(2) capable character
8 device. Platform firmware or a device driver may identify a
9 platform memory resource that is differentiated from the
10 baseline memory pool. Mappings of a /dev/daxX.Y device impose
11 restrictions that make the mapping behavior deterministic.
12
13if DEV_DAX
14
15config DEV_DAX_PMEM
16 tristate "PMEM DAX: direct access to persistent memory"
17 depends on NVDIMM_DAX
18 default DEV_DAX
19 help
20 Support raw access to persistent memory. Note that this
21 driver consumes memory ranges allocated and exported by the
22 libnvdimm sub-system.
23
24 Say Y if unsure
25
26endif
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
new file mode 100644
index 000000000000..27c54e38478a
--- /dev/null
+++ b/drivers/dax/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_DEV_DAX) += dax.o
2obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
3
4dax_pmem-y := pmem.o
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
new file mode 100644
index 000000000000..b891a129b275
--- /dev/null
+++ b/drivers/dax/dax.c
@@ -0,0 +1,575 @@
1/*
2 * Copyright(c) 2016 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/pagemap.h>
14#include <linux/module.h>
15#include <linux/device.h>
16#include <linux/pfn_t.h>
17#include <linux/slab.h>
18#include <linux/dax.h>
19#include <linux/fs.h>
20#include <linux/mm.h>
21
22static int dax_major;
23static struct class *dax_class;
24static DEFINE_IDA(dax_minor_ida);
25
26/**
27 * struct dax_region - mapping infrastructure for dax devices
28 * @id: kernel-wide unique region for a memory range
29 * @base: linear address corresponding to @res
30 * @kref: to pin while other agents have a need to do lookups
31 * @dev: parent device backing this region
32 * @align: allocation and mapping alignment for child dax devices
33 * @res: physical address range of the region
34 * @pfn_flags: identify whether the pfns are paged back or not
35 */
36struct dax_region {
37 int id;
38 struct ida ida;
39 void *base;
40 struct kref kref;
41 struct device *dev;
42 unsigned int align;
43 struct resource res;
44 unsigned long pfn_flags;
45};
46
47/**
48 * struct dax_dev - subdivision of a dax region
49 * @region - parent region
50 * @dev - device backing the character device
51 * @kref - enable this data to be tracked in filp->private_data
52 * @alive - !alive + rcu grace period == no new mappings can be established
53 * @id - child id in the region
54 * @num_resources - number of physical address extents in this device
55 * @res - array of physical address ranges
56 */
57struct dax_dev {
58 struct dax_region *region;
59 struct device *dev;
60 struct kref kref;
61 bool alive;
62 int id;
63 int num_resources;
64 struct resource res[0];
65};
66
67static void dax_region_free(struct kref *kref)
68{
69 struct dax_region *dax_region;
70
71 dax_region = container_of(kref, struct dax_region, kref);
72 kfree(dax_region);
73}
74
75void dax_region_put(struct dax_region *dax_region)
76{
77 kref_put(&dax_region->kref, dax_region_free);
78}
79EXPORT_SYMBOL_GPL(dax_region_put);
80
81static void dax_dev_free(struct kref *kref)
82{
83 struct dax_dev *dax_dev;
84
85 dax_dev = container_of(kref, struct dax_dev, kref);
86 dax_region_put(dax_dev->region);
87 kfree(dax_dev);
88}
89
90static void dax_dev_put(struct dax_dev *dax_dev)
91{
92 kref_put(&dax_dev->kref, dax_dev_free);
93}
94
95struct dax_region *alloc_dax_region(struct device *parent, int region_id,
96 struct resource *res, unsigned int align, void *addr,
97 unsigned long pfn_flags)
98{
99 struct dax_region *dax_region;
100
101 dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
102
103 if (!dax_region)
104 return NULL;
105
106 memcpy(&dax_region->res, res, sizeof(*res));
107 dax_region->pfn_flags = pfn_flags;
108 kref_init(&dax_region->kref);
109 dax_region->id = region_id;
110 ida_init(&dax_region->ida);
111 dax_region->align = align;
112 dax_region->dev = parent;
113 dax_region->base = addr;
114
115 return dax_region;
116}
117EXPORT_SYMBOL_GPL(alloc_dax_region);
118
119static ssize_t size_show(struct device *dev,
120 struct device_attribute *attr, char *buf)
121{
122 struct dax_dev *dax_dev = dev_get_drvdata(dev);
123 unsigned long long size = 0;
124 int i;
125
126 for (i = 0; i < dax_dev->num_resources; i++)
127 size += resource_size(&dax_dev->res[i]);
128
129 return sprintf(buf, "%llu\n", size);
130}
131static DEVICE_ATTR_RO(size);
132
133static struct attribute *dax_device_attributes[] = {
134 &dev_attr_size.attr,
135 NULL,
136};
137
138static const struct attribute_group dax_device_attribute_group = {
139 .attrs = dax_device_attributes,
140};
141
142static const struct attribute_group *dax_attribute_groups[] = {
143 &dax_device_attribute_group,
144 NULL,
145};
146
147static void unregister_dax_dev(void *_dev)
148{
149 struct device *dev = _dev;
150 struct dax_dev *dax_dev = dev_get_drvdata(dev);
151 struct dax_region *dax_region = dax_dev->region;
152
153 dev_dbg(dev, "%s\n", __func__);
154
155 /*
156 * Note, rcu is not protecting the liveness of dax_dev, rcu is
157 * ensuring that any fault handlers that might have seen
158 * dax_dev->alive == true, have completed. Any fault handlers
159 * that start after synchronize_rcu() has started will abort
160 * upon seeing dax_dev->alive == false.
161 */
162 dax_dev->alive = false;
163 synchronize_rcu();
164
165 get_device(dev);
166 device_unregister(dev);
167 ida_simple_remove(&dax_region->ida, dax_dev->id);
168 ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
169 put_device(dev);
170 dax_dev_put(dax_dev);
171}
172
173int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
174 int count)
175{
176 struct device *parent = dax_region->dev;
177 struct dax_dev *dax_dev;
178 struct device *dev;
179 int rc, minor;
180 dev_t dev_t;
181
182 dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
183 if (!dax_dev)
184 return -ENOMEM;
185 memcpy(dax_dev->res, res, sizeof(*res) * count);
186 dax_dev->num_resources = count;
187 kref_init(&dax_dev->kref);
188 dax_dev->alive = true;
189 dax_dev->region = dax_region;
190 kref_get(&dax_region->kref);
191
192 dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
193 if (dax_dev->id < 0) {
194 rc = dax_dev->id;
195 goto err_id;
196 }
197
198 minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
199 if (minor < 0) {
200 rc = minor;
201 goto err_minor;
202 }
203
204 dev_t = MKDEV(dax_major, minor);
205 dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev,
206 dax_attribute_groups, "dax%d.%d", dax_region->id,
207 dax_dev->id);
208 if (IS_ERR(dev)) {
209 rc = PTR_ERR(dev);
210 goto err_create;
211 }
212 dax_dev->dev = dev;
213
214 rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
215 if (rc) {
216 unregister_dax_dev(dev);
217 return rc;
218 }
219
220 return 0;
221
222 err_create:
223 ida_simple_remove(&dax_minor_ida, minor);
224 err_minor:
225 ida_simple_remove(&dax_region->ida, dax_dev->id);
226 err_id:
227 dax_dev_put(dax_dev);
228
229 return rc;
230}
231EXPORT_SYMBOL_GPL(devm_create_dax_dev);
232
233/* return an unmapped area aligned to the dax region specified alignment */
234static unsigned long dax_dev_get_unmapped_area(struct file *filp,
235 unsigned long addr, unsigned long len, unsigned long pgoff,
236 unsigned long flags)
237{
238 unsigned long off, off_end, off_align, len_align, addr_align, align;
239 struct dax_dev *dax_dev = filp ? filp->private_data : NULL;
240 struct dax_region *dax_region;
241
242 if (!dax_dev || addr)
243 goto out;
244
245 dax_region = dax_dev->region;
246 align = dax_region->align;
247 off = pgoff << PAGE_SHIFT;
248 off_end = off + len;
249 off_align = round_up(off, align);
250
251 if ((off_end <= off_align) || ((off_end - off_align) < align))
252 goto out;
253
254 len_align = len + align;
255 if ((off + len_align) < off)
256 goto out;
257
258 addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
259 pgoff, flags);
260 if (!IS_ERR_VALUE(addr_align)) {
261 addr_align += (off - addr_align) & (align - 1);
262 return addr_align;
263 }
264 out:
265 return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
266}
267
268static int __match_devt(struct device *dev, const void *data)
269{
270 const dev_t *devt = data;
271
272 return dev->devt == *devt;
273}
274
275static struct device *dax_dev_find(dev_t dev_t)
276{
277 return class_find_device(dax_class, NULL, &dev_t, __match_devt);
278}
279
280static int dax_dev_open(struct inode *inode, struct file *filp)
281{
282 struct dax_dev *dax_dev = NULL;
283 struct device *dev;
284
285 dev = dax_dev_find(inode->i_rdev);
286 if (!dev)
287 return -ENXIO;
288
289 device_lock(dev);
290 dax_dev = dev_get_drvdata(dev);
291 if (dax_dev) {
292 dev_dbg(dev, "%s\n", __func__);
293 filp->private_data = dax_dev;
294 kref_get(&dax_dev->kref);
295 inode->i_flags = S_DAX;
296 }
297 device_unlock(dev);
298
299 if (!dax_dev) {
300 put_device(dev);
301 return -ENXIO;
302 }
303 return 0;
304}
305
306static int dax_dev_release(struct inode *inode, struct file *filp)
307{
308 struct dax_dev *dax_dev = filp->private_data;
309 struct device *dev = dax_dev->dev;
310
311 dev_dbg(dax_dev->dev, "%s\n", __func__);
312 dax_dev_put(dax_dev);
313 put_device(dev);
314
315 return 0;
316}
317
318static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma,
319 const char *func)
320{
321 struct dax_region *dax_region = dax_dev->region;
322 struct device *dev = dax_dev->dev;
323 unsigned long mask;
324
325 if (!dax_dev->alive)
326 return -ENXIO;
327
328 /* prevent private / writable mappings from being established */
329 if ((vma->vm_flags & (VM_NORESERVE|VM_SHARED|VM_WRITE)) == VM_WRITE) {
330 dev_info(dev, "%s: %s: fail, attempted private mapping\n",
331 current->comm, func);
332 return -EINVAL;
333 }
334
335 mask = dax_region->align - 1;
336 if (vma->vm_start & mask || vma->vm_end & mask) {
337 dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
338 current->comm, func, vma->vm_start, vma->vm_end,
339 mask);
340 return -EINVAL;
341 }
342
343 if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
344 && (vma->vm_flags & VM_DONTCOPY) == 0) {
345 dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n",
346 current->comm, func);
347 return -EINVAL;
348 }
349
350 if (!vma_is_dax(vma)) {
351 dev_info(dev, "%s: %s: fail, vma is not DAX capable\n",
352 current->comm, func);
353 return -EINVAL;
354 }
355
356 return 0;
357}
358
359static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
360 unsigned long size)
361{
362 struct resource *res;
363 phys_addr_t phys;
364 int i;
365
366 for (i = 0; i < dax_dev->num_resources; i++) {
367 res = &dax_dev->res[i];
368 phys = pgoff * PAGE_SIZE + res->start;
369 if (phys >= res->start && phys <= res->end)
370 break;
371 pgoff -= PHYS_PFN(resource_size(res));
372 }
373
374 if (i < dax_dev->num_resources) {
375 res = &dax_dev->res[i];
376 if (phys + size - 1 <= res->end)
377 return phys;
378 }
379
380 return -1;
381}
382
383static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
384 struct vm_fault *vmf)
385{
386 unsigned long vaddr = (unsigned long) vmf->virtual_address;
387 struct device *dev = dax_dev->dev;
388 struct dax_region *dax_region;
389 int rc = VM_FAULT_SIGBUS;
390 phys_addr_t phys;
391 pfn_t pfn;
392
393 if (check_vma(dax_dev, vma, __func__))
394 return VM_FAULT_SIGBUS;
395
396 dax_region = dax_dev->region;
397 if (dax_region->align > PAGE_SIZE) {
398 dev_dbg(dev, "%s: alignment > fault size\n", __func__);
399 return VM_FAULT_SIGBUS;
400 }
401
402 phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
403 if (phys == -1) {
404 dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
405 vmf->pgoff);
406 return VM_FAULT_SIGBUS;
407 }
408
409 pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
410
411 rc = vm_insert_mixed(vma, vaddr, pfn);
412
413 if (rc == -ENOMEM)
414 return VM_FAULT_OOM;
415 if (rc < 0 && rc != -EBUSY)
416 return VM_FAULT_SIGBUS;
417
418 return VM_FAULT_NOPAGE;
419}
420
421static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
422{
423 int rc;
424 struct file *filp = vma->vm_file;
425 struct dax_dev *dax_dev = filp->private_data;
426
427 dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
428 current->comm, (vmf->flags & FAULT_FLAG_WRITE)
429 ? "write" : "read", vma->vm_start, vma->vm_end);
430 rcu_read_lock();
431 rc = __dax_dev_fault(dax_dev, vma, vmf);
432 rcu_read_unlock();
433
434 return rc;
435}
436
437static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
438 struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd,
439 unsigned int flags)
440{
441 unsigned long pmd_addr = addr & PMD_MASK;
442 struct device *dev = dax_dev->dev;
443 struct dax_region *dax_region;
444 phys_addr_t phys;
445 pgoff_t pgoff;
446 pfn_t pfn;
447
448 if (check_vma(dax_dev, vma, __func__))
449 return VM_FAULT_SIGBUS;
450
451 dax_region = dax_dev->region;
452 if (dax_region->align > PMD_SIZE) {
453 dev_dbg(dev, "%s: alignment > fault size\n", __func__);
454 return VM_FAULT_SIGBUS;
455 }
456
457 /* dax pmd mappings require pfn_t_devmap() */
458 if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
459 dev_dbg(dev, "%s: alignment > fault size\n", __func__);
460 return VM_FAULT_SIGBUS;
461 }
462
463 pgoff = linear_page_index(vma, pmd_addr);
464 phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE);
465 if (phys == -1) {
466 dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
467 pgoff);
468 return VM_FAULT_SIGBUS;
469 }
470
471 pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
472
473 return vmf_insert_pfn_pmd(vma, addr, pmd, pfn,
474 flags & FAULT_FLAG_WRITE);
475}
476
477static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
478 pmd_t *pmd, unsigned int flags)
479{
480 int rc;
481 struct file *filp = vma->vm_file;
482 struct dax_dev *dax_dev = filp->private_data;
483
484 dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
485 current->comm, (flags & FAULT_FLAG_WRITE)
486 ? "write" : "read", vma->vm_start, vma->vm_end);
487
488 rcu_read_lock();
489 rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
490 rcu_read_unlock();
491
492 return rc;
493}
494
495static void dax_dev_vm_open(struct vm_area_struct *vma)
496{
497 struct file *filp = vma->vm_file;
498 struct dax_dev *dax_dev = filp->private_data;
499
500 dev_dbg(dax_dev->dev, "%s\n", __func__);
501 kref_get(&dax_dev->kref);
502}
503
504static void dax_dev_vm_close(struct vm_area_struct *vma)
505{
506 struct file *filp = vma->vm_file;
507 struct dax_dev *dax_dev = filp->private_data;
508
509 dev_dbg(dax_dev->dev, "%s\n", __func__);
510 dax_dev_put(dax_dev);
511}
512
513static const struct vm_operations_struct dax_dev_vm_ops = {
514 .fault = dax_dev_fault,
515 .pmd_fault = dax_dev_pmd_fault,
516 .open = dax_dev_vm_open,
517 .close = dax_dev_vm_close,
518};
519
520static int dax_dev_mmap(struct file *filp, struct vm_area_struct *vma)
521{
522 struct dax_dev *dax_dev = filp->private_data;
523 int rc;
524
525 dev_dbg(dax_dev->dev, "%s\n", __func__);
526
527 rc = check_vma(dax_dev, vma, __func__);
528 if (rc)
529 return rc;
530
531 kref_get(&dax_dev->kref);
532 vma->vm_ops = &dax_dev_vm_ops;
533 vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
534 return 0;
535
536}
537
538static const struct file_operations dax_fops = {
539 .llseek = noop_llseek,
540 .owner = THIS_MODULE,
541 .open = dax_dev_open,
542 .release = dax_dev_release,
543 .get_unmapped_area = dax_dev_get_unmapped_area,
544 .mmap = dax_dev_mmap,
545};
546
547static int __init dax_init(void)
548{
549 int rc;
550
551 rc = register_chrdev(0, "dax", &dax_fops);
552 if (rc < 0)
553 return rc;
554 dax_major = rc;
555
556 dax_class = class_create(THIS_MODULE, "dax");
557 if (IS_ERR(dax_class)) {
558 unregister_chrdev(dax_major, "dax");
559 return PTR_ERR(dax_class);
560 }
561
562 return 0;
563}
564
565static void __exit dax_exit(void)
566{
567 class_destroy(dax_class);
568 unregister_chrdev(dax_major, "dax");
569 ida_destroy(&dax_minor_ida);
570}
571
572MODULE_AUTHOR("Intel Corporation");
573MODULE_LICENSE("GPL v2");
574subsys_initcall(dax_init);
575module_exit(dax_exit);
diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h
new file mode 100644
index 000000000000..d8b8f1f25054
--- /dev/null
+++ b/drivers/dax/dax.h
@@ -0,0 +1,24 @@
1/*
2 * Copyright(c) 2016 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
#ifndef __DAX_H__
#define __DAX_H__
struct device;
struct resource;
struct dax_region;

/* Drop a reference on @dax_region; it is freed on the last put. */
void dax_region_put(struct dax_region *dax_region);

/*
 * Allocate a dax region describing @res. Returns NULL on allocation
 * failure. The caller owns the initial reference.
 */
struct dax_region *alloc_dax_region(struct device *parent,
		int region_id, struct resource *res, unsigned int align,
		void *addr, unsigned long pfn_flags);

/*
 * Create a dax character device covering the @count extents in @res.
 * Returns 0 or a negative errno.
 */
int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
		int count);
#endif /* __DAX_H__ */
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
new file mode 100644
index 000000000000..55d510e36cd1
--- /dev/null
+++ b/drivers/dax/pmem.c
@@ -0,0 +1,158 @@
1/*
2 * Copyright(c) 2016 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/percpu-refcount.h>
14#include <linux/memremap.h>
15#include <linux/module.h>
16#include <linux/pfn_t.h>
17#include "../nvdimm/pfn.h"
18#include "../nvdimm/nd.h"
19#include "dax.h"
20
21struct dax_pmem {
22 struct device *dev;
23 struct percpu_ref ref;
24 struct completion cmp;
25};
26
27struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
28{
29 return container_of(ref, struct dax_pmem, ref);
30}
31
32static void dax_pmem_percpu_release(struct percpu_ref *ref)
33{
34 struct dax_pmem *dax_pmem = to_dax_pmem(ref);
35
36 dev_dbg(dax_pmem->dev, "%s\n", __func__);
37 complete(&dax_pmem->cmp);
38}
39
40static void dax_pmem_percpu_exit(void *data)
41{
42 struct percpu_ref *ref = data;
43 struct dax_pmem *dax_pmem = to_dax_pmem(ref);
44
45 dev_dbg(dax_pmem->dev, "%s\n", __func__);
46 percpu_ref_exit(ref);
47 wait_for_completion(&dax_pmem->cmp);
48}
49
50static void dax_pmem_percpu_kill(void *data)
51{
52 struct percpu_ref *ref = data;
53 struct dax_pmem *dax_pmem = to_dax_pmem(ref);
54
55 dev_dbg(dax_pmem->dev, "%s\n", __func__);
56 percpu_ref_kill(ref);
57}
58
59static int dax_pmem_probe(struct device *dev)
60{
61 int rc;
62 void *addr;
63 struct resource res;
64 struct nd_pfn_sb *pfn_sb;
65 struct dax_pmem *dax_pmem;
66 struct nd_region *nd_region;
67 struct nd_namespace_io *nsio;
68 struct dax_region *dax_region;
69 struct nd_namespace_common *ndns;
70 struct nd_dax *nd_dax = to_nd_dax(dev);
71 struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
72 struct vmem_altmap __altmap, *altmap = NULL;
73
74 ndns = nvdimm_namespace_common_probe(dev);
75 if (IS_ERR(ndns))
76 return PTR_ERR(ndns);
77 nsio = to_nd_namespace_io(&ndns->dev);
78
79 /* parse the 'pfn' info block via ->rw_bytes */
80 devm_nsio_enable(dev, nsio);
81 altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
82 if (IS_ERR(altmap))
83 return PTR_ERR(altmap);
84 devm_nsio_disable(dev, nsio);
85
86 pfn_sb = nd_pfn->pfn_sb;
87
88 if (!devm_request_mem_region(dev, nsio->res.start,
89 resource_size(&nsio->res), dev_name(dev))) {
90 dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
91 return -EBUSY;
92 }
93
94 dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
95 if (!dax_pmem)
96 return -ENOMEM;
97
98 dax_pmem->dev = dev;
99 init_completion(&dax_pmem->cmp);
100 rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
101 GFP_KERNEL);
102 if (rc)
103 return rc;
104
105 rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
106 if (rc) {
107 dax_pmem_percpu_exit(&dax_pmem->ref);
108 return rc;
109 }
110
111 addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
112 if (IS_ERR(addr))
113 return PTR_ERR(addr);
114
115 rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
116 if (rc) {
117 dax_pmem_percpu_kill(&dax_pmem->ref);
118 return rc;
119 }
120
121 nd_region = to_nd_region(dev->parent);
122 dax_region = alloc_dax_region(dev, nd_region->id, &res,
123 le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
124 if (!dax_region)
125 return -ENOMEM;
126
127 /* TODO: support for subdividing a dax region... */
128 rc = devm_create_dax_dev(dax_region, &res, 1);
129
130 /* child dax_dev instances now own the lifetime of the dax_region */
131 dax_region_put(dax_region);
132
133 return rc;
134}
135
136static struct nd_device_driver dax_pmem_driver = {
137 .probe = dax_pmem_probe,
138 .drv = {
139 .name = "dax_pmem",
140 },
141 .type = ND_DRIVER_DAX_PMEM,
142};
143
144static int __init dax_pmem_init(void)
145{
146 return nd_driver_register(&dax_pmem_driver);
147}
148module_init(dax_pmem_init);
149
150static void __exit dax_pmem_exit(void)
151{
152 driver_unregister(&dax_pmem_driver.drv);
153}
154module_exit(dax_pmem_exit);
155
156MODULE_LICENSE("GPL v2");
157MODULE_AUTHOR("Intel Corporation");
158MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 04c2c3fda1ab..f085f8bceae8 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -124,9 +124,10 @@ static int nvdimm_bus_remove(struct device *dev)
124 struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); 124 struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
125 struct module *provider = to_bus_provider(dev); 125 struct module *provider = to_bus_provider(dev);
126 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 126 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
127 int rc; 127 int rc = 0;
128 128
129 rc = nd_drv->remove(dev); 129 if (nd_drv->remove)
130 rc = nd_drv->remove(dev);
130 nd_region_disable(nvdimm_bus, dev); 131 nd_region_disable(nvdimm_bus, dev);
131 132
132 dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, 133 dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
@@ -296,8 +297,8 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
296 return -EINVAL; 297 return -EINVAL;
297 } 298 }
298 299
299 if (!nd_drv->probe || !nd_drv->remove) { 300 if (!nd_drv->probe) {
300 pr_debug("->probe() and ->remove() must be specified\n"); 301 pr_debug("%s ->probe() must be specified\n", mod_name);
301 return -EINVAL; 302 return -EINVAL;
302 } 303 }
303 304
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 5f53db59a058..8b2e3c4fb0ad 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -93,6 +93,25 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns)
93 return true; 93 return true;
94} 94}
95 95
96struct nd_pfn *to_nd_pfn_safe(struct device *dev)
97{
98 /*
99 * pfn device attributes are re-used by dax device instances, so we
100 * need to be careful to correct device-to-nd_pfn conversion.
101 */
102 if (is_nd_pfn(dev))
103 return to_nd_pfn(dev);
104
105 if (is_nd_dax(dev)) {
106 struct nd_dax *nd_dax = to_nd_dax(dev);
107
108 return &nd_dax->nd_pfn;
109 }
110
111 WARN_ON(1);
112 return NULL;
113}
114
96static void nd_detach_and_reset(struct device *dev, 115static void nd_detach_and_reset(struct device *dev,
97 struct nd_namespace_common **_ndns) 116 struct nd_namespace_common **_ndns)
98{ 117{
@@ -106,8 +125,8 @@ static void nd_detach_and_reset(struct device *dev,
106 nd_btt->lbasize = 0; 125 nd_btt->lbasize = 0;
107 kfree(nd_btt->uuid); 126 kfree(nd_btt->uuid);
108 nd_btt->uuid = NULL; 127 nd_btt->uuid = NULL;
109 } else if (is_nd_pfn(dev)) { 128 } else if (is_nd_pfn(dev) || is_nd_dax(dev)) {
110 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 129 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
111 130
112 kfree(nd_pfn->uuid); 131 kfree(nd_pfn->uuid);
113 nd_pfn->uuid = NULL; 132 nd_pfn->uuid = NULL;
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index e8688a13cf4f..be89764315c2 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -648,6 +648,9 @@ static __exit void libnvdimm_exit(void)
648 nd_region_exit(); 648 nd_region_exit();
649 nvdimm_exit(); 649 nvdimm_exit();
650 nvdimm_bus_exit(); 650 nvdimm_bus_exit();
651 nd_region_devs_exit();
652 nvdimm_devs_exit();
653 ida_destroy(&nd_ida);
651} 654}
652 655
653MODULE_LICENSE("GPL v2"); 656MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index f90f7549e7f4..45fa82cae87c 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -15,6 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include "nd-core.h" 17#include "nd-core.h"
18#include "pfn.h"
18#include "nd.h" 19#include "nd.h"
19 20
20static void nd_dax_release(struct device *dev) 21static void nd_dax_release(struct device *dev)
@@ -97,3 +98,37 @@ struct device *nd_dax_create(struct nd_region *nd_region)
97 __nd_device_register(dev); 98 __nd_device_register(dev);
98 return dev; 99 return dev;
99} 100}
101
102int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
103{
104 int rc;
105 struct nd_dax *nd_dax;
106 struct device *dax_dev;
107 struct nd_pfn *nd_pfn;
108 struct nd_pfn_sb *pfn_sb;
109 struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
110
111 if (ndns->force_raw)
112 return -ENODEV;
113
114 nvdimm_bus_lock(&ndns->dev);
115 nd_dax = nd_dax_alloc(nd_region);
116 nd_pfn = &nd_dax->nd_pfn;
117 dax_dev = nd_pfn_devinit(nd_pfn, ndns);
118 nvdimm_bus_unlock(&ndns->dev);
119 if (!dax_dev)
120 return -ENOMEM;
121 pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
122 nd_pfn->pfn_sb = pfn_sb;
123 rc = nd_pfn_validate(nd_pfn, DAX_SIG);
124 dev_dbg(dev, "%s: dax: %s\n", __func__,
125 rc == 0 ? dev_name(dax_dev) : "<none>");
126 if (rc < 0) {
127 __nd_detach_ndns(dax_dev, &nd_pfn->ndns);
128 put_device(dax_dev);
129 } else
130 __nd_device_register(dax_dev);
131
132 return rc;
133}
134EXPORT_SYMBOL(nd_dax_probe);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 79a35a02053c..bbde28d3dec5 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -552,3 +552,8 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
552 return 0; 552 return 0;
553} 553}
554EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); 554EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count);
555
556void __exit nvdimm_devs_exit(void)
557{
558 ida_destroy(&dimm_ida);
559}
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 86d985ccce82..284cdaa268cf 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -49,6 +49,8 @@ bool is_nd_blk(struct device *dev);
49struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); 49struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
50int __init nvdimm_bus_init(void); 50int __init nvdimm_bus_init(void);
51void nvdimm_bus_exit(void); 51void nvdimm_bus_exit(void);
52void nvdimm_devs_exit(void);
53void nd_region_devs_exit(void);
52void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev); 54void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
53struct nd_region; 55struct nd_region;
54void nd_region_create_blk_seed(struct nd_region *nd_region); 56void nd_region_create_blk_seed(struct nd_region *nd_region);
@@ -92,4 +94,5 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
92ssize_t nd_namespace_store(struct device *dev, 94ssize_t nd_namespace_store(struct device *dev,
93 struct nd_namespace_common **_ndns, const char *buf, 95 struct nd_namespace_common **_ndns, const char *buf,
94 size_t len); 96 size_t len);
97struct nd_pfn *to_nd_pfn_safe(struct device *dev);
95#endif /* __ND_CORE_H__ */ 98#endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 46910b8f32b1..d0ac93c31dda 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -232,7 +232,7 @@ bool is_nd_pfn(struct device *dev);
232struct device *nd_pfn_create(struct nd_region *nd_region); 232struct device *nd_pfn_create(struct nd_region *nd_region);
233struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, 233struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
234 struct nd_namespace_common *ndns); 234 struct nd_namespace_common *ndns);
235int nd_pfn_validate(struct nd_pfn *nd_pfn); 235int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig);
236extern struct attribute_group nd_pfn_attribute_group; 236extern struct attribute_group nd_pfn_attribute_group;
237#else 237#else
238static inline int nd_pfn_probe(struct device *dev, 238static inline int nd_pfn_probe(struct device *dev,
@@ -251,7 +251,7 @@ static inline struct device *nd_pfn_create(struct nd_region *nd_region)
251 return NULL; 251 return NULL;
252} 252}
253 253
254static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) 254static inline int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
255{ 255{
256 return -ENODEV; 256 return -ENODEV;
257} 257}
@@ -259,9 +259,16 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
259 259
260struct nd_dax *to_nd_dax(struct device *dev); 260struct nd_dax *to_nd_dax(struct device *dev);
261#if IS_ENABLED(CONFIG_NVDIMM_DAX) 261#if IS_ENABLED(CONFIG_NVDIMM_DAX)
262int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns);
262bool is_nd_dax(struct device *dev); 263bool is_nd_dax(struct device *dev);
263struct device *nd_dax_create(struct nd_region *nd_region); 264struct device *nd_dax_create(struct nd_region *nd_region);
264#else 265#else
266static inline int nd_dax_probe(struct device *dev,
267 struct nd_namespace_common *ndns)
268{
269 return -ENODEV;
270}
271
265static inline bool is_nd_dax(struct device *dev) 272static inline bool is_nd_dax(struct device *dev)
266{ 273{
267 return false; 274 return false;
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index 9d2704c83fa7..dde9853453d3 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -19,6 +19,7 @@
19 19
20#define PFN_SIG_LEN 16 20#define PFN_SIG_LEN 16
21#define PFN_SIG "NVDIMM_PFN_INFO\0" 21#define PFN_SIG "NVDIMM_PFN_INFO\0"
22#define DAX_SIG "NVDIMM_DAX_INFO\0"
22 23
23struct nd_pfn_sb { 24struct nd_pfn_sb {
24 u8 signature[PFN_SIG_LEN]; 25 u8 signature[PFN_SIG_LEN];
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 2248056d29e7..f7718ec685fa 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -54,25 +54,6 @@ struct nd_pfn *to_nd_pfn(struct device *dev)
54} 54}
55EXPORT_SYMBOL(to_nd_pfn); 55EXPORT_SYMBOL(to_nd_pfn);
56 56
57static struct nd_pfn *to_nd_pfn_safe(struct device *dev)
58{
59 /*
60 * pfn device attributes are re-used by dax device instances, so we
61 * need to be careful to correct device-to-nd_pfn conversion.
62 */
63 if (is_nd_pfn(dev))
64 return to_nd_pfn(dev);
65
66 if (is_nd_dax(dev)) {
67 struct nd_dax *nd_dax = to_nd_dax(dev);
68
69 return &nd_dax->nd_pfn;
70 }
71
72 WARN_ON(1);
73 return NULL;
74}
75
76static ssize_t mode_show(struct device *dev, 57static ssize_t mode_show(struct device *dev,
77 struct device_attribute *attr, char *buf) 58 struct device_attribute *attr, char *buf)
78{ 59{
@@ -360,7 +341,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
360 return dev; 341 return dev;
361} 342}
362 343
363int nd_pfn_validate(struct nd_pfn *nd_pfn) 344int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
364{ 345{
365 u64 checksum, offset; 346 u64 checksum, offset;
366 struct nd_namespace_io *nsio; 347 struct nd_namespace_io *nsio;
@@ -377,7 +358,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
377 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb))) 358 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
378 return -ENXIO; 359 return -ENXIO;
379 360
380 if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0) 361 if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
381 return -ENODEV; 362 return -ENODEV;
382 363
383 checksum = le64_to_cpu(pfn_sb->checksum); 364 checksum = le64_to_cpu(pfn_sb->checksum);
@@ -416,6 +397,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
416 return -ENODEV; 397 return -ENODEV;
417 } 398 }
418 399
400 if (nd_pfn->align == 0)
401 nd_pfn->align = le32_to_cpu(pfn_sb->align);
419 if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) { 402 if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
420 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", 403 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
421 nd_pfn->align, nvdimm_namespace_capacity(ndns)); 404 nd_pfn->align, nvdimm_namespace_capacity(ndns));
@@ -436,8 +419,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
436 return -EBUSY; 419 return -EBUSY;
437 } 420 }
438 421
439 nd_pfn->align = le32_to_cpu(pfn_sb->align); 422 if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align))
440 if (!is_power_of_2(offset) || offset < PAGE_SIZE) { 423 || !IS_ALIGNED(offset, PAGE_SIZE)) {
441 dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n", 424 dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
442 offset); 425 offset);
443 return -ENXIO; 426 return -ENXIO;
@@ -467,7 +450,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
467 pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); 450 pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
468 nd_pfn = to_nd_pfn(pfn_dev); 451 nd_pfn = to_nd_pfn(pfn_dev);
469 nd_pfn->pfn_sb = pfn_sb; 452 nd_pfn->pfn_sb = pfn_sb;
470 rc = nd_pfn_validate(nd_pfn); 453 rc = nd_pfn_validate(nd_pfn, PFN_SIG);
471 dev_dbg(dev, "%s: pfn: %s\n", __func__, 454 dev_dbg(dev, "%s: pfn: %s\n", __func__,
472 rc == 0 ? dev_name(pfn_dev) : "<none>"); 455 rc == 0 ? dev_name(pfn_dev) : "<none>");
473 if (rc < 0) { 456 if (rc < 0) {
@@ -552,6 +535,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
552 struct nd_pfn_sb *pfn_sb; 535 struct nd_pfn_sb *pfn_sb;
553 unsigned long npfns; 536 unsigned long npfns;
554 phys_addr_t offset; 537 phys_addr_t offset;
538 const char *sig;
555 u64 checksum; 539 u64 checksum;
556 int rc; 540 int rc;
557 541
@@ -560,7 +544,11 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
560 return -ENOMEM; 544 return -ENOMEM;
561 545
562 nd_pfn->pfn_sb = pfn_sb; 546 nd_pfn->pfn_sb = pfn_sb;
563 rc = nd_pfn_validate(nd_pfn); 547 if (is_nd_dax(&nd_pfn->dev))
548 sig = DAX_SIG;
549 else
550 sig = PFN_SIG;
551 rc = nd_pfn_validate(nd_pfn, sig);
564 if (rc != -ENODEV) 552 if (rc != -ENODEV)
565 return rc; 553 return rc;
566 554
@@ -635,7 +623,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
635 pfn_sb->mode = cpu_to_le32(nd_pfn->mode); 623 pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
636 pfn_sb->dataoff = cpu_to_le64(offset); 624 pfn_sb->dataoff = cpu_to_le64(offset);
637 pfn_sb->npfns = cpu_to_le64(npfns); 625 pfn_sb->npfns = cpu_to_le64(npfns);
638 memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN); 626 memcpy(pfn_sb->signature, sig, PFN_SIG_LEN);
639 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); 627 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
640 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); 628 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
641 pfn_sb->version_major = cpu_to_le16(1); 629 pfn_sb->version_major = cpu_to_le16(1);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index d9a0dbc2d023..042baec56931 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -320,7 +320,8 @@ static int nd_pmem_probe(struct device *dev)
320 return pmem_attach_disk(dev, ndns); 320 return pmem_attach_disk(dev, ndns);
321 321
322 /* if we find a valid info-block we'll come back as that personality */ 322 /* if we find a valid info-block we'll come back as that personality */
323 if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0) 323 if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
324 || nd_dax_probe(dev, ndns) == 0)
324 return -ENXIO; 325 return -ENXIO;
325 326
326 /* ...otherwise we're just a raw pmem device */ 327 /* ...otherwise we're just a raw pmem device */
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 9e1b054e0e61..40fcfea26fbb 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -793,3 +793,8 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
793 __func__); 793 __func__);
794} 794}
795EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); 795EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
796
797void __exit nd_region_devs_exit(void)
798{
799 ida_destroy(&region_ida);
800}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 20a2c02b77c4..36ee10ca503e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -29,6 +29,7 @@
29#include <linux/log2.h> 29#include <linux/log2.h>
30#include <linux/cleancache.h> 30#include <linux/cleancache.h>
31#include <linux/dax.h> 31#include <linux/dax.h>
32#include <linux/badblocks.h>
32#include <asm/uaccess.h> 33#include <asm/uaccess.h>
33#include "internal.h" 34#include "internal.h"
34 35
@@ -1159,6 +1160,33 @@ void bd_set_size(struct block_device *bdev, loff_t size)
1159} 1160}
1160EXPORT_SYMBOL(bd_set_size); 1161EXPORT_SYMBOL(bd_set_size);
1161 1162
1163static bool blkdev_dax_capable(struct block_device *bdev)
1164{
1165 struct gendisk *disk = bdev->bd_disk;
1166
1167 if (!disk->fops->direct_access || !IS_ENABLED(CONFIG_FS_DAX))
1168 return false;
1169
1170 /*
1171 * If the partition is not aligned on a page boundary, we can't
1172 * do dax I/O to it.
1173 */
1174 if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
1175 || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
1176 return false;
1177
1178 /*
1179 * If the device has known bad blocks, force all I/O through the
1180 * driver / page cache.
1181 *
1182 * TODO: support finer grained dax error handling
1183 */
1184 if (disk->bb && disk->bb->count)
1185 return false;
1186
1187 return true;
1188}
1189
1162static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); 1190static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1163 1191
1164/* 1192/*
@@ -1724,79 +1752,13 @@ static const struct address_space_operations def_blk_aops = {
1724 .is_dirty_writeback = buffer_check_dirty_writeback, 1752 .is_dirty_writeback = buffer_check_dirty_writeback,
1725}; 1753};
1726 1754
1727#ifdef CONFIG_FS_DAX
1728/*
1729 * In the raw block case we do not need to contend with truncation nor
1730 * unwritten file extents. Without those concerns there is no need for
1731 * additional locking beyond the mmap_sem context that these routines
1732 * are already executing under.
1733 *
1734 * Note, there is no protection if the block device is dynamically
1735 * resized (partition grow/shrink) during a fault. A stable block device
1736 * size is already not enforced in the blkdev_direct_IO path.
1737 *
1738 * For DAX, it is the responsibility of the block device driver to
1739 * ensure the whole-disk device size is stable while requests are in
1740 * flight.
1741 *
1742 * Finally, unlike the filemap_page_mkwrite() case there is no
1743 * filesystem superblock to sync against freezing. We still include a
1744 * pfn_mkwrite callback for dax drivers to receive write fault
1745 * notifications.
1746 */
1747static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1748{
1749 return __dax_fault(vma, vmf, blkdev_get_block, NULL);
1750}
1751
1752static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
1753 struct vm_fault *vmf)
1754{
1755 return dax_pfn_mkwrite(vma, vmf);
1756}
1757
1758static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
1759 pmd_t *pmd, unsigned int flags)
1760{
1761 return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
1762}
1763
1764static const struct vm_operations_struct blkdev_dax_vm_ops = {
1765 .fault = blkdev_dax_fault,
1766 .pmd_fault = blkdev_dax_pmd_fault,
1767 .pfn_mkwrite = blkdev_dax_pfn_mkwrite,
1768};
1769
1770static const struct vm_operations_struct blkdev_default_vm_ops = {
1771 .fault = filemap_fault,
1772 .map_pages = filemap_map_pages,
1773};
1774
1775static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
1776{
1777 struct inode *bd_inode = bdev_file_inode(file);
1778
1779 file_accessed(file);
1780 if (IS_DAX(bd_inode)) {
1781 vma->vm_ops = &blkdev_dax_vm_ops;
1782 vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
1783 } else {
1784 vma->vm_ops = &blkdev_default_vm_ops;
1785 }
1786
1787 return 0;
1788}
1789#else
1790#define blkdev_mmap generic_file_mmap
1791#endif
1792
1793const struct file_operations def_blk_fops = { 1755const struct file_operations def_blk_fops = {
1794 .open = blkdev_open, 1756 .open = blkdev_open,
1795 .release = blkdev_close, 1757 .release = blkdev_close,
1796 .llseek = block_llseek, 1758 .llseek = block_llseek,
1797 .read_iter = blkdev_read_iter, 1759 .read_iter = blkdev_read_iter,
1798 .write_iter = blkdev_write_iter, 1760 .write_iter = blkdev_write_iter,
1799 .mmap = blkdev_mmap, 1761 .mmap = generic_file_mmap,
1800 .fsync = blkdev_fsync, 1762 .fsync = blkdev_fsync,
1801 .unlocked_ioctl = block_ioctl, 1763 .unlocked_ioctl = block_ioctl,
1802#ifdef CONFIG_COMPAT 1764#ifdef CONFIG_COMPAT
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70e61b58baaf..8363a10660f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2320,14 +2320,6 @@ extern struct super_block *freeze_bdev(struct block_device *);
2320extern void emergency_thaw_all(void); 2320extern void emergency_thaw_all(void);
2321extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); 2321extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
2322extern int fsync_bdev(struct block_device *); 2322extern int fsync_bdev(struct block_device *);
2323#ifdef CONFIG_FS_DAX
2324extern bool blkdev_dax_capable(struct block_device *bdev);
2325#else
2326static inline bool blkdev_dax_capable(struct block_device *bdev)
2327{
2328 return false;
2329}
2330#endif
2331 2323
2332extern struct super_block *blockdev_superblock; 2324extern struct super_block *blockdev_superblock;
2333 2325
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index a079d50376e1..fbff8b28aa35 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -222,7 +222,6 @@ struct fsxattr {
222#define BLKSECDISCARD _IO(0x12,125) 222#define BLKSECDISCARD _IO(0x12,125)
223#define BLKROTATIONAL _IO(0x12,126) 223#define BLKROTATIONAL _IO(0x12,126)
224#define BLKZEROOUT _IO(0x12,127) 224#define BLKZEROOUT _IO(0x12,127)
225#define BLKDAXGET _IO(0x12,129)
226 225
227#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ 226#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
228#define FIBMAP _IO(0x00,1) /* bmap access */ 227#define FIBMAP _IO(0x00,1) /* bmap access */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86f9f8b82f8e..52ea012d8a80 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1013,6 +1013,7 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
1013 insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write); 1013 insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
1014 return VM_FAULT_NOPAGE; 1014 return VM_FAULT_NOPAGE;
1015} 1015}
1016EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
1016 1017
1017static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, 1018static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
1018 pmd_t *pmd) 1019 pmd_t *pmd)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 19d0d08b396f..b14e98129b07 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -624,6 +624,7 @@ pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
624{ 624{
625 return vma_hugecache_offset(hstate_vma(vma), vma, address); 625 return vma_hugecache_offset(hstate_vma(vma), vma, address);
626} 626}
627EXPORT_SYMBOL_GPL(linear_hugepage_index);
627 628
628/* 629/*
629 * Return the size of the pages allocated when backing a VMA. In the majority 630 * Return the size of the pages allocated when backing a VMA. In the majority
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 5ff6d3c126a9..785985677159 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -16,6 +16,7 @@ ldflags-y += --wrap=phys_to_pfn_t
16DRIVERS := ../../../drivers 16DRIVERS := ../../../drivers
17NVDIMM_SRC := $(DRIVERS)/nvdimm 17NVDIMM_SRC := $(DRIVERS)/nvdimm
18ACPI_SRC := $(DRIVERS)/acpi 18ACPI_SRC := $(DRIVERS)/acpi
19DAX_SRC := $(DRIVERS)/dax
19 20
20obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o 21obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
21obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o 22obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
@@ -23,6 +24,8 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
23obj-$(CONFIG_ND_BLK) += nd_blk.o 24obj-$(CONFIG_ND_BLK) += nd_blk.o
24obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o 25obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
25obj-$(CONFIG_ACPI_NFIT) += nfit.o 26obj-$(CONFIG_ACPI_NFIT) += nfit.o
27obj-$(CONFIG_DEV_DAX) += dax.o
28obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
26 29
27nfit-y := $(ACPI_SRC)/nfit.o 30nfit-y := $(ACPI_SRC)/nfit.o
28nfit-y += config_check.o 31nfit-y += config_check.o
@@ -39,6 +42,12 @@ nd_blk-y += config_check.o
39nd_e820-y := $(NVDIMM_SRC)/e820.o 42nd_e820-y := $(NVDIMM_SRC)/e820.o
40nd_e820-y += config_check.o 43nd_e820-y += config_check.o
41 44
45dax-y := $(DAX_SRC)/dax.o
46dax-y += config_check.o
47
48dax_pmem-y := $(DAX_SRC)/pmem.o
49dax_pmem-y += config_check.o
50
42libnvdimm-y := $(NVDIMM_SRC)/core.o 51libnvdimm-y := $(NVDIMM_SRC)/core.o
43libnvdimm-y += $(NVDIMM_SRC)/bus.o 52libnvdimm-y += $(NVDIMM_SRC)/bus.o
44libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o 53libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
index f2c7615554eb..adf18bfeca00 100644
--- a/tools/testing/nvdimm/config_check.c
+++ b/tools/testing/nvdimm/config_check.c
@@ -12,4 +12,6 @@ void check(void)
12 BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); 12 BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
13 BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); 13 BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
14 BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); 14 BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
15 BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
16 BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM));
15} 17}