aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/obsolete/sysfs-class-dax22
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c1
-rw-r--r--drivers/acpi/nfit/core.c8
-rw-r--r--drivers/acpi/numa.c1
-rw-r--r--drivers/base/memory.c1
-rw-r--r--drivers/dax/Kconfig28
-rw-r--r--drivers/dax/Makefile6
-rw-r--r--drivers/dax/bus.c503
-rw-r--r--drivers/dax/bus.h61
-rw-r--r--drivers/dax/dax-private.h34
-rw-r--r--drivers/dax/dax.h18
-rw-r--r--drivers/dax/device-dax.h25
-rw-r--r--drivers/dax/device.c363
-rw-r--r--drivers/dax/kmem.c108
-rw-r--r--drivers/dax/pmem.c153
-rw-r--r--drivers/dax/pmem/Makefile7
-rw-r--r--drivers/dax/pmem/compat.c73
-rw-r--r--drivers/dax/pmem/core.c71
-rw-r--r--drivers/dax/pmem/pmem.c40
-rw-r--r--drivers/dax/super.c41
-rw-r--r--drivers/nvdimm/e820.c1
-rw-r--r--drivers/nvdimm/nd.h2
-rw-r--r--drivers/nvdimm/of_pmem.c1
-rw-r--r--drivers/nvdimm/region_devs.c1
-rw-r--r--include/linux/acpi.h5
-rw-r--r--include/linux/libnvdimm.h1
-rw-r--r--kernel/resource.c18
-rw-r--r--mm/memory_hotplug.c32
-rw-r--r--tools/testing/nvdimm/Kbuild7
-rw-r--r--tools/testing/nvdimm/dax-dev.c16
30 files changed, 1111 insertions, 537 deletions
diff --git a/Documentation/ABI/obsolete/sysfs-class-dax b/Documentation/ABI/obsolete/sysfs-class-dax
new file mode 100644
index 000000000000..2cb9fc5e8bd1
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-class-dax
@@ -0,0 +1,22 @@
1What: /sys/class/dax/
2Date: May, 2016
3KernelVersion: v4.7
4Contact: linux-nvdimm@lists.01.org
5Description: Device DAX is the device-centric analogue of Filesystem
6 DAX (CONFIG_FS_DAX). It allows memory ranges to be
7 allocated and mapped without need of an intervening file
8 system. Device DAX is strict, precise and predictable.
9 Specifically this interface:
10
11 1/ Guarantees fault granularity with respect to a given
12 page size (pte, pmd, or pud) set at configuration time.
13
14 2/ Enforces deterministic behavior by being strict about
15 what fault scenarios are supported.
16
17 The /sys/class/dax/ interface enumerates all the
18 device-dax instances in the system. The ABI is
19 deprecated and will be removed after 2020. It is
20 replaced with the DAX bus interface /sys/bus/dax/ where
21 device-dax instances can be found under
22 /sys/bus/dax/devices/
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index bba281b1fe1b..96c53b23e58f 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -239,6 +239,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
239 memset(&ndr_desc, 0, sizeof(ndr_desc)); 239 memset(&ndr_desc, 0, sizeof(ndr_desc));
240 ndr_desc.attr_groups = region_attr_groups; 240 ndr_desc.attr_groups = region_attr_groups;
241 ndr_desc.numa_node = dev_to_node(&p->pdev->dev); 241 ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
242 ndr_desc.target_node = ndr_desc.numa_node;
242 ndr_desc.res = &p->res; 243 ndr_desc.res = &p->res;
243 ndr_desc.of_node = p->dn; 244 ndr_desc.of_node = p->dn;
244 ndr_desc.provider_data = p; 245 ndr_desc.provider_data = p;
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index df8979008dd4..5a389a4f4f65 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2956,11 +2956,15 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
2956 ndr_desc->res = &res; 2956 ndr_desc->res = &res;
2957 ndr_desc->provider_data = nfit_spa; 2957 ndr_desc->provider_data = nfit_spa;
2958 ndr_desc->attr_groups = acpi_nfit_region_attribute_groups; 2958 ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
2959 if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) 2959 if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) {
2960 ndr_desc->numa_node = acpi_map_pxm_to_online_node( 2960 ndr_desc->numa_node = acpi_map_pxm_to_online_node(
2961 spa->proximity_domain); 2961 spa->proximity_domain);
2962 else 2962 ndr_desc->target_node = acpi_map_pxm_to_node(
2963 spa->proximity_domain);
2964 } else {
2963 ndr_desc->numa_node = NUMA_NO_NODE; 2965 ndr_desc->numa_node = NUMA_NO_NODE;
2966 ndr_desc->target_node = NUMA_NO_NODE;
2967 }
2964 2968
2965 /* 2969 /*
2966 * Persistence domain bits are hierarchical, if 2970 * Persistence domain bits are hierarchical, if
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 7bbbf8256a41..867f6e3f2b4f 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -84,6 +84,7 @@ int acpi_map_pxm_to_node(int pxm)
84 84
85 return node; 85 return node;
86} 86}
87EXPORT_SYMBOL(acpi_map_pxm_to_node);
87 88
88/** 89/**
89 * acpi_map_pxm_to_online_node - Map proximity ID to online node 90 * acpi_map_pxm_to_online_node - Map proximity ID to online node
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 048cbf7d5233..cb8347500ce2 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -88,6 +88,7 @@ unsigned long __weak memory_block_size_bytes(void)
88{ 88{
89 return MIN_MEMORY_BLOCK_SIZE; 89 return MIN_MEMORY_BLOCK_SIZE;
90} 90}
91EXPORT_SYMBOL_GPL(memory_block_size_bytes);
91 92
92static unsigned long get_memory_block_size(void) 93static unsigned long get_memory_block_size(void)
93{ 94{
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index e0700bf4893a..5ef624fe3934 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -23,12 +23,38 @@ config DEV_DAX
23config DEV_DAX_PMEM 23config DEV_DAX_PMEM
24 tristate "PMEM DAX: direct access to persistent memory" 24 tristate "PMEM DAX: direct access to persistent memory"
25 depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX 25 depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
26 depends on m # until we can kill DEV_DAX_PMEM_COMPAT
26 default DEV_DAX 27 default DEV_DAX
27 help 28 help
28 Support raw access to persistent memory. Note that this 29 Support raw access to persistent memory. Note that this
29 driver consumes memory ranges allocated and exported by the 30 driver consumes memory ranges allocated and exported by the
30 libnvdimm sub-system. 31 libnvdimm sub-system.
31 32
32 Say Y if unsure 33 Say M if unsure
34
35config DEV_DAX_KMEM
36 tristate "KMEM DAX: volatile-use of persistent memory"
37 default DEV_DAX
38 depends on DEV_DAX
39 depends on MEMORY_HOTPLUG # for add_memory() and friends
40 help
41 Support access to persistent memory as if it were RAM. This
42 allows easier use of persistent memory by unmodified
43 applications.
44
45 To use this feature, a DAX device must be unbound from the
46 device_dax driver (PMEM DAX) and bound to this kmem driver
47 on each boot.
48
49 Say N if unsure.
50
51config DEV_DAX_PMEM_COMPAT
52 tristate "PMEM DAX: support the deprecated /sys/class/dax interface"
53 depends on DEV_DAX_PMEM
54 default DEV_DAX_PMEM
55 help
56 Older versions of the libdaxctl library expect to find all
57 device-dax instances under /sys/class/dax. If libdaxctl in
58 your distribution is older than v58 say M, otherwise say N.
33 59
34endif 60endif
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
index 574286fac87c..81f7d54dadfb 100644
--- a/drivers/dax/Makefile
+++ b/drivers/dax/Makefile
@@ -1,8 +1,10 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-$(CONFIG_DAX) += dax.o 2obj-$(CONFIG_DAX) += dax.o
3obj-$(CONFIG_DEV_DAX) += device_dax.o 3obj-$(CONFIG_DEV_DAX) += device_dax.o
4obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o 4obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
5 5
6dax-y := super.o 6dax-y := super.o
7dax_pmem-y := pmem.o 7dax-y += bus.o
8device_dax-y := device.o 8device_dax-y := device.o
9
10obj-y += pmem/
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
new file mode 100644
index 000000000000..2109cfe80219
--- /dev/null
+++ b/drivers/dax/bus.c
@@ -0,0 +1,503 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
3#include <linux/memremap.h>
4#include <linux/device.h>
5#include <linux/mutex.h>
6#include <linux/list.h>
7#include <linux/slab.h>
8#include <linux/dax.h>
9#include "dax-private.h"
10#include "bus.h"
11
12static struct class *dax_class;
13
14static DEFINE_MUTEX(dax_bus_lock);
15
16#define DAX_NAME_LEN 30
17struct dax_id {
18 struct list_head list;
19 char dev_name[DAX_NAME_LEN];
20};
21
22static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
23{
24 /*
25 * We only ever expect to handle device-dax instances, i.e. the
26 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
27 */
28 return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
29}
30
31static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
32{
33 return container_of(drv, struct dax_device_driver, drv);
34}
35
36static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
37 const char *dev_name)
38{
39 struct dax_id *dax_id;
40
41 lockdep_assert_held(&dax_bus_lock);
42
43 list_for_each_entry(dax_id, &dax_drv->ids, list)
44 if (sysfs_streq(dax_id->dev_name, dev_name))
45 return dax_id;
46 return NULL;
47}
48
49static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
50{
51 int match;
52
53 mutex_lock(&dax_bus_lock);
54 match = !!__dax_match_id(dax_drv, dev_name(dev));
55 mutex_unlock(&dax_bus_lock);
56
57 return match;
58}
59
60enum id_action {
61 ID_REMOVE,
62 ID_ADD,
63};
64
65static ssize_t do_id_store(struct device_driver *drv, const char *buf,
66 size_t count, enum id_action action)
67{
68 struct dax_device_driver *dax_drv = to_dax_drv(drv);
69 unsigned int region_id, id;
70 char devname[DAX_NAME_LEN];
71 struct dax_id *dax_id;
72 ssize_t rc = count;
73 int fields;
74
75 fields = sscanf(buf, "dax%d.%d", &region_id, &id);
76 if (fields != 2)
77 return -EINVAL;
78 sprintf(devname, "dax%d.%d", region_id, id);
79 if (!sysfs_streq(buf, devname))
80 return -EINVAL;
81
82 mutex_lock(&dax_bus_lock);
83 dax_id = __dax_match_id(dax_drv, buf);
84 if (!dax_id) {
85 if (action == ID_ADD) {
86 dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
87 if (dax_id) {
88 strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
89 list_add(&dax_id->list, &dax_drv->ids);
90 } else
91 rc = -ENOMEM;
92 } else
93 /* nothing to remove */;
94 } else if (action == ID_REMOVE) {
95 list_del(&dax_id->list);
96 kfree(dax_id);
97 } else
98 /* dax_id already added */;
99 mutex_unlock(&dax_bus_lock);
100
101 if (rc < 0)
102 return rc;
103 if (action == ID_ADD)
104 rc = driver_attach(drv);
105 if (rc)
106 return rc;
107 return count;
108}
109
110static ssize_t new_id_store(struct device_driver *drv, const char *buf,
111 size_t count)
112{
113 return do_id_store(drv, buf, count, ID_ADD);
114}
115static DRIVER_ATTR_WO(new_id);
116
117static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
118 size_t count)
119{
120 return do_id_store(drv, buf, count, ID_REMOVE);
121}
122static DRIVER_ATTR_WO(remove_id);
123
124static struct attribute *dax_drv_attrs[] = {
125 &driver_attr_new_id.attr,
126 &driver_attr_remove_id.attr,
127 NULL,
128};
129ATTRIBUTE_GROUPS(dax_drv);
130
131static int dax_bus_match(struct device *dev, struct device_driver *drv);
132
133static struct bus_type dax_bus_type = {
134 .name = "dax",
135 .uevent = dax_bus_uevent,
136 .match = dax_bus_match,
137 .drv_groups = dax_drv_groups,
138};
139
140static int dax_bus_match(struct device *dev, struct device_driver *drv)
141{
142 struct dax_device_driver *dax_drv = to_dax_drv(drv);
143
144 /*
145 * All but the 'device-dax' driver, which has 'match_always'
146 * set, requires an exact id match.
147 */
148 if (dax_drv->match_always)
149 return 1;
150
151 return dax_match_id(dax_drv, dev);
152}
153
154/*
155 * Rely on the fact that drvdata is set before the attributes are
156 * registered, and that the attributes are unregistered before drvdata
157 * is cleared to assume that drvdata is always valid.
158 */
159static ssize_t id_show(struct device *dev,
160 struct device_attribute *attr, char *buf)
161{
162 struct dax_region *dax_region = dev_get_drvdata(dev);
163
164 return sprintf(buf, "%d\n", dax_region->id);
165}
166static DEVICE_ATTR_RO(id);
167
168static ssize_t region_size_show(struct device *dev,
169 struct device_attribute *attr, char *buf)
170{
171 struct dax_region *dax_region = dev_get_drvdata(dev);
172
173 return sprintf(buf, "%llu\n", (unsigned long long)
174 resource_size(&dax_region->res));
175}
176static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
177 region_size_show, NULL);
178
179static ssize_t align_show(struct device *dev,
180 struct device_attribute *attr, char *buf)
181{
182 struct dax_region *dax_region = dev_get_drvdata(dev);
183
184 return sprintf(buf, "%u\n", dax_region->align);
185}
186static DEVICE_ATTR_RO(align);
187
188static struct attribute *dax_region_attributes[] = {
189 &dev_attr_region_size.attr,
190 &dev_attr_align.attr,
191 &dev_attr_id.attr,
192 NULL,
193};
194
195static const struct attribute_group dax_region_attribute_group = {
196 .name = "dax_region",
197 .attrs = dax_region_attributes,
198};
199
200static const struct attribute_group *dax_region_attribute_groups[] = {
201 &dax_region_attribute_group,
202 NULL,
203};
204
205static void dax_region_free(struct kref *kref)
206{
207 struct dax_region *dax_region;
208
209 dax_region = container_of(kref, struct dax_region, kref);
210 kfree(dax_region);
211}
212
213void dax_region_put(struct dax_region *dax_region)
214{
215 kref_put(&dax_region->kref, dax_region_free);
216}
217EXPORT_SYMBOL_GPL(dax_region_put);
218
219static void dax_region_unregister(void *region)
220{
221 struct dax_region *dax_region = region;
222
223 sysfs_remove_groups(&dax_region->dev->kobj,
224 dax_region_attribute_groups);
225 dax_region_put(dax_region);
226}
227
228struct dax_region *alloc_dax_region(struct device *parent, int region_id,
229 struct resource *res, int target_node, unsigned int align,
230 unsigned long pfn_flags)
231{
232 struct dax_region *dax_region;
233
234 /*
235 * The DAX core assumes that it can store its private data in
236 * parent->driver_data. This WARN is a reminder / safeguard for
237 * developers of device-dax drivers.
238 */
239 if (dev_get_drvdata(parent)) {
240 dev_WARN(parent, "dax core failed to setup private data\n");
241 return NULL;
242 }
243
244 if (!IS_ALIGNED(res->start, align)
245 || !IS_ALIGNED(resource_size(res), align))
246 return NULL;
247
248 dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
249 if (!dax_region)
250 return NULL;
251
252 dev_set_drvdata(parent, dax_region);
253 memcpy(&dax_region->res, res, sizeof(*res));
254 dax_region->pfn_flags = pfn_flags;
255 kref_init(&dax_region->kref);
256 dax_region->id = region_id;
257 dax_region->align = align;
258 dax_region->dev = parent;
259 dax_region->target_node = target_node;
260 if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
261 kfree(dax_region);
262 return NULL;
263 }
264
265 kref_get(&dax_region->kref);
266 if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
267 return NULL;
268 return dax_region;
269}
270EXPORT_SYMBOL_GPL(alloc_dax_region);
271
272static ssize_t size_show(struct device *dev,
273 struct device_attribute *attr, char *buf)
274{
275 struct dev_dax *dev_dax = to_dev_dax(dev);
276 unsigned long long size = resource_size(&dev_dax->region->res);
277
278 return sprintf(buf, "%llu\n", size);
279}
280static DEVICE_ATTR_RO(size);
281
282static int dev_dax_target_node(struct dev_dax *dev_dax)
283{
284 struct dax_region *dax_region = dev_dax->region;
285
286 return dax_region->target_node;
287}
288
289static ssize_t target_node_show(struct device *dev,
290 struct device_attribute *attr, char *buf)
291{
292 struct dev_dax *dev_dax = to_dev_dax(dev);
293
294 return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
295}
296static DEVICE_ATTR_RO(target_node);
297
298static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
299 char *buf)
300{
301 /*
302 * We only ever expect to handle device-dax instances, i.e. the
303 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
304 */
305 return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
306}
307static DEVICE_ATTR_RO(modalias);
308
309static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
310{
311 struct device *dev = container_of(kobj, struct device, kobj);
312 struct dev_dax *dev_dax = to_dev_dax(dev);
313
314 if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
315 return 0;
316 return a->mode;
317}
318
319static struct attribute *dev_dax_attributes[] = {
320 &dev_attr_modalias.attr,
321 &dev_attr_size.attr,
322 &dev_attr_target_node.attr,
323 NULL,
324};
325
326static const struct attribute_group dev_dax_attribute_group = {
327 .attrs = dev_dax_attributes,
328 .is_visible = dev_dax_visible,
329};
330
331static const struct attribute_group *dax_attribute_groups[] = {
332 &dev_dax_attribute_group,
333 NULL,
334};
335
336void kill_dev_dax(struct dev_dax *dev_dax)
337{
338 struct dax_device *dax_dev = dev_dax->dax_dev;
339 struct inode *inode = dax_inode(dax_dev);
340
341 kill_dax(dax_dev);
342 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
343}
344EXPORT_SYMBOL_GPL(kill_dev_dax);
345
346static void dev_dax_release(struct device *dev)
347{
348 struct dev_dax *dev_dax = to_dev_dax(dev);
349 struct dax_region *dax_region = dev_dax->region;
350 struct dax_device *dax_dev = dev_dax->dax_dev;
351
352 dax_region_put(dax_region);
353 put_dax(dax_dev);
354 kfree(dev_dax);
355}
356
357static void unregister_dev_dax(void *dev)
358{
359 struct dev_dax *dev_dax = to_dev_dax(dev);
360
361 dev_dbg(dev, "%s\n", __func__);
362
363 kill_dev_dax(dev_dax);
364 device_del(dev);
365 put_device(dev);
366}
367
368struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
369 struct dev_pagemap *pgmap, enum dev_dax_subsys subsys)
370{
371 struct device *parent = dax_region->dev;
372 struct dax_device *dax_dev;
373 struct dev_dax *dev_dax;
374 struct inode *inode;
375 struct device *dev;
376 int rc = -ENOMEM;
377
378 if (id < 0)
379 return ERR_PTR(-EINVAL);
380
381 dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
382 if (!dev_dax)
383 return ERR_PTR(-ENOMEM);
384
385 memcpy(&dev_dax->pgmap, pgmap, sizeof(*pgmap));
386
387 /*
388 * No 'host' or dax_operations since there is no access to this
389 * device outside of mmap of the resulting character device.
390 */
391 dax_dev = alloc_dax(dev_dax, NULL, NULL);
392 if (!dax_dev)
393 goto err;
394
395 /* a device_dax instance is dead while the driver is not attached */
396 kill_dax(dax_dev);
397
398 /* from here on we're committed to teardown via dev_dax_release() */
399 dev = &dev_dax->dev;
400 device_initialize(dev);
401
402 dev_dax->dax_dev = dax_dev;
403 dev_dax->region = dax_region;
404 dev_dax->target_node = dax_region->target_node;
405 kref_get(&dax_region->kref);
406
407 inode = dax_inode(dax_dev);
408 dev->devt = inode->i_rdev;
409 if (subsys == DEV_DAX_BUS)
410 dev->bus = &dax_bus_type;
411 else
412 dev->class = dax_class;
413 dev->parent = parent;
414 dev->groups = dax_attribute_groups;
415 dev->release = dev_dax_release;
416 dev_set_name(dev, "dax%d.%d", dax_region->id, id);
417
418 rc = device_add(dev);
419 if (rc) {
420 kill_dev_dax(dev_dax);
421 put_device(dev);
422 return ERR_PTR(rc);
423 }
424
425 rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
426 if (rc)
427 return ERR_PTR(rc);
428
429 return dev_dax;
430
431 err:
432 kfree(dev_dax);
433
434 return ERR_PTR(rc);
435}
436EXPORT_SYMBOL_GPL(__devm_create_dev_dax);
437
438static int match_always_count;
439
440int __dax_driver_register(struct dax_device_driver *dax_drv,
441 struct module *module, const char *mod_name)
442{
443 struct device_driver *drv = &dax_drv->drv;
444 int rc = 0;
445
446 INIT_LIST_HEAD(&dax_drv->ids);
447 drv->owner = module;
448 drv->name = mod_name;
449 drv->mod_name = mod_name;
450 drv->bus = &dax_bus_type;
451
452 /* there can only be one default driver */
453 mutex_lock(&dax_bus_lock);
454 match_always_count += dax_drv->match_always;
455 if (match_always_count > 1) {
456 match_always_count--;
457 WARN_ON(1);
458 rc = -EINVAL;
459 }
460 mutex_unlock(&dax_bus_lock);
461 if (rc)
462 return rc;
463 return driver_register(drv);
464}
465EXPORT_SYMBOL_GPL(__dax_driver_register);
466
467void dax_driver_unregister(struct dax_device_driver *dax_drv)
468{
469 struct device_driver *drv = &dax_drv->drv;
470 struct dax_id *dax_id, *_id;
471
472 mutex_lock(&dax_bus_lock);
473 match_always_count -= dax_drv->match_always;
474 list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
475 list_del(&dax_id->list);
476 kfree(dax_id);
477 }
478 mutex_unlock(&dax_bus_lock);
479 driver_unregister(drv);
480}
481EXPORT_SYMBOL_GPL(dax_driver_unregister);
482
483int __init dax_bus_init(void)
484{
485 int rc;
486
487 if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
488 dax_class = class_create(THIS_MODULE, "dax");
489 if (IS_ERR(dax_class))
490 return PTR_ERR(dax_class);
491 }
492
493 rc = bus_register(&dax_bus_type);
494 if (rc)
495 class_destroy(dax_class);
496 return rc;
497}
498
499void __exit dax_bus_exit(void)
500{
501 bus_unregister(&dax_bus_type);
502 class_destroy(dax_class);
503}
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
new file mode 100644
index 000000000000..8619e3299943
--- /dev/null
+++ b/drivers/dax/bus.h
@@ -0,0 +1,61 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
3#ifndef __DAX_BUS_H__
4#define __DAX_BUS_H__
5#include <linux/device.h>
6
7struct dev_dax;
8struct resource;
9struct dax_device;
10struct dax_region;
11void dax_region_put(struct dax_region *dax_region);
12struct dax_region *alloc_dax_region(struct device *parent, int region_id,
13 struct resource *res, int target_node, unsigned int align,
14 unsigned long flags);
15
16enum dev_dax_subsys {
17 DEV_DAX_BUS,
18 DEV_DAX_CLASS,
19};
20
21struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
22 struct dev_pagemap *pgmap, enum dev_dax_subsys subsys);
23
24static inline struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
25 int id, struct dev_pagemap *pgmap)
26{
27 return __devm_create_dev_dax(dax_region, id, pgmap, DEV_DAX_BUS);
28}
29
30/* to be deleted when DEV_DAX_CLASS is removed */
31struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
32
33struct dax_device_driver {
34 struct device_driver drv;
35 struct list_head ids;
36 int match_always;
37};
38
39int __dax_driver_register(struct dax_device_driver *dax_drv,
40 struct module *module, const char *mod_name);
41#define dax_driver_register(driver) \
42 __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
43void dax_driver_unregister(struct dax_device_driver *dax_drv);
44void kill_dev_dax(struct dev_dax *dev_dax);
45
46#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
47int dev_dax_probe(struct device *dev);
48#endif
49
50/*
51 * While run_dax() is potentially a generic operation that could be
52 * defined in include/linux/dax.h we don't want to grow any users
53 * outside of drivers/dax/
54 */
55void run_dax(struct dax_device *dax_dev);
56
57#define MODULE_ALIAS_DAX_DEVICE(type) \
58 MODULE_ALIAS("dax:t" __stringify(type) "*")
59#define DAX_DEVICE_MODALIAS_FMT "dax:t%d"
60
61#endif /* __DAX_BUS_H__ */
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index b6fc4f04636d..a45612148ca0 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -16,10 +16,17 @@
16#include <linux/device.h> 16#include <linux/device.h>
17#include <linux/cdev.h> 17#include <linux/cdev.h>
18 18
19/* private routines between core files */
20struct dax_device;
21struct dax_device *inode_dax(struct inode *inode);
22struct inode *dax_inode(struct dax_device *dax_dev);
23int dax_bus_init(void);
24void dax_bus_exit(void);
25
19/** 26/**
20 * struct dax_region - mapping infrastructure for dax devices 27 * struct dax_region - mapping infrastructure for dax devices
21 * @id: kernel-wide unique region for a memory range 28 * @id: kernel-wide unique region for a memory range
22 * @base: linear address corresponding to @res 29 * @target_node: effective numa node if this memory range is onlined
23 * @kref: to pin while other agents have a need to do lookups 30 * @kref: to pin while other agents have a need to do lookups
24 * @dev: parent device backing this region 31 * @dev: parent device backing this region
25 * @align: allocation and mapping alignment for child dax devices 32 * @align: allocation and mapping alignment for child dax devices
@@ -28,8 +35,7 @@
28 */ 35 */
29struct dax_region { 36struct dax_region {
30 int id; 37 int id;
31 struct ida ida; 38 int target_node;
32 void *base;
33 struct kref kref; 39 struct kref kref;
34 struct device *dev; 40 struct device *dev;
35 unsigned int align; 41 unsigned int align;
@@ -38,20 +44,28 @@ struct dax_region {
38}; 44};
39 45
40/** 46/**
41 * struct dev_dax - instance data for a subdivision of a dax region 47 * struct dev_dax - instance data for a subdivision of a dax region, and
48 * data while the device is activated in the driver.
42 * @region - parent region 49 * @region - parent region
43 * @dax_dev - core dax functionality 50 * @dax_dev - core dax functionality
51 * @target_node: effective numa node if dev_dax memory range is onlined
44 * @dev - device core 52 * @dev - device core
45 * @id - child id in the region 53 * @pgmap - pgmap for memmap setup / lifetime (driver owned)
46 * @num_resources - number of physical address extents in this device 54 * @ref: pgmap reference count (driver owned)
47 * @res - array of physical address ranges 55 * @cmp: @ref final put completion (driver owned)
48 */ 56 */
49struct dev_dax { 57struct dev_dax {
50 struct dax_region *region; 58 struct dax_region *region;
51 struct dax_device *dax_dev; 59 struct dax_device *dax_dev;
60 int target_node;
52 struct device dev; 61 struct device dev;
53 int id; 62 struct dev_pagemap pgmap;
54 int num_resources; 63 struct percpu_ref ref;
55 struct resource res[0]; 64 struct completion cmp;
56}; 65};
66
67static inline struct dev_dax *to_dev_dax(struct device *dev)
68{
69 return container_of(dev, struct dev_dax, dev);
70}
57#endif 71#endif
diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h
deleted file mode 100644
index f9e5feea742c..000000000000
--- a/drivers/dax/dax.h
+++ /dev/null
@@ -1,18 +0,0 @@
1/*
2 * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#ifndef __DAX_H__
14#define __DAX_H__
15struct dax_device;
16struct dax_device *inode_dax(struct inode *inode);
17struct inode *dax_inode(struct dax_device *dax_dev);
18#endif /* __DAX_H__ */
diff --git a/drivers/dax/device-dax.h b/drivers/dax/device-dax.h
deleted file mode 100644
index 688b051750bd..000000000000
--- a/drivers/dax/device-dax.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright(c) 2016 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#ifndef __DEVICE_DAX_H__
14#define __DEVICE_DAX_H__
15struct device;
16struct dev_dax;
17struct resource;
18struct dax_region;
19void dax_region_put(struct dax_region *dax_region);
20struct dax_region *alloc_dax_region(struct device *parent,
21 int region_id, struct resource *res, unsigned int align,
22 void *addr, unsigned long flags);
23struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
24 int id, struct resource *res, int count);
25#endif /* __DEVICE_DAX_H__ */
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 948806e57cee..e428468ab661 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -1,15 +1,6 @@
1/* 1// SPDX-License-Identifier: GPL-2.0
2 * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved. 2/* Copyright(c) 2016-2018 Intel Corporation. All rights reserved. */
3 * 3#include <linux/memremap.h>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/pagemap.h> 4#include <linux/pagemap.h>
14#include <linux/module.h> 5#include <linux/module.h>
15#include <linux/device.h> 6#include <linux/device.h>
@@ -21,161 +12,39 @@
21#include <linux/mm.h> 12#include <linux/mm.h>
22#include <linux/mman.h> 13#include <linux/mman.h>
23#include "dax-private.h" 14#include "dax-private.h"
24#include "dax.h" 15#include "bus.h"
25 16
26static struct class *dax_class; 17static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref)
27
28/*
29 * Rely on the fact that drvdata is set before the attributes are
30 * registered, and that the attributes are unregistered before drvdata
31 * is cleared to assume that drvdata is always valid.
32 */
33static ssize_t id_show(struct device *dev,
34 struct device_attribute *attr, char *buf)
35{
36 struct dax_region *dax_region = dev_get_drvdata(dev);
37
38 return sprintf(buf, "%d\n", dax_region->id);
39}
40static DEVICE_ATTR_RO(id);
41
42static ssize_t region_size_show(struct device *dev,
43 struct device_attribute *attr, char *buf)
44{
45 struct dax_region *dax_region = dev_get_drvdata(dev);
46
47 return sprintf(buf, "%llu\n", (unsigned long long)
48 resource_size(&dax_region->res));
49}
50static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
51 region_size_show, NULL);
52
53static ssize_t align_show(struct device *dev,
54 struct device_attribute *attr, char *buf)
55{
56 struct dax_region *dax_region = dev_get_drvdata(dev);
57
58 return sprintf(buf, "%u\n", dax_region->align);
59}
60static DEVICE_ATTR_RO(align);
61
62static struct attribute *dax_region_attributes[] = {
63 &dev_attr_region_size.attr,
64 &dev_attr_align.attr,
65 &dev_attr_id.attr,
66 NULL,
67};
68
69static const struct attribute_group dax_region_attribute_group = {
70 .name = "dax_region",
71 .attrs = dax_region_attributes,
72};
73
74static const struct attribute_group *dax_region_attribute_groups[] = {
75 &dax_region_attribute_group,
76 NULL,
77};
78
79static void dax_region_free(struct kref *kref)
80{
81 struct dax_region *dax_region;
82
83 dax_region = container_of(kref, struct dax_region, kref);
84 kfree(dax_region);
85}
86
87void dax_region_put(struct dax_region *dax_region)
88{ 18{
89 kref_put(&dax_region->kref, dax_region_free); 19 return container_of(ref, struct dev_dax, ref);
90} 20}
91EXPORT_SYMBOL_GPL(dax_region_put);
92 21
93static void dax_region_unregister(void *region) 22static void dev_dax_percpu_release(struct percpu_ref *ref)
94{ 23{
95 struct dax_region *dax_region = region; 24 struct dev_dax *dev_dax = ref_to_dev_dax(ref);
96 25
97 sysfs_remove_groups(&dax_region->dev->kobj, 26 dev_dbg(&dev_dax->dev, "%s\n", __func__);
98 dax_region_attribute_groups); 27 complete(&dev_dax->cmp);
99 dax_region_put(dax_region);
100} 28}
101 29
102struct dax_region *alloc_dax_region(struct device *parent, int region_id, 30static void dev_dax_percpu_exit(void *data)
103 struct resource *res, unsigned int align, void *addr,
104 unsigned long pfn_flags)
105{ 31{
106 struct dax_region *dax_region; 32 struct percpu_ref *ref = data;
107 33 struct dev_dax *dev_dax = ref_to_dev_dax(ref);
108 /*
109 * The DAX core assumes that it can store its private data in
110 * parent->driver_data. This WARN is a reminder / safeguard for
111 * developers of device-dax drivers.
112 */
113 if (dev_get_drvdata(parent)) {
114 dev_WARN(parent, "dax core failed to setup private data\n");
115 return NULL;
116 }
117
118 if (!IS_ALIGNED(res->start, align)
119 || !IS_ALIGNED(resource_size(res), align))
120 return NULL;
121
122 dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
123 if (!dax_region)
124 return NULL;
125
126 dev_set_drvdata(parent, dax_region);
127 memcpy(&dax_region->res, res, sizeof(*res));
128 dax_region->pfn_flags = pfn_flags;
129 kref_init(&dax_region->kref);
130 dax_region->id = region_id;
131 ida_init(&dax_region->ida);
132 dax_region->align = align;
133 dax_region->dev = parent;
134 dax_region->base = addr;
135 if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
136 kfree(dax_region);
137 return NULL;
138 }
139 34
140 kref_get(&dax_region->kref); 35 dev_dbg(&dev_dax->dev, "%s\n", __func__);
141 if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) 36 wait_for_completion(&dev_dax->cmp);
142 return NULL; 37 percpu_ref_exit(ref);
143 return dax_region;
144} 38}
145EXPORT_SYMBOL_GPL(alloc_dax_region);
146 39
147static struct dev_dax *to_dev_dax(struct device *dev) 40static void dev_dax_percpu_kill(struct percpu_ref *data)
148{ 41{
149 return container_of(dev, struct dev_dax, dev); 42 struct percpu_ref *ref = data;
150} 43 struct dev_dax *dev_dax = ref_to_dev_dax(ref);
151
152static ssize_t size_show(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct dev_dax *dev_dax = to_dev_dax(dev);
156 unsigned long long size = 0;
157 int i;
158 44
159 for (i = 0; i < dev_dax->num_resources; i++) 45 dev_dbg(&dev_dax->dev, "%s\n", __func__);
160 size += resource_size(&dev_dax->res[i]); 46 percpu_ref_kill(ref);
161
162 return sprintf(buf, "%llu\n", size);
163} 47}
164static DEVICE_ATTR_RO(size);
165
166static struct attribute *dev_dax_attributes[] = {
167 &dev_attr_size.attr,
168 NULL,
169};
170
171static const struct attribute_group dev_dax_attribute_group = {
172 .attrs = dev_dax_attributes,
173};
174
175static const struct attribute_group *dax_attribute_groups[] = {
176 &dev_dax_attribute_group,
177 NULL,
178};
179 48
180static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, 49static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
181 const char *func) 50 const char *func)
@@ -226,21 +95,11 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
226__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, 95__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
227 unsigned long size) 96 unsigned long size)
228{ 97{
229 struct resource *res; 98 struct resource *res = &dev_dax->region->res;
230 /* gcc-4.6.3-nolibc for i386 complains that this is uninitialized */ 99 phys_addr_t phys;
231 phys_addr_t uninitialized_var(phys);
232 int i;
233
234 for (i = 0; i < dev_dax->num_resources; i++) {
235 res = &dev_dax->res[i];
236 phys = pgoff * PAGE_SIZE + res->start;
237 if (phys >= res->start && phys <= res->end)
238 break;
239 pgoff -= PHYS_PFN(resource_size(res));
240 }
241 100
242 if (i < dev_dax->num_resources) { 101 phys = pgoff * PAGE_SIZE + res->start;
243 res = &dev_dax->res[i]; 102 if (phys >= res->start && phys <= res->end) {
244 if (phys + size - 1 <= res->end) 103 if (phys + size - 1 <= res->end)
245 return phys; 104 return phys;
246 } 105 }
@@ -576,152 +435,100 @@ static const struct file_operations dax_fops = {
576 .mmap_supported_flags = MAP_SYNC, 435 .mmap_supported_flags = MAP_SYNC,
577}; 436};
578 437
579static void dev_dax_release(struct device *dev) 438static void dev_dax_cdev_del(void *cdev)
580{ 439{
581 struct dev_dax *dev_dax = to_dev_dax(dev); 440 cdev_del(cdev);
582 struct dax_region *dax_region = dev_dax->region;
583 struct dax_device *dax_dev = dev_dax->dax_dev;
584
585 if (dev_dax->id >= 0)
586 ida_simple_remove(&dax_region->ida, dev_dax->id);
587 dax_region_put(dax_region);
588 put_dax(dax_dev);
589 kfree(dev_dax);
590} 441}
591 442
592static void kill_dev_dax(struct dev_dax *dev_dax) 443static void dev_dax_kill(void *dev_dax)
593{ 444{
594 struct dax_device *dax_dev = dev_dax->dax_dev; 445 kill_dev_dax(dev_dax);
595 struct inode *inode = dax_inode(dax_dev);
596
597 kill_dax(dax_dev);
598 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
599} 446}
600 447
601static void unregister_dev_dax(void *dev) 448int dev_dax_probe(struct device *dev)
602{ 449{
603 struct dev_dax *dev_dax = to_dev_dax(dev); 450 struct dev_dax *dev_dax = to_dev_dax(dev);
604 struct dax_device *dax_dev = dev_dax->dax_dev; 451 struct dax_device *dax_dev = dev_dax->dax_dev;
605 struct inode *inode = dax_inode(dax_dev); 452 struct resource *res = &dev_dax->region->res;
606 struct cdev *cdev = inode->i_cdev;
607
608 dev_dbg(dev, "trace\n");
609
610 kill_dev_dax(dev_dax);
611 cdev_device_del(cdev, dev);
612 put_device(dev);
613}
614
615struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
616 int id, struct resource *res, int count)
617{
618 struct device *parent = dax_region->dev;
619 struct dax_device *dax_dev;
620 struct dev_dax *dev_dax;
621 struct inode *inode; 453 struct inode *inode;
622 struct device *dev;
623 struct cdev *cdev; 454 struct cdev *cdev;
624 int rc, i; 455 void *addr;
625 456 int rc;
626 if (!count) 457
627 return ERR_PTR(-EINVAL); 458 /* 1:1 map region resource range to device-dax instance range */
628 459 if (!devm_request_mem_region(dev, res->start, resource_size(res),
629 dev_dax = kzalloc(struct_size(dev_dax, res, count), GFP_KERNEL); 460 dev_name(dev))) {
630 if (!dev_dax) 461 dev_warn(dev, "could not reserve region %pR\n", res);
631 return ERR_PTR(-ENOMEM); 462 return -EBUSY;
632
633 for (i = 0; i < count; i++) {
634 if (!IS_ALIGNED(res[i].start, dax_region->align)
635 || !IS_ALIGNED(resource_size(&res[i]),
636 dax_region->align)) {
637 rc = -EINVAL;
638 break;
639 }
640 dev_dax->res[i].start = res[i].start;
641 dev_dax->res[i].end = res[i].end;
642 } 463 }
643 464
644 if (i < count) 465 init_completion(&dev_dax->cmp);
645 goto err_id; 466 rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0,
467 GFP_KERNEL);
468 if (rc)
469 return rc;
646 470
647 if (id < 0) { 471 rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref);
648 id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); 472 if (rc)
649 dev_dax->id = id; 473 return rc;
650 if (id < 0) {
651 rc = id;
652 goto err_id;
653 }
654 } else {
655 /* region provider owns @id lifetime */
656 dev_dax->id = -1;
657 }
658 474
659 /* 475 dev_dax->pgmap.ref = &dev_dax->ref;
660 * No 'host' or dax_operations since there is no access to this 476 dev_dax->pgmap.kill = dev_dax_percpu_kill;
661 * device outside of mmap of the resulting character device. 477 addr = devm_memremap_pages(dev, &dev_dax->pgmap);
662 */ 478 if (IS_ERR(addr)) {
663 dax_dev = alloc_dax(dev_dax, NULL, NULL); 479 devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref);
664 if (!dax_dev) { 480 percpu_ref_exit(&dev_dax->ref);
665 rc = -ENOMEM; 481 return PTR_ERR(addr);
666 goto err_dax;
667 } 482 }
668 483
669 /* from here on we're committed to teardown via dax_dev_release() */
670 dev = &dev_dax->dev;
671 device_initialize(dev);
672
673 inode = dax_inode(dax_dev); 484 inode = dax_inode(dax_dev);
674 cdev = inode->i_cdev; 485 cdev = inode->i_cdev;
675 cdev_init(cdev, &dax_fops); 486 cdev_init(cdev, &dax_fops);
676 cdev->owner = parent->driver->owner; 487 if (dev->class) {
677 488 /* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
678 dev_dax->num_resources = count; 489 cdev->owner = dev->parent->driver->owner;
679 dev_dax->dax_dev = dax_dev; 490 } else
680 dev_dax->region = dax_region; 491 cdev->owner = dev->driver->owner;
681 kref_get(&dax_region->kref); 492 cdev_set_parent(cdev, &dev->kobj);
682 493 rc = cdev_add(cdev, dev->devt, 1);
683 dev->devt = inode->i_rdev;
684 dev->class = dax_class;
685 dev->parent = parent;
686 dev->groups = dax_attribute_groups;
687 dev->release = dev_dax_release;
688 dev_set_name(dev, "dax%d.%d", dax_region->id, id);
689
690 rc = cdev_device_add(cdev, dev);
691 if (rc) {
692 kill_dev_dax(dev_dax);
693 put_device(dev);
694 return ERR_PTR(rc);
695 }
696
697 rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
698 if (rc) 494 if (rc)
699 return ERR_PTR(rc); 495 return rc;
700 496
701 return dev_dax; 497 rc = devm_add_action_or_reset(dev, dev_dax_cdev_del, cdev);
498 if (rc)
499 return rc;
702 500
703 err_dax: 501 run_dax(dax_dev);
704 if (dev_dax->id >= 0) 502 return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax);
705 ida_simple_remove(&dax_region->ida, dev_dax->id); 503}
706 err_id: 504EXPORT_SYMBOL_GPL(dev_dax_probe);
707 kfree(dev_dax);
708 505
709 return ERR_PTR(rc); 506static int dev_dax_remove(struct device *dev)
507{
508 /* all probe actions are unwound by devm */
509 return 0;
710} 510}
711EXPORT_SYMBOL_GPL(devm_create_dev_dax); 511
512static struct dax_device_driver device_dax_driver = {
513 .drv = {
514 .probe = dev_dax_probe,
515 .remove = dev_dax_remove,
516 },
517 .match_always = 1,
518};
712 519
713static int __init dax_init(void) 520static int __init dax_init(void)
714{ 521{
715 dax_class = class_create(THIS_MODULE, "dax"); 522 return dax_driver_register(&device_dax_driver);
716 return PTR_ERR_OR_ZERO(dax_class);
717} 523}
718 524
719static void __exit dax_exit(void) 525static void __exit dax_exit(void)
720{ 526{
721 class_destroy(dax_class); 527 dax_driver_unregister(&device_dax_driver);
722} 528}
723 529
724MODULE_AUTHOR("Intel Corporation"); 530MODULE_AUTHOR("Intel Corporation");
725MODULE_LICENSE("GPL v2"); 531MODULE_LICENSE("GPL v2");
726subsys_initcall(dax_init); 532module_init(dax_init);
727module_exit(dax_exit); 533module_exit(dax_exit);
534MODULE_ALIAS_DAX_DEVICE(0);
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
new file mode 100644
index 000000000000..a02318c6d28a
--- /dev/null
+++ b/drivers/dax/kmem.c
@@ -0,0 +1,108 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */
3#include <linux/memremap.h>
4#include <linux/pagemap.h>
5#include <linux/memory.h>
6#include <linux/module.h>
7#include <linux/device.h>
8#include <linux/pfn_t.h>
9#include <linux/slab.h>
10#include <linux/dax.h>
11#include <linux/fs.h>
12#include <linux/mm.h>
13#include <linux/mman.h>
14#include "dax-private.h"
15#include "bus.h"
16
17int dev_dax_kmem_probe(struct device *dev)
18{
19 struct dev_dax *dev_dax = to_dev_dax(dev);
20 struct resource *res = &dev_dax->region->res;
21 resource_size_t kmem_start;
22 resource_size_t kmem_size;
23 resource_size_t kmem_end;
24 struct resource *new_res;
25 int numa_node;
26 int rc;
27
28 /*
29 * Ensure good NUMA information for the persistent memory.
30 * Without this check, there is a risk that slow memory
31 * could be mixed in a node with faster memory, causing
32 * unavoidable performance issues.
33 */
34 numa_node = dev_dax->target_node;
35 if (numa_node < 0) {
36 dev_warn(dev, "rejecting DAX region %pR with invalid node: %d\n",
37 res, numa_node);
38 return -EINVAL;
39 }
40
41 /* Hotplug starting at the beginning of the next block: */
42 kmem_start = ALIGN(res->start, memory_block_size_bytes());
43
44 kmem_size = resource_size(res);
45 /* Adjust the size down to compensate for moving up kmem_start: */
46 kmem_size -= kmem_start - res->start;
47 /* Align the size down to cover only complete blocks: */
48 kmem_size &= ~(memory_block_size_bytes() - 1);
49 kmem_end = kmem_start + kmem_size;
50
51 /* Region is permanently reserved. Hot-remove not yet implemented. */
52 new_res = request_mem_region(kmem_start, kmem_size, dev_name(dev));
53 if (!new_res) {
54 dev_warn(dev, "could not reserve region [%pa-%pa]\n",
55 &kmem_start, &kmem_end);
56 return -EBUSY;
57 }
58
59 /*
60 * Set flags appropriate for System RAM. Leave ..._BUSY clear
61 * so that add_memory() can add a child resource. Do not
62 * inherit flags from the parent since it may set new flags
63 * unknown to us that will break add_memory() below.
64 */
65 new_res->flags = IORESOURCE_SYSTEM_RAM;
66 new_res->name = dev_name(dev);
67
68 rc = add_memory(numa_node, new_res->start, resource_size(new_res));
69 if (rc)
70 return rc;
71
72 return 0;
73}
74
/*
 * Unbind handler for the kmem driver.
 *
 * Unbinding always reports success, but nothing is undone: the
 * request_mem_region() reservation taken at probe time is deliberately
 * left in place, so the device-dax range stays permanently reserved.
 */
static int dev_dax_kmem_remove(struct device *dev)
{
	return 0;
}
86
87static struct dax_device_driver device_dax_kmem_driver = {
88 .drv = {
89 .probe = dev_dax_kmem_probe,
90 .remove = dev_dax_kmem_remove,
91 },
92};
93
94static int __init dax_kmem_init(void)
95{
96 return dax_driver_register(&device_dax_kmem_driver);
97}
98
99static void __exit dax_kmem_exit(void)
100{
101 dax_driver_unregister(&device_dax_kmem_driver);
102}
103
104MODULE_AUTHOR("Intel Corporation");
105MODULE_LICENSE("GPL v2");
106module_init(dax_kmem_init);
107module_exit(dax_kmem_exit);
108MODULE_ALIAS_DAX_DEVICE(0);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
deleted file mode 100644
index 2c1f459c0c63..000000000000
--- a/drivers/dax/pmem.c
+++ /dev/null
@@ -1,153 +0,0 @@
1/*
2 * Copyright(c) 2016 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/percpu-refcount.h>
14#include <linux/memremap.h>
15#include <linux/module.h>
16#include <linux/pfn_t.h>
17#include "../nvdimm/pfn.h"
18#include "../nvdimm/nd.h"
19#include "device-dax.h"
20
21struct dax_pmem {
22 struct device *dev;
23 struct percpu_ref ref;
24 struct dev_pagemap pgmap;
25 struct completion cmp;
26};
27
28static struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
29{
30 return container_of(ref, struct dax_pmem, ref);
31}
32
33static void dax_pmem_percpu_release(struct percpu_ref *ref)
34{
35 struct dax_pmem *dax_pmem = to_dax_pmem(ref);
36
37 dev_dbg(dax_pmem->dev, "trace\n");
38 complete(&dax_pmem->cmp);
39}
40
41static void dax_pmem_percpu_exit(void *data)
42{
43 struct percpu_ref *ref = data;
44 struct dax_pmem *dax_pmem = to_dax_pmem(ref);
45
46 dev_dbg(dax_pmem->dev, "trace\n");
47 wait_for_completion(&dax_pmem->cmp);
48 percpu_ref_exit(ref);
49}
50
51static void dax_pmem_percpu_kill(struct percpu_ref *ref)
52{
53 struct dax_pmem *dax_pmem = to_dax_pmem(ref);
54
55 dev_dbg(dax_pmem->dev, "trace\n");
56 percpu_ref_kill(ref);
57}
58
59static int dax_pmem_probe(struct device *dev)
60{
61 void *addr;
62 struct resource res;
63 int rc, id, region_id;
64 struct nd_pfn_sb *pfn_sb;
65 struct dev_dax *dev_dax;
66 struct dax_pmem *dax_pmem;
67 struct nd_namespace_io *nsio;
68 struct dax_region *dax_region;
69 struct nd_namespace_common *ndns;
70 struct nd_dax *nd_dax = to_nd_dax(dev);
71 struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
72
73 ndns = nvdimm_namespace_common_probe(dev);
74 if (IS_ERR(ndns))
75 return PTR_ERR(ndns);
76 nsio = to_nd_namespace_io(&ndns->dev);
77
78 dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
79 if (!dax_pmem)
80 return -ENOMEM;
81
82 /* parse the 'pfn' info block via ->rw_bytes */
83 rc = devm_nsio_enable(dev, nsio);
84 if (rc)
85 return rc;
86 rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap);
87 if (rc)
88 return rc;
89 devm_nsio_disable(dev, nsio);
90
91 pfn_sb = nd_pfn->pfn_sb;
92
93 if (!devm_request_mem_region(dev, nsio->res.start,
94 resource_size(&nsio->res),
95 dev_name(&ndns->dev))) {
96 dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
97 return -EBUSY;
98 }
99
100 dax_pmem->dev = dev;
101 init_completion(&dax_pmem->cmp);
102 rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
103 GFP_KERNEL);
104 if (rc)
105 return rc;
106
107 rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
108 if (rc) {
109 percpu_ref_exit(&dax_pmem->ref);
110 return rc;
111 }
112
113 dax_pmem->pgmap.ref = &dax_pmem->ref;
114 dax_pmem->pgmap.kill = dax_pmem_percpu_kill;
115 addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
116 if (IS_ERR(addr))
117 return PTR_ERR(addr);
118
119 /* adjust the dax_region resource to the start of data */
120 memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
121 res.start += le64_to_cpu(pfn_sb->dataoff);
122
123 rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
124 if (rc != 2)
125 return -EINVAL;
126
127 dax_region = alloc_dax_region(dev, region_id, &res,
128 le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
129 if (!dax_region)
130 return -ENOMEM;
131
132 /* TODO: support for subdividing a dax region... */
133 dev_dax = devm_create_dev_dax(dax_region, id, &res, 1);
134
135 /* child dev_dax instances now own the lifetime of the dax_region */
136 dax_region_put(dax_region);
137
138 return PTR_ERR_OR_ZERO(dev_dax);
139}
140
141static struct nd_device_driver dax_pmem_driver = {
142 .probe = dax_pmem_probe,
143 .drv = {
144 .name = "dax_pmem",
145 },
146 .type = ND_DRIVER_DAX_PMEM,
147};
148
149module_nd_driver(dax_pmem_driver);
150
151MODULE_LICENSE("GPL v2");
152MODULE_AUTHOR("Intel Corporation");
153MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
diff --git a/drivers/dax/pmem/Makefile b/drivers/dax/pmem/Makefile
new file mode 100644
index 000000000000..e2e79bd3fdcf
--- /dev/null
+++ b/drivers/dax/pmem/Makefile
@@ -0,0 +1,7 @@
1obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
2obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
3obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
4
5dax_pmem-y := pmem.o
6dax_pmem_core-y := core.o
7dax_pmem_compat-y := compat.o
diff --git a/drivers/dax/pmem/compat.c b/drivers/dax/pmem/compat.c
new file mode 100644
index 000000000000..d7b15e6f30c5
--- /dev/null
+++ b/drivers/dax/pmem/compat.c
@@ -0,0 +1,73 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
3#include <linux/percpu-refcount.h>
4#include <linux/memremap.h>
5#include <linux/module.h>
6#include <linux/pfn_t.h>
7#include <linux/nd.h>
8#include "../bus.h"
9
10/* we need the private definitions to implement compat support */
11#include "../dax-private.h"
12
13static int dax_pmem_compat_probe(struct device *dev)
14{
15 struct dev_dax *dev_dax = __dax_pmem_probe(dev, DEV_DAX_CLASS);
16 int rc;
17
18 if (IS_ERR(dev_dax))
19 return PTR_ERR(dev_dax);
20
21 if (!devres_open_group(&dev_dax->dev, dev_dax, GFP_KERNEL))
22 return -ENOMEM;
23
24 device_lock(&dev_dax->dev);
25 rc = dev_dax_probe(&dev_dax->dev);
26 device_unlock(&dev_dax->dev);
27
28 devres_close_group(&dev_dax->dev, dev_dax);
29 if (rc)
30 devres_release_group(&dev_dax->dev, dev_dax);
31
32 return rc;
33}
34
/*
 * device_for_each_child() callback: release the devres group keyed by
 * the child's dev_dax, holding the child's device lock while doing so.
 * Always returns 0 so the iteration visits every child.
 */
static int dax_pmem_compat_release(struct device *dev, void *data)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	device_lock(dev);
	devres_release_group(dev, dev_dax);
	device_unlock(dev);

	return 0;
}
43
44static int dax_pmem_compat_remove(struct device *dev)
45{
46 device_for_each_child(dev, NULL, dax_pmem_compat_release);
47 return 0;
48}
49
50static struct nd_device_driver dax_pmem_compat_driver = {
51 .probe = dax_pmem_compat_probe,
52 .remove = dax_pmem_compat_remove,
53 .drv = {
54 .name = "dax_pmem_compat",
55 },
56 .type = ND_DRIVER_DAX_PMEM,
57};
58
59static int __init dax_pmem_compat_init(void)
60{
61 return nd_driver_register(&dax_pmem_compat_driver);
62}
63module_init(dax_pmem_compat_init);
64
65static void __exit dax_pmem_compat_exit(void)
66{
67 driver_unregister(&dax_pmem_compat_driver.drv);
68}
69module_exit(dax_pmem_compat_exit);
70
71MODULE_LICENSE("GPL v2");
72MODULE_AUTHOR("Intel Corporation");
73MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c
new file mode 100644
index 000000000000..f71019ce0647
--- /dev/null
+++ b/drivers/dax/pmem/core.c
@@ -0,0 +1,71 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
3#include <linux/memremap.h>
4#include <linux/module.h>
5#include <linux/pfn_t.h>
6#include "../../nvdimm/pfn.h"
7#include "../../nvdimm/nd.h"
8#include "../bus.h"
9
10struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
11{
12 struct resource res;
13 int rc, id, region_id;
14 resource_size_t offset;
15 struct nd_pfn_sb *pfn_sb;
16 struct dev_dax *dev_dax;
17 struct nd_namespace_io *nsio;
18 struct dax_region *dax_region;
19 struct dev_pagemap pgmap = { 0 };
20 struct nd_namespace_common *ndns;
21 struct nd_dax *nd_dax = to_nd_dax(dev);
22 struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
23 struct nd_region *nd_region = to_nd_region(dev->parent);
24
25 ndns = nvdimm_namespace_common_probe(dev);
26 if (IS_ERR(ndns))
27 return ERR_CAST(ndns);
28 nsio = to_nd_namespace_io(&ndns->dev);
29
30 /* parse the 'pfn' info block via ->rw_bytes */
31 rc = devm_nsio_enable(dev, nsio);
32 if (rc)
33 return ERR_PTR(rc);
34 rc = nvdimm_setup_pfn(nd_pfn, &pgmap);
35 if (rc)
36 return ERR_PTR(rc);
37 devm_nsio_disable(dev, nsio);
38
39 /* reserve the metadata area, device-dax will reserve the data */
40 pfn_sb = nd_pfn->pfn_sb;
41 offset = le64_to_cpu(pfn_sb->dataoff);
42 if (!devm_request_mem_region(dev, nsio->res.start, offset,
43 dev_name(&ndns->dev))) {
44 dev_warn(dev, "could not reserve metadata\n");
45 return ERR_PTR(-EBUSY);
46 }
47
48 rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
49 if (rc != 2)
50 return ERR_PTR(-EINVAL);
51
52 /* adjust the dax_region resource to the start of data */
53 memcpy(&res, &pgmap.res, sizeof(res));
54 res.start += offset;
55 dax_region = alloc_dax_region(dev, region_id, &res,
56 nd_region->target_node, le32_to_cpu(pfn_sb->align),
57 PFN_DEV|PFN_MAP);
58 if (!dax_region)
59 return ERR_PTR(-ENOMEM);
60
61 dev_dax = __devm_create_dev_dax(dax_region, id, &pgmap, subsys);
62
63 /* child dev_dax instances now own the lifetime of the dax_region */
64 dax_region_put(dax_region);
65
66 return dev_dax;
67}
68EXPORT_SYMBOL_GPL(__dax_pmem_probe);
69
70MODULE_LICENSE("GPL v2");
71MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/pmem/pmem.c b/drivers/dax/pmem/pmem.c
new file mode 100644
index 000000000000..0ae4238a0ef8
--- /dev/null
+++ b/drivers/dax/pmem/pmem.c
@@ -0,0 +1,40 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
3#include <linux/percpu-refcount.h>
4#include <linux/memremap.h>
5#include <linux/module.h>
6#include <linux/pfn_t.h>
7#include <linux/nd.h>
8#include "../bus.h"
9
10static int dax_pmem_probe(struct device *dev)
11{
12 return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev, DEV_DAX_BUS));
13}
14
15static struct nd_device_driver dax_pmem_driver = {
16 .probe = dax_pmem_probe,
17 .drv = {
18 .name = "dax_pmem",
19 },
20 .type = ND_DRIVER_DAX_PMEM,
21};
22
23static int __init dax_pmem_init(void)
24{
25 return nd_driver_register(&dax_pmem_driver);
26}
27module_init(dax_pmem_init);
28
29static void __exit dax_pmem_exit(void)
30{
31 driver_unregister(&dax_pmem_driver.drv);
32}
33module_exit(dax_pmem_exit);
34
35MODULE_LICENSE("GPL v2");
36MODULE_AUTHOR("Intel Corporation");
37#if !IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
38/* For compat builds, don't load this module by default */
39MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
40#endif
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 0cb8c30ea278..0a339b85133e 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -22,6 +22,7 @@
22#include <linux/uio.h> 22#include <linux/uio.h>
23#include <linux/dax.h> 23#include <linux/dax.h>
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include "dax-private.h"
25 26
26static dev_t dax_devt; 27static dev_t dax_devt;
27DEFINE_STATIC_SRCU(dax_srcu); 28DEFINE_STATIC_SRCU(dax_srcu);
@@ -383,11 +384,15 @@ void kill_dax(struct dax_device *dax_dev)
383 spin_lock(&dax_host_lock); 384 spin_lock(&dax_host_lock);
384 hlist_del_init(&dax_dev->list); 385 hlist_del_init(&dax_dev->list);
385 spin_unlock(&dax_host_lock); 386 spin_unlock(&dax_host_lock);
386
387 dax_dev->private = NULL;
388} 387}
389EXPORT_SYMBOL_GPL(kill_dax); 388EXPORT_SYMBOL_GPL(kill_dax);
390 389
390void run_dax(struct dax_device *dax_dev)
391{
392 set_bit(DAXDEV_ALIVE, &dax_dev->flags);
393}
394EXPORT_SYMBOL_GPL(run_dax);
395
391static struct inode *dax_alloc_inode(struct super_block *sb) 396static struct inode *dax_alloc_inode(struct super_block *sb)
392{ 397{
393 struct dax_device *dax_dev; 398 struct dax_device *dax_dev;
@@ -602,6 +607,8 @@ EXPORT_SYMBOL_GPL(dax_inode);
602 607
603void *dax_get_private(struct dax_device *dax_dev) 608void *dax_get_private(struct dax_device *dax_dev)
604{ 609{
610 if (!test_bit(DAXDEV_ALIVE, &dax_dev->flags))
611 return NULL;
605 return dax_dev->private; 612 return dax_dev->private;
606} 613}
607EXPORT_SYMBOL_GPL(dax_get_private); 614EXPORT_SYMBOL_GPL(dax_get_private);
@@ -615,7 +622,7 @@ static void init_once(void *_dax_dev)
615 inode_init_once(inode); 622 inode_init_once(inode);
616} 623}
617 624
618static int __dax_fs_init(void) 625static int dax_fs_init(void)
619{ 626{
620 int rc; 627 int rc;
621 628
@@ -647,35 +654,45 @@ static int __dax_fs_init(void)
647 return rc; 654 return rc;
648} 655}
649 656
650static void __dax_fs_exit(void) 657static void dax_fs_exit(void)
651{ 658{
652 kern_unmount(dax_mnt); 659 kern_unmount(dax_mnt);
653 unregister_filesystem(&dax_fs_type); 660 unregister_filesystem(&dax_fs_type);
654 kmem_cache_destroy(dax_cache); 661 kmem_cache_destroy(dax_cache);
655} 662}
656 663
657static int __init dax_fs_init(void) 664static int __init dax_core_init(void)
658{ 665{
659 int rc; 666 int rc;
660 667
661 rc = __dax_fs_init(); 668 rc = dax_fs_init();
662 if (rc) 669 if (rc)
663 return rc; 670 return rc;
664 671
665 rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax"); 672 rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
666 if (rc) 673 if (rc)
667 __dax_fs_exit(); 674 goto err_chrdev;
668 return rc; 675
676 rc = dax_bus_init();
677 if (rc)
678 goto err_bus;
679 return 0;
680
681err_bus:
682 unregister_chrdev_region(dax_devt, MINORMASK+1);
683err_chrdev:
684 dax_fs_exit();
685 return 0;
669} 686}
670 687
671static void __exit dax_fs_exit(void) 688static void __exit dax_core_exit(void)
672{ 689{
673 unregister_chrdev_region(dax_devt, MINORMASK+1); 690 unregister_chrdev_region(dax_devt, MINORMASK+1);
674 ida_destroy(&dax_minor_ida); 691 ida_destroy(&dax_minor_ida);
675 __dax_fs_exit(); 692 dax_fs_exit();
676} 693}
677 694
678MODULE_AUTHOR("Intel Corporation"); 695MODULE_AUTHOR("Intel Corporation");
679MODULE_LICENSE("GPL v2"); 696MODULE_LICENSE("GPL v2");
680subsys_initcall(dax_fs_init); 697subsys_initcall(dax_core_init);
681module_exit(dax_fs_exit); 698module_exit(dax_core_exit);
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index 521eaf53a52a..36be9b619187 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -47,6 +47,7 @@ static int e820_register_one(struct resource *res, void *data)
47 ndr_desc.res = res; 47 ndr_desc.res = res;
48 ndr_desc.attr_groups = e820_pmem_region_attribute_groups; 48 ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
49 ndr_desc.numa_node = e820_range_to_nid(res->start); 49 ndr_desc.numa_node = e820_range_to_nid(res->start);
50 ndr_desc.target_node = ndr_desc.numa_node;
50 set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); 51 set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
51 if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) 52 if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
52 return -ENXIO; 53 return -ENXIO;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 379bf4305e61..a5ac3b240293 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -153,7 +153,7 @@ struct nd_region {
153 u16 ndr_mappings; 153 u16 ndr_mappings;
154 u64 ndr_size; 154 u64 ndr_size;
155 u64 ndr_start; 155 u64 ndr_start;
156 int id, num_lanes, ro, numa_node; 156 int id, num_lanes, ro, numa_node, target_node;
157 void *provider_data; 157 void *provider_data;
158 struct kernfs_node *bb_state; 158 struct kernfs_node *bb_state;
159 struct badblocks bb; 159 struct badblocks bb;
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 11b9821eba85..a0c8dcfa0bf9 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -68,6 +68,7 @@ static int of_pmem_region_probe(struct platform_device *pdev)
68 memset(&ndr_desc, 0, sizeof(ndr_desc)); 68 memset(&ndr_desc, 0, sizeof(ndr_desc));
69 ndr_desc.attr_groups = region_attr_groups; 69 ndr_desc.attr_groups = region_attr_groups;
70 ndr_desc.numa_node = dev_to_node(&pdev->dev); 70 ndr_desc.numa_node = dev_to_node(&pdev->dev);
71 ndr_desc.target_node = ndr_desc.numa_node;
71 ndr_desc.res = &pdev->resource[i]; 72 ndr_desc.res = &pdev->resource[i];
72 ndr_desc.of_node = np; 73 ndr_desc.of_node = np;
73 set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); 74 set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 3b58baa44b5c..b4ef7d9ff22e 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1072,6 +1072,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
1072 nd_region->flags = ndr_desc->flags; 1072 nd_region->flags = ndr_desc->flags;
1073 nd_region->ro = ro; 1073 nd_region->ro = ro;
1074 nd_region->numa_node = ndr_desc->numa_node; 1074 nd_region->numa_node = ndr_desc->numa_node;
1075 nd_region->target_node = ndr_desc->target_node;
1075 ida_init(&nd_region->ns_ida); 1076 ida_init(&nd_region->ns_ida);
1076 ida_init(&nd_region->btt_ida); 1077 ida_init(&nd_region->btt_ida);
1077 ida_init(&nd_region->pfn_ida); 1078 ida_init(&nd_region->pfn_ida);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6ac47f5ea514..d5dcebd7aad3 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -400,12 +400,17 @@ extern bool acpi_osi_is_win8(void);
400 400
401#ifdef CONFIG_ACPI_NUMA 401#ifdef CONFIG_ACPI_NUMA
402int acpi_map_pxm_to_online_node(int pxm); 402int acpi_map_pxm_to_online_node(int pxm);
403int acpi_map_pxm_to_node(int pxm);
403int acpi_get_node(acpi_handle handle); 404int acpi_get_node(acpi_handle handle);
404#else 405#else
405static inline int acpi_map_pxm_to_online_node(int pxm) 406static inline int acpi_map_pxm_to_online_node(int pxm)
406{ 407{
407 return 0; 408 return 0;
408} 409}
410static inline int acpi_map_pxm_to_node(int pxm)
411{
412 return 0;
413}
409static inline int acpi_get_node(acpi_handle handle) 414static inline int acpi_get_node(acpi_handle handle)
410{ 415{
411 return 0; 416 return 0;
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 43348303cb4b..feb342d026f2 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -130,6 +130,7 @@ struct nd_region_desc {
130 void *provider_data; 130 void *provider_data;
131 int num_lanes; 131 int num_lanes;
132 int numa_node; 132 int numa_node;
133 int target_node;
133 unsigned long flags; 134 unsigned long flags;
134 struct device_node *of_node; 135 struct device_node *of_node;
135}; 136};
diff --git a/kernel/resource.c b/kernel/resource.c
index e81b17b53fa5..92190f62ebc5 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -382,7 +382,7 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
382 int (*func)(struct resource *, void *)) 382 int (*func)(struct resource *, void *))
383{ 383{
384 struct resource res; 384 struct resource res;
385 int ret = -1; 385 int ret = -EINVAL;
386 386
387 while (start < end && 387 while (start < end &&
388 !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) { 388 !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) {
@@ -452,6 +452,9 @@ int walk_mem_res(u64 start, u64 end, void *arg,
452 * This function calls the @func callback against all memory ranges of type 452 * This function calls the @func callback against all memory ranges of type
453 * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOURCE_BUSY. 453 * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOURCE_BUSY.
454 * It is to be used only for System RAM. 454 * It is to be used only for System RAM.
455 *
456 * This will find System RAM ranges that are children of top-level resources
457 * in addition to top-level System RAM resources.
455 */ 458 */
456int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, 459int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
457 void *arg, int (*func)(unsigned long, unsigned long, void *)) 460 void *arg, int (*func)(unsigned long, unsigned long, void *))
@@ -460,14 +463,14 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
460 unsigned long flags; 463 unsigned long flags;
461 struct resource res; 464 struct resource res;
462 unsigned long pfn, end_pfn; 465 unsigned long pfn, end_pfn;
463 int ret = -1; 466 int ret = -EINVAL;
464 467
465 start = (u64) start_pfn << PAGE_SHIFT; 468 start = (u64) start_pfn << PAGE_SHIFT;
466 end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1; 469 end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
467 flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; 470 flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
468 while (start < end && 471 while (start < end &&
469 !find_next_iomem_res(start, end, flags, IORES_DESC_NONE, 472 !find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
470 true, &res)) { 473 false, &res)) {
471 pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; 474 pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
472 end_pfn = (res.end + 1) >> PAGE_SHIFT; 475 end_pfn = (res.end + 1) >> PAGE_SHIFT;
473 if (end_pfn > pfn) 476 if (end_pfn > pfn)
@@ -1128,6 +1131,15 @@ struct resource * __request_region(struct resource *parent,
1128 conflict = __request_resource(parent, res); 1131 conflict = __request_resource(parent, res);
1129 if (!conflict) 1132 if (!conflict)
1130 break; 1133 break;
1134 /*
1135 * mm/hmm.c reserves physical addresses which then
1136 * become unavailable to other users. Conflicts are
1137 * not expected. Warn to aid debugging if encountered.
1138 */
1139 if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
1140 pr_warn("Unaddressable device %s %pR conflicts with %pR",
1141 conflict->name, conflict, res);
1142 }
1131 if (conflict != parent) { 1143 if (conflict != parent) {
1132 if (!(conflict->flags & IORESOURCE_BUSY)) { 1144 if (!(conflict->flags & IORESOURCE_BUSY)) {
1133 parent = conflict; 1145 parent = conflict;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index cd23c081924d..f767582af4f8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -101,28 +101,24 @@ u64 max_mem_size = U64_MAX;
101/* add this memory to iomem resource */ 101/* add this memory to iomem resource */
102static struct resource *register_memory_resource(u64 start, u64 size) 102static struct resource *register_memory_resource(u64 start, u64 size)
103{ 103{
104 struct resource *res, *conflict; 104 struct resource *res;
105 unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
106 char *resource_name = "System RAM";
105 107
106 if (start + size > max_mem_size) 108 if (start + size > max_mem_size)
107 return ERR_PTR(-E2BIG); 109 return ERR_PTR(-E2BIG);
108 110
109 res = kzalloc(sizeof(struct resource), GFP_KERNEL); 111 /*
110 if (!res) 112 * Request ownership of the new memory range. This might be
111 return ERR_PTR(-ENOMEM); 113 * a child of an existing resource that was present but
112 114 * not marked as busy.
113 res->name = "System RAM"; 115 */
114 res->start = start; 116 res = __request_region(&iomem_resource, start, size,
115 res->end = start + size - 1; 117 resource_name, flags);
116 res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; 118
117 conflict = request_resource_conflict(&iomem_resource, res); 119 if (!res) {
118 if (conflict) { 120 pr_debug("Unable to reserve System RAM region: %016llx->%016llx\n",
119 if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { 121 start, start + size);
120 pr_debug("Device unaddressable memory block "
121 "memory hotplug at %#010llx !\n",
122 (unsigned long long)start);
123 }
124 pr_debug("System RAM resource %pR cannot be added\n", res);
125 kfree(res);
126 return ERR_PTR(-EEXIST); 122 return ERR_PTR(-EEXIST);
127 } 123 }
128 return res; 124 return res;
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 10ddf223055b..e1286d2cdfbf 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -35,6 +35,8 @@ obj-$(CONFIG_DAX) += dax.o
35endif 35endif
36obj-$(CONFIG_DEV_DAX) += device_dax.o 36obj-$(CONFIG_DEV_DAX) += device_dax.o
37obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o 37obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
38obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
39obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
38 40
39nfit-y := $(ACPI_SRC)/core.o 41nfit-y := $(ACPI_SRC)/core.o
40nfit-y += $(ACPI_SRC)/intel.o 42nfit-y += $(ACPI_SRC)/intel.o
@@ -57,6 +59,7 @@ nd_e820-y := $(NVDIMM_SRC)/e820.o
57nd_e820-y += config_check.o 59nd_e820-y += config_check.o
58 60
59dax-y := $(DAX_SRC)/super.o 61dax-y := $(DAX_SRC)/super.o
62dax-y += $(DAX_SRC)/bus.o
60dax-y += config_check.o 63dax-y += config_check.o
61 64
62device_dax-y := $(DAX_SRC)/device.o 65device_dax-y := $(DAX_SRC)/device.o
@@ -64,7 +67,9 @@ device_dax-y += dax-dev.o
64device_dax-y += device_dax_test.o 67device_dax-y += device_dax_test.o
65device_dax-y += config_check.o 68device_dax-y += config_check.o
66 69
67dax_pmem-y := $(DAX_SRC)/pmem.o 70dax_pmem-y := $(DAX_SRC)/pmem/pmem.o
71dax_pmem_core-y := $(DAX_SRC)/pmem/core.o
72dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o
68dax_pmem-y += config_check.o 73dax_pmem-y += config_check.o
69 74
70libnvdimm-y := $(NVDIMM_SRC)/core.o 75libnvdimm-y := $(NVDIMM_SRC)/core.o
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
index 36ee3d8797c3..f36e708265b8 100644
--- a/tools/testing/nvdimm/dax-dev.c
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -17,20 +17,11 @@
17phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, 17phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
18 unsigned long size) 18 unsigned long size)
19{ 19{
20 struct resource *res; 20 struct resource *res = &dev_dax->region->res;
21 phys_addr_t addr; 21 phys_addr_t addr;
22 int i;
23 22
24 for (i = 0; i < dev_dax->num_resources; i++) { 23 addr = pgoff * PAGE_SIZE + res->start;
25 res = &dev_dax->res[i]; 24 if (addr >= res->start && addr <= res->end) {
26 addr = pgoff * PAGE_SIZE + res->start;
27 if (addr >= res->start && addr <= res->end)
28 break;
29 pgoff -= PHYS_PFN(resource_size(res));
30 }
31
32 if (i < dev_dax->num_resources) {
33 res = &dev_dax->res[i];
34 if (addr + size - 1 <= res->end) { 25 if (addr + size - 1 <= res->end) {
35 if (get_nfit_res(addr)) { 26 if (get_nfit_res(addr)) {
36 struct page *page; 27 struct page *page;
@@ -44,6 +35,5 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
44 return addr; 35 return addr;
45 } 36 }
46 } 37 }
47
48 return -1; 38 return -1;
49} 39}