diff options
author | Dan Williams <dan.j.williams@intel.com> | 2017-04-07 19:42:08 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-04-27 03:10:39 -0400 |
commit | c36eaa6ca346dc8d45e9125af62db75625b42a6f (patch) | |
tree | 79980e092687fe6bbbc693b25fcbe1378efc4de9 | |
parent | f8bc0881fe95496ddf3f8a16808d9860db207941 (diff) |
device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation
commit 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 upstream.
The following warning triggers with a new unit test that stresses the
device-dax interface.
===============================
[ ERR: suspicious RCU usage. ]
4.11.0-rc4+ #1049 Tainted: G O
-------------------------------
./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section!
other info that might help us debug this:
rcu_scheduler_active = 2, debug_locks = 0
2 locks held by fio/9070:
#0: (&mm->mmap_sem){++++++}, at: [<ffffffff8d0739d7>] __do_page_fault+0x167/0x4f0
#1: (rcu_read_lock){......}, at: [<ffffffffc03fbd02>] dax_dev_huge_fault+0x32/0x620 [dax]
Call Trace:
dump_stack+0x86/0xc3
lockdep_rcu_suspicious+0xd7/0x110
___might_sleep+0xac/0x250
__might_sleep+0x4a/0x80
__alloc_pages_nodemask+0x23a/0x360
alloc_pages_current+0xa1/0x1f0
pte_alloc_one+0x17/0x80
__pte_alloc+0x1e/0x120
__get_locked_pte+0x1bf/0x1d0
insert_pfn.isra.70+0x3a/0x100
? lookup_memtype+0xa6/0xd0
vm_insert_mixed+0x64/0x90
dax_dev_huge_fault+0x520/0x620 [dax]
? dax_dev_huge_fault+0x32/0x620 [dax]
dax_dev_fault+0x10/0x20 [dax]
__do_fault+0x1e/0x140
__handle_mm_fault+0x9af/0x10d0
handle_mm_fault+0x16d/0x370
? handle_mm_fault+0x47/0x370
__do_page_fault+0x28c/0x4f0
trace_do_page_fault+0x58/0x2a0
do_async_page_fault+0x1a/0xa0
async_page_fault+0x28/0x30
Inserting a page table entry may trigger an allocation while we are
holding a read lock to keep the device instance alive for the duration
of the fault. Use srcu for this keep-alive protection.
Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | drivers/dax/Kconfig | 1 | ||||
-rw-r--r-- | drivers/dax/dax.c | 13 |
2 files changed, 8 insertions, 6 deletions
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 3e2ab3b14eea..9e95bf94eb13 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig | |||
@@ -2,6 +2,7 @@ menuconfig DEV_DAX | |||
2 | tristate "DAX: direct access to differentiated memory" | 2 | tristate "DAX: direct access to differentiated memory" |
3 | default m if NVDIMM_DAX | 3 | default m if NVDIMM_DAX |
4 | depends on TRANSPARENT_HUGEPAGE | 4 | depends on TRANSPARENT_HUGEPAGE |
5 | select SRCU | ||
5 | help | 6 | help |
6 | Support raw access to differentiated (persistence, bandwidth, | 7 | Support raw access to differentiated (persistence, bandwidth, |
7 | latency...) memory via an mmap(2) capable character | 8 | latency...) memory via an mmap(2) capable character |
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 152552d2c306..193224889e41 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "dax.h" | 24 | #include "dax.h" |
25 | 25 | ||
26 | static dev_t dax_devt; | 26 | static dev_t dax_devt; |
27 | DEFINE_STATIC_SRCU(dax_srcu); | ||
27 | static struct class *dax_class; | 28 | static struct class *dax_class; |
28 | static DEFINE_IDA(dax_minor_ida); | 29 | static DEFINE_IDA(dax_minor_ida); |
29 | static int nr_dax = CONFIG_NR_DEV_DAX; | 30 | static int nr_dax = CONFIG_NR_DEV_DAX; |
@@ -59,7 +60,7 @@ struct dax_region { | |||
59 | * @region - parent region | 60 | * @region - parent region |
60 | * @dev - device backing the character device | 61 | * @dev - device backing the character device |
61 | * @cdev - core chardev data | 62 | * @cdev - core chardev data |
62 | * @alive - !alive + rcu grace period == no new mappings can be established | 63 | * @alive - !alive + srcu grace period == no new mappings can be established |
63 | * @id - child id in the region | 64 | * @id - child id in the region |
64 | * @num_resources - number of physical address extents in this device | 65 | * @num_resources - number of physical address extents in this device |
65 | * @res - array of physical address ranges | 66 | * @res - array of physical address ranges |
@@ -437,7 +438,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, | |||
437 | static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | 438 | static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, |
438 | pmd_t *pmd, unsigned int flags) | 439 | pmd_t *pmd, unsigned int flags) |
439 | { | 440 | { |
440 | int rc; | 441 | int rc, id; |
441 | struct file *filp = vma->vm_file; | 442 | struct file *filp = vma->vm_file; |
442 | struct dax_dev *dax_dev = filp->private_data; | 443 | struct dax_dev *dax_dev = filp->private_data; |
443 | 444 | ||
@@ -445,9 +446,9 @@ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
445 | current->comm, (flags & FAULT_FLAG_WRITE) | 446 | current->comm, (flags & FAULT_FLAG_WRITE) |
446 | ? "write" : "read", vma->vm_start, vma->vm_end); | 447 | ? "write" : "read", vma->vm_start, vma->vm_end); |
447 | 448 | ||
448 | rcu_read_lock(); | 449 | id = srcu_read_lock(&dax_srcu); |
449 | rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags); | 450 | rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags); |
450 | rcu_read_unlock(); | 451 | srcu_read_unlock(&dax_srcu, id); |
451 | 452 | ||
452 | return rc; | 453 | return rc; |
453 | } | 454 | } |
@@ -563,11 +564,11 @@ static void unregister_dax_dev(void *dev) | |||
563 | * Note, rcu is not protecting the liveness of dax_dev, rcu is | 564 | * Note, rcu is not protecting the liveness of dax_dev, rcu is |
564 | * ensuring that any fault handlers that might have seen | 565 | * ensuring that any fault handlers that might have seen |
565 | * dax_dev->alive == true, have completed. Any fault handlers | 566 | * dax_dev->alive == true, have completed. Any fault handlers |
566 | * that start after synchronize_rcu() has started will abort | 567 | * that start after synchronize_srcu() has started will abort |
567 | * upon seeing dax_dev->alive == false. | 568 | * upon seeing dax_dev->alive == false. |
568 | */ | 569 | */ |
569 | dax_dev->alive = false; | 570 | dax_dev->alive = false; |
570 | synchronize_rcu(); | 571 | synchronize_srcu(&dax_srcu); |
571 | unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); | 572 | unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); |
572 | cdev_del(cdev); | 573 | cdev_del(cdev); |
573 | device_unregister(dev); | 574 | device_unregister(dev); |