aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2017-04-07 19:42:08 -0400
committerDan Williams <dan.j.williams@intel.com>2017-04-12 16:45:18 -0400
commit956a4cd2c957acf638ff29951aabaa9d8e92bbc2 (patch)
treed4e8def7593d04b58bad676f1329cdd8446bd00f
parent4aa5615e080a9855e607accc75b07ab79b252dde (diff)
device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation
The following warning triggers with a new unit test that stresses the device-dax interface. =============================== [ ERR: suspicious RCU usage. ] 4.11.0-rc4+ #1049 Tainted: G O ------------------------------- ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 2 locks held by fio/9070: #0: (&mm->mmap_sem){++++++}, at: [<ffffffff8d0739d7>] __do_page_fault+0x167/0x4f0 #1: (rcu_read_lock){......}, at: [<ffffffffc03fbd02>] dax_dev_huge_fault+0x32/0x620 [dax] Call Trace: dump_stack+0x86/0xc3 lockdep_rcu_suspicious+0xd7/0x110 ___might_sleep+0xac/0x250 __might_sleep+0x4a/0x80 __alloc_pages_nodemask+0x23a/0x360 alloc_pages_current+0xa1/0x1f0 pte_alloc_one+0x17/0x80 __pte_alloc+0x1e/0x120 __get_locked_pte+0x1bf/0x1d0 insert_pfn.isra.70+0x3a/0x100 ? lookup_memtype+0xa6/0xd0 vm_insert_mixed+0x64/0x90 dax_dev_huge_fault+0x520/0x620 [dax] ? dax_dev_huge_fault+0x32/0x620 [dax] dax_dev_fault+0x10/0x20 [dax] __do_fault+0x1e/0x140 __handle_mm_fault+0x9af/0x10d0 handle_mm_fault+0x16d/0x370 ? handle_mm_fault+0x47/0x370 __do_page_fault+0x28c/0x4f0 trace_do_page_fault+0x58/0x2a0 do_async_page_fault+0x1a/0xa0 async_page_fault+0x28/0x30 Inserting a page table entry may trigger an allocation while we are holding a read lock to keep the device instance alive for the duration of the fault. Use srcu for this keep-alive protection. Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Cc: <stable@vger.kernel.org> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r--drivers/dax/Kconfig1
-rw-r--r--drivers/dax/dax.c13
2 files changed, 8 insertions, 6 deletions
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index 3e2ab3b14eea..9e95bf94eb13 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
2 tristate "DAX: direct access to differentiated memory" 2 tristate "DAX: direct access to differentiated memory"
3 default m if NVDIMM_DAX 3 default m if NVDIMM_DAX
4 depends on TRANSPARENT_HUGEPAGE 4 depends on TRANSPARENT_HUGEPAGE
5 select SRCU
5 help 6 help
6 Support raw access to differentiated (persistence, bandwidth, 7 Support raw access to differentiated (persistence, bandwidth,
7 latency...) memory via an mmap(2) capable character 8 latency...) memory via an mmap(2) capable character
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 80c6db279ae1..806f180c80d8 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -25,6 +25,7 @@
25#include "dax.h" 25#include "dax.h"
26 26
27static dev_t dax_devt; 27static dev_t dax_devt;
28DEFINE_STATIC_SRCU(dax_srcu);
28static struct class *dax_class; 29static struct class *dax_class;
29static DEFINE_IDA(dax_minor_ida); 30static DEFINE_IDA(dax_minor_ida);
30static int nr_dax = CONFIG_NR_DEV_DAX; 31static int nr_dax = CONFIG_NR_DEV_DAX;
@@ -60,7 +61,7 @@ struct dax_region {
60 * @region - parent region 61 * @region - parent region
61 * @dev - device backing the character device 62 * @dev - device backing the character device
62 * @cdev - core chardev data 63 * @cdev - core chardev data
63 * @alive - !alive + rcu grace period == no new mappings can be established 64 * @alive - !alive + srcu grace period == no new mappings can be established
64 * @id - child id in the region 65 * @id - child id in the region
65 * @num_resources - number of physical address extents in this device 66 * @num_resources - number of physical address extents in this device
66 * @res - array of physical address ranges 67 * @res - array of physical address ranges
@@ -569,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
569static int dax_dev_huge_fault(struct vm_fault *vmf, 570static int dax_dev_huge_fault(struct vm_fault *vmf,
570 enum page_entry_size pe_size) 571 enum page_entry_size pe_size)
571{ 572{
572 int rc; 573 int rc, id;
573 struct file *filp = vmf->vma->vm_file; 574 struct file *filp = vmf->vma->vm_file;
574 struct dax_dev *dax_dev = filp->private_data; 575 struct dax_dev *dax_dev = filp->private_data;
575 576
@@ -578,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
578 ? "write" : "read", 579 ? "write" : "read",
579 vmf->vma->vm_start, vmf->vma->vm_end); 580 vmf->vma->vm_start, vmf->vma->vm_end);
580 581
581 rcu_read_lock(); 582 id = srcu_read_lock(&dax_srcu);
582 switch (pe_size) { 583 switch (pe_size) {
583 case PE_SIZE_PTE: 584 case PE_SIZE_PTE:
584 rc = __dax_dev_pte_fault(dax_dev, vmf); 585 rc = __dax_dev_pte_fault(dax_dev, vmf);
@@ -592,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
592 default: 593 default:
593 return VM_FAULT_FALLBACK; 594 return VM_FAULT_FALLBACK;
594 } 595 }
595 rcu_read_unlock(); 596 srcu_read_unlock(&dax_srcu, id);
596 597
597 return rc; 598 return rc;
598} 599}
@@ -713,11 +714,11 @@ static void unregister_dax_dev(void *dev)
713 * Note, rcu is not protecting the liveness of dax_dev, rcu is 714 * Note, rcu is not protecting the liveness of dax_dev, rcu is
714 * ensuring that any fault handlers that might have seen 715 * ensuring that any fault handlers that might have seen
715 * dax_dev->alive == true, have completed. Any fault handlers 716 * dax_dev->alive == true, have completed. Any fault handlers
716 * that start after synchronize_rcu() has started will abort 717 * that start after synchronize_srcu() has started will abort
717 * upon seeing dax_dev->alive == false. 718 * upon seeing dax_dev->alive == false.
718 */ 719 */
719 dax_dev->alive = false; 720 dax_dev->alive = false;
720 synchronize_rcu(); 721 synchronize_srcu(&dax_srcu);
721 unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); 722 unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
722 cdev_del(cdev); 723 cdev_del(cdev);
723 device_unregister(dev); 724 device_unregister(dev);