diff options
author | Rakesh Pandit <rakesh@tuxera.com> | 2017-06-05 07:43:11 -0400 |
---|---|---|
committer | Christoph Hellwig <hch@lst.de> | 2017-06-07 05:08:50 -0400 |
commit | 82b057caefaff2a891f821a617d939f46e03e844 (patch) | |
tree | 6aa7192a8f0b9ad71aa8f6385427b426b50bb69c | |
parent | 82654b6b8ef8b93ee87a97fc562f87f081fc2f91 (diff) |
nvme-pci: fix multiple ctrl removal scheduling
Commit c5f6ce97c1210 tries to address multiple resets but fails, as
work_busy doesn't involve any synchronization and can fail. This is
easily reproducible, as can be seen from the WARNING below, which is
triggered by the line:
WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)
Allowing multiple resets can also result in multiple controller
removals if different conditions inside nvme_reset_work fail, which
might deadlock on device_release_driver.
[ 480.327007] WARNING: CPU: 3 PID: 150 at drivers/nvme/host/pci.c:1900 nvme_reset_work+0x36c/0xec0
[ 480.327008] Modules linked in: rfcomm fuse nf_conntrack_netbios_ns nf_conntrack_broadcast...
[ 480.327044] btusb videobuf2_core ghash_clmulni_intel snd_hwdep cfg80211 acer_wmi hci_uart..
[ 480.327065] CPU: 3 PID: 150 Comm: kworker/u16:2 Not tainted 4.12.0-rc1+ #13
[ 480.327065] Hardware name: Acer Predator G9-591/Mustang_SLS, BIOS V1.10 03/03/2016
[ 480.327066] Workqueue: nvme nvme_reset_work
[ 480.327067] task: ffff880498ad8000 task.stack: ffffc90002218000
[ 480.327068] RIP: 0010:nvme_reset_work+0x36c/0xec0
[ 480.327069] RSP: 0018:ffffc9000221bdb8 EFLAGS: 00010246
[ 480.327070] RAX: 0000000000460000 RBX: ffff880498a98128 RCX: dead000000000200
[ 480.327070] RDX: 0000000000000001 RSI: ffff8804b1028020 RDI: ffff880498a98128
[ 480.327071] RBP: ffffc9000221be50 R08: 0000000000000000 R09: 0000000000000000
[ 480.327071] R10: ffffc90001963ce8 R11: 000000000000020d R12: ffff880498a98000
[ 480.327072] R13: ffff880498a53500 R14: ffff880498a98130 R15: ffff880498a98128
[ 480.327072] FS: 0000000000000000(0000) GS:ffff8804c1cc0000(0000) knlGS:0000000000000000
[ 480.327073] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 480.327074] CR2: 00007ffcf3c37f78 CR3: 0000000001e09000 CR4: 00000000003406e0
[ 480.327074] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 480.327075] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 480.327075] Call Trace:
[ 480.327079] ? __switch_to+0x227/0x400
[ 480.327081] process_one_work+0x18c/0x3a0
[ 480.327082] worker_thread+0x4e/0x3b0
[ 480.327084] kthread+0x109/0x140
[ 480.327085] ? process_one_work+0x3a0/0x3a0
[ 480.327087] ? kthread_park+0x60/0x60
[ 480.327102] ret_from_fork+0x2c/0x40
[ 480.327103] Code: e8 5a dc ff ff 85 c0 41 89 c1 0f.....
This patch addresses the problem by using the state of the controller
to decide whether a reset should be queued or not, as state changes are
synchronized using the controller spinlock. Also, cancel_work_sync is
used to make sure remove cancels the reset_work and waits for it to
finish. This patch also changes the return value from -ENODEV to the
more appropriate -EBUSY if nvme_reset fails to change state.
Fixes: c5f6ce97c1210 ("nvme: don't schedule multiple resets")
Signed-off-by: Rakesh Pandit <rakesh@tuxera.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- | drivers/nvme/host/pci.c | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d52701df7245..951042a375d6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c | |||
@@ -1367,7 +1367,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) | |||
1367 | bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO); | 1367 | bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO); |
1368 | 1368 | ||
1369 | /* If there is a reset ongoing, we shouldn't reset again. */ | 1369 | /* If there is a reset ongoing, we shouldn't reset again. */ |
1370 | if (work_busy(&dev->reset_work)) | 1370 | if (dev->ctrl.state == NVME_CTRL_RESETTING) |
1371 | return false; | 1371 | return false; |
1372 | 1372 | ||
1373 | /* We shouldn't reset unless the controller is on fatal error state | 1373 | /* We shouldn't reset unless the controller is on fatal error state |
@@ -1903,7 +1903,7 @@ static void nvme_reset_work(struct work_struct *work) | |||
1903 | bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); | 1903 | bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); |
1904 | int result = -ENODEV; | 1904 | int result = -ENODEV; |
1905 | 1905 | ||
1906 | if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)) | 1906 | if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) |
1907 | goto out; | 1907 | goto out; |
1908 | 1908 | ||
1909 | /* | 1909 | /* |
@@ -1913,9 +1913,6 @@ static void nvme_reset_work(struct work_struct *work) | |||
1913 | if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) | 1913 | if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) |
1914 | nvme_dev_disable(dev, false); | 1914 | nvme_dev_disable(dev, false); |
1915 | 1915 | ||
1916 | if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) | ||
1917 | goto out; | ||
1918 | |||
1919 | result = nvme_pci_enable(dev); | 1916 | result = nvme_pci_enable(dev); |
1920 | if (result) | 1917 | if (result) |
1921 | goto out; | 1918 | goto out; |
@@ -2009,8 +2006,8 @@ static int nvme_reset(struct nvme_dev *dev) | |||
2009 | { | 2006 | { |
2010 | if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) | 2007 | if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) |
2011 | return -ENODEV; | 2008 | return -ENODEV; |
2012 | if (work_busy(&dev->reset_work)) | 2009 | if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) |
2013 | return -ENODEV; | 2010 | return -EBUSY; |
2014 | if (!queue_work(nvme_workq, &dev->reset_work)) | 2011 | if (!queue_work(nvme_workq, &dev->reset_work)) |
2015 | return -EBUSY; | 2012 | return -EBUSY; |
2016 | return 0; | 2013 | return 0; |
@@ -2136,6 +2133,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2136 | if (result) | 2133 | if (result) |
2137 | goto release_pools; | 2134 | goto release_pools; |
2138 | 2135 | ||
2136 | nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING); | ||
2139 | dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); | 2137 | dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); |
2140 | 2138 | ||
2141 | queue_work(nvme_workq, &dev->reset_work); | 2139 | queue_work(nvme_workq, &dev->reset_work); |
@@ -2179,6 +2177,7 @@ static void nvme_remove(struct pci_dev *pdev) | |||
2179 | 2177 | ||
2180 | nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); | 2178 | nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); |
2181 | 2179 | ||
2180 | cancel_work_sync(&dev->reset_work); | ||
2182 | pci_set_drvdata(pdev, NULL); | 2181 | pci_set_drvdata(pdev, NULL); |
2183 | 2182 | ||
2184 | if (!pci_device_is_present(pdev)) { | 2183 | if (!pci_device_is_present(pdev)) { |