author    Keith Busch <keith.busch@intel.com>  2017-03-01 14:22:12 -0500
committer Jens Axboe <axboe@fb.com>            2017-03-02 10:56:59 -0500
commit    302ad8cc09339ea261eef58a8d5f4a116a8ffda5 (patch)
tree      622b89d58a88a0f3f241933386522224394e6f49
parent    f91328c40a559362b6e7b7bfee01ca17fda87592 (diff)
nvme: Complete all stuck requests
If the nvme driver is shutting down its controller, the driver will not
start the queues up again, preventing blk-mq's hot CPU notifier from
making forward progress.

To fix that, this patch starts a request_queue freeze when the driver
resets a controller so no new requests may enter. The driver will wait
for frozen after IO queues are restarted to ensure the queue reference
can be reinitialized when nvme requests to unfreeze the queues.

If the driver is doing a safe shutdown, the driver will wait for the
controller to successfully complete all inflight requests so that we
don't unnecessarily fail them. Once the controller has been disabled,
the queues will be restarted to force remaining entered requests to end
in failure so that blk-mq's hot cpu notifier may progress.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
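For reference, the blk-mq freeze lifecycle the new helpers wrap looks
roughly like this. This is a minimal sketch, assuming a single
request_queue rather than the per-namespace list the driver walks under
namespaces_mutex in the core.c hunk below; the three blk-mq calls are
the ones this patch builds on.

#include <linux/blk-mq.h>

static void freeze_lifecycle_sketch(struct request_queue *q)
{
	/* Gate new submissions; requests already entered keep their ref. */
	blk_mq_freeze_queue_start(q);

	/*
	 * ... quiesce and disable the controller; on reset, restart the
	 * stopped hardware queues (nvme_start_queues() in the driver) so
	 * entered requests can reach completion ...
	 */

	/* Block until every entered request has released its queue ref. */
	blk_mq_freeze_queue_wait(q);

	/* Lift the gate; new submissions may enter again. */
	blk_mq_unfreeze_queue(q);
}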
-rw-r--r--  drivers/nvme/host/core.c  47
-rw-r--r--  drivers/nvme/host/nvme.h   4
-rw-r--r--  drivers/nvme/host/pci.c   33
3 files changed, 79 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 25ec4e585220..9b3b57fef446 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2344,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_kill_queues);
 
+void nvme_unfreeze(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_mq_unfreeze_queue(ns->queue);
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_unfreeze);
+
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
+		timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
+		if (timeout <= 0)
+			break;
+	}
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
+
+void nvme_wait_freeze(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_mq_freeze_queue_wait(ns->queue);
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze);
+
+void nvme_start_freeze(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_mq_freeze_queue_start(ns->queue);
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_start_freeze);
+
 void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a3da1e90b99d..2aa20e3e5675 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -294,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl);
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_unfreeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
+void nvme_start_freeze(struct nvme_ctrl *ctrl);
 
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
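Taken together with the core.c hunk above, these four prototypes form
one lifecycle, applied in the pci.c hunks below: nvme_start_freeze()
when a live controller is being disabled, nvme_wait_freeze_timeout() to
give a safe shutdown a bounded chance to drain in-flight IO, and
nvme_wait_freeze()/nvme_unfreeze() bracketing nvme_dev_add() in the
reset path.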
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index eee8f8426ff2..26a5fd05fe88 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1675,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	int i, queues;
-	u32 csts = -1;
+	bool dead = true;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	del_timer_sync(&dev->watchdog_timer);
 
 	mutex_lock(&dev->shutdown_lock);
-	if (pci_is_enabled(to_pci_dev(dev->dev))) {
-		nvme_stop_queues(&dev->ctrl);
-		csts = readl(dev->bar + NVME_REG_CSTS);
+	if (pci_is_enabled(pdev)) {
+		u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+		if (dev->ctrl.state == NVME_CTRL_LIVE)
+			nvme_start_freeze(&dev->ctrl);
+		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+			pdev->error_state != pci_channel_io_normal);
 	}
 
+	/*
+	 * Give the controller a chance to complete all entered requests if
+	 * doing a safe shutdown.
+	 */
+	if (!dead && shutdown)
+		nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+	nvme_stop_queues(&dev->ctrl);
+
 	queues = dev->online_queues - 1;
 	for (i = dev->queue_count - 1; i > 0; i--)
 		nvme_suspend_queue(dev->queues[i]);
 
-	if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+	if (dead) {
 		/* A device might become IO incapable very soon during
 		 * probe, before the admin queue is configured. Thus,
 		 * queue_count can be 0 here.
@@ -1704,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 
 	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
 	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+	/*
+	 * The driver will not be starting up queues again if shutting down so
+	 * must flush all entered requests to their failed completion to avoid
+	 * deadlocking blk-mq hot-cpu notifier.
+	 */
+	if (shutdown)
+		nvme_start_queues(&dev->ctrl);
 	mutex_unlock(&dev->shutdown_lock);
 }
 
@@ -1826,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work)
 		nvme_remove_namespaces(&dev->ctrl);
 	} else {
 		nvme_start_queues(&dev->ctrl);
+		nvme_wait_freeze(&dev->ctrl);
 		nvme_dev_add(dev);
+		nvme_unfreeze(&dev->ctrl);
 	}
 
 	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
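Note the ordering in this reset-path hunk: nvme_start_queues() restarts
the stopped hardware queues so requests entered before the freeze can
complete, nvme_wait_freeze() then blocks until every namespace queue is
fully frozen, and only after nvme_dev_add() has run with nothing in
flight does nvme_unfreeze() reopen the queues to new submissions,
matching the commit message's point that the queue reference is
reinitialized when the driver requests the unfreeze.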