 drivers/nvme/host/core.c | 21 ++++++++++++++-------
 drivers/nvme/host/fc.c   | 20 ++++++++++++++++++--
 drivers/nvme/host/pci.c  | 13 ++++++-------
 drivers/nvme/host/rdma.c | 44 +++++++++++++++++++++++++---------------
 4 files changed, 67 insertions(+), 31 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a60926410438..903d5813023a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -56,7 +56,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
-static unsigned long default_ps_max_latency_us = 25000;
+static unsigned long default_ps_max_latency_us = 100000;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
@@ -1342,7 +1342,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * transitioning between power states.  Therefore, when running
 	 * in any given state, we will enter the next lower-power
 	 * non-operational state after waiting 50 * (enlat + exlat)
-	 * microseconds, as long as that state's total latency is under
+	 * microseconds, as long as that state's exit latency is under
 	 * the requested maximum latency.
 	 *
 	 * We will not autonomously enter any non-operational state for
@@ -1387,7 +1387,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * lowest-power state, not the number of states.
 	 */
 	for (state = (int)ctrl->npss; state >= 0; state--) {
-		u64 total_latency_us, transition_ms;
+		u64 total_latency_us, exit_latency_us, transition_ms;
 
 		if (target)
 			table->entries[state] = target;
@@ -1408,12 +1408,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 				NVME_PS_FLAGS_NON_OP_STATE))
 			continue;
 
-		total_latency_us =
-			(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
-			+ le32_to_cpu(ctrl->psd[state].exit_lat);
-		if (total_latency_us > ctrl->ps_max_latency_us)
+		exit_latency_us =
+			(u64)le32_to_cpu(ctrl->psd[state].exit_lat);
+		if (exit_latency_us > ctrl->ps_max_latency_us)
 			continue;
 
+		total_latency_us =
+			exit_latency_us +
+			le32_to_cpu(ctrl->psd[state].entry_lat);
+
 		/*
 		 * This state is good.  Use it as the APST idle
 		 * target for higher power states.
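
The rule this hunk changes is easier to see in isolation: a non-operational power state is now usable whenever its exit latency alone fits the latency budget, while entry latency still counts toward the 50 * (enlat + exlat) idle timeout. The stand-alone sketch below models that selection logic in user space; the power-state table and its latency values are invented for illustration, and this is not the kernel code itself.

#include <stdint.h>
#include <stdio.h>

struct ps { uint32_t entry_lat_us, exit_lat_us; int non_operational; };

int main(void)
{
	/* invented example table; index == power state, PS3 is deepest */
	struct ps psd[] = {
		{ 0,    0,     0 },	/* PS0: operational */
		{ 5,    5,     0 },	/* PS1: operational */
		{ 1000, 1000,  1 },	/* PS2: light sleep */
		{ 2000, 60000, 1 },	/* PS3: deep sleep, slow wakeup */
	};
	uint64_t max_latency_us = 25000;	/* ps_max_latency_us analogue */
	int state;

	/* walk from the deepest state up, as the driver does */
	for (state = 3; state >= 0; state--) {
		uint64_t exit_latency_us, total_latency_us, transition_ms;

		if (!psd[state].non_operational)
			continue;

		/* new rule: only the wakeup cost counts against the budget */
		exit_latency_us = psd[state].exit_lat_us;
		if (exit_latency_us > max_latency_us)
			continue;

		/* entry latency still shapes how long we idle first */
		total_latency_us = exit_latency_us + psd[state].entry_lat_us;
		transition_ms = total_latency_us * 50 / 1000;
		if (!transition_ms)
			transition_ms = 1;

		printf("PS%d usable, idle timeout %llu ms\n",
		       state, (unsigned long long)transition_ms);
		return 0;
	}
	printf("no usable non-operational state\n");
	return 0;
}

With the values above, PS3 is rejected (60 ms wakeup exceeds the 25 ms budget) but PS2 is accepted even though its combined entry+exit latency would have failed the old check at a tighter budget; only wakeup cost is visible to latency-sensitive callers.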
@@ -2438,6 +2441,10 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
+
+	/* Forcibly start all queues to avoid having stuck requests */
+	blk_mq_start_hw_queues(ctrl->admin_q);
+
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5b14cbefb724..92964cef0f4b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1139,6 +1139,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 /* *********************** NVME Ctrl Routines **************************** */
 
 static void __nvme_fc_final_op_cleanup(struct request *rq);
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
 nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1265,7 +1266,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	struct nvme_command *sqe = &op->cmd_iu.sqe;
 	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
 	union nvme_result result;
-	bool complete_rq;
+	bool complete_rq, terminate_assoc = true;
 
 	/*
 	 * WARNING:
@@ -1294,6 +1295,14 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	 * fabricate a CQE, the following fields will not be set as they
 	 * are not referenced:
 	 *     cqe.sqid,  cqe.sqhd,  cqe.command_id
+	 *
+	 * Failure or error of an individual i/o, in a transport
+	 * detected fashion unrelated to the nvme completion status,
+	 * can potentially cause the initiator and target sides to get
+	 * out of sync on SQ head/tail (aka outstanding io count allowed).
+	 * Per FC-NVME spec, failure of an individual command requires
+	 * the connection to be terminated, which in turn requires the
+	 * association to be terminated.
 	 */
 
 	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
@@ -1359,6 +1368,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		goto done;
 	}
 
+	terminate_assoc = false;
+
 done:
 	if (op->flags & FCOP_FLAGS_AEN) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
@@ -1366,7 +1377,7 @@ done:
 		atomic_set(&op->state, FCPOP_STATE_IDLE);
 		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
-		return;
+		goto check_error;
 	}
 
 	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
@@ -1379,6 +1390,10 @@ done:
 		nvme_end_request(rq, status, result);
 	} else
 		__nvme_fc_final_op_cleanup(rq);
+
+check_error:
+	if (terminate_assoc)
+		nvme_fc_error_recovery(ctrl, "transport detected io error");
 }
 
 static int
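
The control flow added above follows a defensive idiom worth spelling out: assume the association must be torn down, clear the flag only on the single fully-validated success path, and test it once at a common exit so that every early error path triggers recovery. Here is a minimal user-space sketch of the same shape, with invented stand-ins for the driver's types and helpers.

#include <stdbool.h>
#include <stdio.h>

/* stand-in for nvme_fc_error_recovery(): would terminate the association */
static void error_recovery(const char *errmsg)
{
	printf("recovery: %s\n", errmsg);
}

static void complete_io(bool transport_error)
{
	bool terminate_assoc = true;	/* pessimistic default */

	if (transport_error)
		goto done;		/* flag stays set */

	/* ...normal CQE validation would happen here... */

	terminate_assoc = false;	/* the only path that clears it */

done:
	/* ...AEN handling / request completion happens here... */
	if (terminate_assoc)
		error_recovery("transport detected io error");
}

int main(void)
{
	complete_io(false);	/* clean completion: no recovery */
	complete_io(true);	/* transport error: recovery fires */
	return 0;
}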
@@ -2791,6 +2806,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		ctrl->ctrl.opts = NULL;
 		/* initiate nvme ctrl ref counting teardown */
 		nvme_uninit_ctrl(&ctrl->ctrl);
+		nvme_put_ctrl(&ctrl->ctrl);
 
 		/* as we're past the point where we transition to the ref
 		 * counting teardown path, if we return a bad pointer here,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d52701df7245..951042a375d6 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1367,7 +1367,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
 
 	/* If there is a reset ongoing, we shouldn't reset again. */
-	if (work_busy(&dev->reset_work))
+	if (dev->ctrl.state == NVME_CTRL_RESETTING)
 		return false;
 
 	/* We shouldn't reset unless the controller is on fatal error state
@@ -1903,7 +1903,7 @@ static void nvme_reset_work(struct work_struct *work)
 	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
 	int result = -ENODEV;
 
-	if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
+	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
 		goto out;
 
 	/*
@@ -1913,9 +1913,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
-		goto out;
-
 	result = nvme_pci_enable(dev);
 	if (result)
 		goto out;
@@ -2009,8 +2006,8 @@ static int nvme_reset(struct nvme_dev *dev)
 {
 	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
 		return -ENODEV;
-	if (work_busy(&dev->reset_work))
-		return -ENODEV;
+	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+		return -EBUSY;
 	if (!queue_work(nvme_workq, &dev->reset_work))
 		return -EBUSY;
 	return 0;
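
What the switch from work_busy() to a state transition buys is atomicity: claiming the reset and checking for a reset in progress become one operation, so two racing callers cannot both queue the work. Below is a sketch of the pattern using C11 atomics in place of the driver's locked nvme_change_ctrl_state(); the names and the user-space setting are mine.

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

enum ctrl_state { CTRL_LIVE, CTRL_RESETTING };

static _Atomic enum ctrl_state state = CTRL_LIVE;

/* only one caller can win the LIVE -> RESETTING transition */
static int try_reset(void)
{
	enum ctrl_state expected = CTRL_LIVE;

	if (!atomic_compare_exchange_strong(&state, &expected,
					    CTRL_RESETTING))
		return -EBUSY;
	/* ...queue the reset work here; the worker now WARNs unless it
	 * finds the state already set to RESETTING, and moves it back
	 * when the reset finishes... */
	return 0;
}

int main(void)
{
	printf("first caller:  %d\n", try_reset());	/* 0 */
	printf("second caller: %d\n", try_reset());	/* -EBUSY */
	return 0;
}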
@@ -2136,6 +2133,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release_pools;
 
+	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
 	queue_work(nvme_workq, &dev->reset_work);
@@ -2179,6 +2177,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
+	cancel_work_sync(&dev->reset_work);
 	pci_set_drvdata(pdev, NULL);
 
 	if (!pci_device_is_present(pdev)) {
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 28bd255c144d..24397d306d53 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -753,28 +753,26 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto requeue;
 
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
-
 	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
 	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
 	nvme_start_keep_alive(&ctrl->ctrl);
 
 	if (ctrl->queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
-			goto stop_admin_q;
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -782,7 +780,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	ctrl->ctrl.opts->nr_reconnects = 0;
 
 	if (ctrl->queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
@@ -791,8 +788,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	return;
 
-stop_admin_q:
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
 			ctrl->ctrl.opts->nr_reconnects);
@@ -823,6 +818,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 
+	/*
+	 * queues are not live anymore, so restart the queues to fail fast
+	 * new IO
+	 */
+	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	nvme_start_queues(&ctrl->ctrl);
+
 	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
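Restarting queues in the middle of error recovery can look backwards, but a stopped blk-mq queue merely parks requests, while a started queue on a disconnected controller lets them dispatch and complete quickly with an error. A toy model of that distinction follows; the queue and request structures are invented and are not blk-mq.

#include <errno.h>
#include <stdio.h>

struct hwq { int stopped; int link_up; };
struct req { const char *name; };

static void dispatch(struct hwq *q, struct req *r)
{
	if (q->stopped) {
		printf("%s: parked, waiting for the queue\n", r->name);
		return;			/* the request just sits there */
	}
	/* queue running: the request reaches the driver and completes */
	printf("%s: completed, errno=%d\n", r->name,
	       q->link_up ? 0 : EIO);
}

int main(void)
{
	struct hwq q = { .stopped = 1, .link_up = 0 };
	struct req io = { "io-a" };

	dispatch(&q, &io);	/* stuck while the queue is stopped */
	q.stopped = 0;		/* blk_mq_start_stopped_hw_queues analogue */
	dispatch(&q, &io);	/* now fails fast with EIO */
	return 0;
}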
@@ -1433,7 +1435,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 /*
  * We cannot accept any other command until the Connect command has completed.
  */
-static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
+static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
 	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
@@ -1441,11 +1443,22 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
 
 		if (!blk_rq_is_passthrough(rq) ||
 		    cmd->common.opcode != nvme_fabrics_command ||
-		    cmd->fabrics.fctype != nvme_fabrics_type_connect)
-			return false;
+		    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+			/*
+			 * Reconnecting state means transport disruption, which
+			 * can take a long time and even might fail permanently,
+			 * so we can't let incoming I/O be requeued forever.
+			 * Fail it fast to allow upper layers a chance to
+			 * failover.
+			 */
+			if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+				return -EIO;
+			else
+				return -EAGAIN;
+		}
 	}
 
-	return true;
+	return 0;
 }
 
 static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
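
The bool-to-int change above turns the readiness check into an errno-style verdict, so the caller can tell "retry later" (-EAGAIN) from "give up now" (-EIO) instead of always returning BLK_MQ_RQ_QUEUE_BUSY and requeueing forever. A simplified sketch of that convention follows; only the RECONNECTING distinction mirrors the patch, the rest is invented.

#include <errno.h>
#include <stdio.h>
#include <string.h>

enum ctrl_state { CTRL_LIVE, CTRL_RECONNECTING };

struct queue { int live; enum ctrl_state ctrl_state; };

static int queue_is_ready(const struct queue *q, int is_connect_cmd)
{
	if (!q->live && !is_connect_cmd) {
		/* a reconnect may never succeed: fail fast in that state,
		 * otherwise ask the caller to retry the request later */
		if (q->ctrl_state == CTRL_RECONNECTING)
			return -EIO;
		return -EAGAIN;
	}
	return 0;	/* live queue, or the Connect command itself */
}

int main(void)
{
	struct queue q = { .live = 0, .ctrl_state = CTRL_RECONNECTING };

	printf("connect cmd -> %d\n", queue_is_ready(&q, 1));	/* 0 */
	printf("normal io   -> %s\n",
	       strerror(-queue_is_ready(&q, 0)));		/* EIO */
	return 0;
}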
@@ -1463,8 +1476,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	WARN_ON_ONCE(rq->tag < 0);
 
-	if (!nvme_rdma_queue_is_ready(queue, rq))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+	ret = nvme_rdma_queue_is_ready(queue, rq);
+	if (unlikely(ret))
+		goto err;
 
 	dev = queue->device->dev;
 	ib_dma_sync_single_for_cpu(dev, sqe->dma,