author	James Smart <jsmart2021@gmail.com>	2018-04-12 11:16:15 -0400
committer	Jens Axboe <axboe@kernel.dk>	2018-04-12 11:58:27 -0400
commit	bb06ec31452fb2da1594f88035c2ecea4e0652f4 (patch)
tree	9cdc51d8adefdd4875a3ebf429531ff13da4aef0
parent	62843c2e4226057c83f520c74fe9c81a1891c331 (diff)
nvme: expand nvmf_check_if_ready checks
The nvmf_check_if_ready() checks that were added are very simplistic. As
such, the routine allows a lot of cases to fail ios during windows of
reset or re-connection. In cases where there are no multi-path options
present, the error goes back to the caller - the filesystem or
application. Not good.

The common routine was rewritten and the calling syntax slightly expanded
so that per-transport is_ready routines don't need to be present. The
transports now call the routine directly. The routine is now a fabrics
routine rather than an inline function.

The routine now looks at controller state to decide the action to take.
Some states mandate io failure. Others define the condition where a
command can be accepted. When the decision is unclear, a generic
queue-or-reject check is made to look for failfast or multipath ios and
only fails the io if it is so marked. Otherwise, the io will be queued
and wait for the controller state to resolve.

Admin commands issued via ioctl share a live admin queue with commands
from the transport for controller init. The ioctls could be intermixed
with the initialization commands. It's possible for an ioctl cmd to be
issued prior to the controller being enabled. To block this, the ioctl
admin commands need to be distinguished from admin commands used for
controller init. Added a USERCMD nvme_req(req)->rq_flags bit to reflect
this division and set it on ioctl requests. As the nvmf_check_if_ready()
routine is called prior to nvme_setup_cmd(), ensure that commands
allocated by the ioctl path (actually anything in core.c) prep the
nvme_req(req) before starting the io. This will preserve the USERCMD
flag during execution and/or retry.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--	drivers/nvme/host/core.c	17
-rw-r--r--	drivers/nvme/host/fabrics.c	79
-rw-r--r--	drivers/nvme/host/fabrics.h	33
-rw-r--r--	drivers/nvme/host/fc.c	12
-rw-r--r--	drivers/nvme/host/nvme.h	1
-rw-r--r--	drivers/nvme/host/rdma.c	14
-rw-r--r--	drivers/nvme/target/loop.c	11
7 files changed, 101 insertions(+), 66 deletions(-)
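To illustrate the calling convention described in the message above, here is a
minimal sketch (not part of this patch) of how a fabrics transport's ->queue_rq()
handler is expected to invoke the reworked helper. The queue type "example_queue",
the EXAMPLE_Q_LIVE flag bit, and the connectivity expression are illustrative
placeholders; the actual call sites are in the fc.c, rdma.c and loop.c hunks below.

/*
 * Hedged sketch: a fabrics transport ->queue_rq() gating command
 * submission with the common readiness check.
 */
static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct example_queue *queue = hctx->driver_data;	/* placeholder type */
	struct request *rq = bd->rq;
	blk_status_t ret;

	/*
	 * Returns BLK_STS_OK to proceed, BLK_STS_RESOURCE to have the block
	 * layer requeue and retry once the controller state resolves, or
	 * BLK_STS_IOERR to fail the io immediately (failfast/multipath ios
	 * or a deleting controller).
	 */
	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
			test_bit(EXAMPLE_Q_LIVE, &queue->flags),
			true /* transport-specific connectivity check */);
	if (unlikely(ret))
		return ret;

	/* ... normal command setup and submission follows ... */
	return BLK_STS_OK;
}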
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index aac3c1d2b2a2..9df4f71e58ca 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -376,6 +376,15 @@ static void nvme_put_ns(struct nvme_ns *ns)
 	kref_put(&ns->kref, nvme_free_ns);
 }
 
+static inline void nvme_clear_nvme_request(struct request *req)
+{
+	if (!(req->rq_flags & RQF_DONTPREP)) {
+		nvme_req(req)->retries = 0;
+		nvme_req(req)->flags = 0;
+		req->rq_flags |= RQF_DONTPREP;
+	}
+}
+
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
 {
@@ -392,6 +401,7 @@ struct request *nvme_alloc_request(struct request_queue *q,
 		return req;
 
 	req->cmd_flags |= REQ_FAILFAST_DRIVER;
+	nvme_clear_nvme_request(req);
 	nvme_req(req)->cmd = cmd;
 
 	return req;
@@ -608,11 +618,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 {
 	blk_status_t ret = BLK_STS_OK;
 
-	if (!(req->rq_flags & RQF_DONTPREP)) {
-		nvme_req(req)->retries = 0;
-		nvme_req(req)->flags = 0;
-		req->rq_flags |= RQF_DONTPREP;
-	}
+	nvme_clear_nvme_request(req);
 
 	switch (req_op(req)) {
 	case REQ_OP_DRV_IN:
@@ -742,6 +748,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 		return PTR_ERR(req);
 
 	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	nvme_req(req)->flags |= NVME_REQ_USERCMD;
 
 	if (ubuffer && bufflen) {
 		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 3583f9492a45..124c458806df 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -536,6 +536,85 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
 	return NULL;
 }
 
+blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live, bool is_connected)
+{
+	struct nvme_command *cmd = nvme_req(rq)->cmd;
+
+	if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected))
+		return BLK_STS_OK;
+
+	switch (ctrl->state) {
+	case NVME_CTRL_DELETING:
+		goto reject_io;
+
+	case NVME_CTRL_NEW:
+	case NVME_CTRL_CONNECTING:
+		if (!is_connected)
+			/*
+			 * This is the case of starting a new
+			 * association but connectivity was lost
+			 * before it was fully created. We need to
+			 * error the commands used to initialize the
+			 * controller so the reconnect can go into a
+			 * retry attempt. The commands should all be
+			 * marked REQ_FAILFAST_DRIVER, which will hit
+			 * the reject path below. Anything else will
+			 * be queued while the state settles.
+			 */
+			goto reject_or_queue_io;
+
+		if ((queue_live &&
+		     !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) ||
+		    (!queue_live && blk_rq_is_passthrough(rq) &&
+		     cmd->common.opcode == nvme_fabrics_command &&
+		     cmd->fabrics.fctype == nvme_fabrics_type_connect))
+			/*
+			 * If queue is live, allow only commands that
+			 * are internally generated pass through. These
+			 * are commands on the admin queue to initialize
+			 * the controller. This will reject any ioctl
+			 * admin cmds received while initializing.
+			 *
+			 * If the queue is not live, allow only a
+			 * connect command. This will reject any ioctl
+			 * admin cmd as well as initialization commands
+			 * if the controller reverted the queue to non-live.
+			 */
+			return BLK_STS_OK;
+
+		/*
+		 * fall-thru to the reject_or_queue_io clause
+		 */
+		break;
+
+	/* these cases fall-thru
+	 * case NVME_CTRL_LIVE:
+	 * case NVME_CTRL_RESETTING:
+	 */
+	default:
+		break;
+	}
+
+reject_or_queue_io:
+	/*
+	 * Any other new io is something we're not in a state to send
+	 * to the device. Default action is to busy it and retry it
+	 * after the controller state is recovered. However, anything
+	 * marked for failfast or nvme multipath is immediately failed.
+	 * Note: commands used to initialize the controller will be
+	 * marked for failfast.
+	 * Note: nvme cli/ioctl commands are marked for failfast.
+	 */
+	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+		return BLK_STS_RESOURCE;
+
+reject_io:
+	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
+	return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(nvmf_check_if_ready);
+
 static const match_table_t opt_tokens = {
 	{ NVMF_OPT_TRANSPORT, "transport=%s" },
 	{ NVMF_OPT_TRADDR, "traddr=%s" },
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a3145d90c1d2..ef46c915b7b5 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -157,36 +157,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
-
-static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
-		struct request *rq)
-{
-	struct nvme_command *cmd = nvme_req(rq)->cmd;
-
-	/*
-	 * We cannot accept any other command until the connect command has
-	 * completed, so only allow connect to pass.
-	 */
-	if (!blk_rq_is_passthrough(rq) ||
-	    cmd->common.opcode != nvme_fabrics_command ||
-	    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
-		/*
-		 * Connecting state means transport disruption or initial
-		 * establishment, which can take a long time and even might
-		 * fail permanently, fail fast to give upper layers a chance
-		 * to failover.
-		 * Deleting state means that the ctrl will never accept commands
-		 * again, fail it permanently.
-		 */
-		if (ctrl->state == NVME_CTRL_CONNECTING ||
-		    ctrl->state == NVME_CTRL_DELETING) {
-			nvme_req(rq)->status = NVME_SC_ABORT_REQ;
-			return BLK_STS_IOERR;
-		}
-		return BLK_STS_RESOURCE; /* try again later */
-	}
-
-	return BLK_STS_OK;
-}
+blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl,
+	struct request *rq, bool queue_live, bool is_connected);
 
 #endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index c6e719b2f3ca..6cb26bcf6ec0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2277,14 +2277,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	return BLK_STS_OK;
 }
 
-static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue,
-		struct request *rq)
-{
-	if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags)))
-		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
-	return BLK_STS_OK;
-}
-
 static blk_status_t
 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
@@ -2300,7 +2292,9 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	u32 data_len;
 	blk_status_t ret;
 
-	ret = nvme_fc_is_ready(queue, rq);
+	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
+		test_bit(NVME_FC_Q_LIVE, &queue->flags),
+		ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE);
 	if (unlikely(ret))
 		return ret;
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 08c4cff79cde..061fecfd44f5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -105,6 +105,7 @@ struct nvme_request {
 
 enum {
 	NVME_REQ_CANCELLED = (1 << 0),
+	NVME_REQ_USERCMD = (1 << 1),
 };
 
 static inline struct nvme_request *nvme_req(struct request *req)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 758537e9ba07..1eb4438a8763 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1601,17 +1601,6 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 	return BLK_EH_HANDLED;
 }
 
-/*
- * We cannot accept any other command until the Connect command has completed.
- */
-static inline blk_status_t
-nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
-{
-	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags)))
-		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
-	return BLK_STS_OK;
-}
-
 static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -1627,7 +1616,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	WARN_ON_ONCE(rq->tag < 0);
 
-	ret = nvme_rdma_is_ready(queue, rq);
+	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
+		test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true);
 	if (unlikely(ret))
 		return ret;
 
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index b9d5b69d8548..31fdfba556a8 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -149,14 +149,6 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	return BLK_EH_HANDLED;
 }
 
-static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue,
-		struct request *rq)
-{
-	if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags)))
-		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
-	return BLK_STS_OK;
-}
-
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -166,7 +158,8 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
 	blk_status_t ret;
 
-	ret = nvme_loop_is_ready(queue, req);
+	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req,
+		test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true);
 	if (unlikely(ret))
 		return ret;
 