author	Sagi Grimberg <sagi@grimberg.me>	2016-06-13 10:45:28 -0400
committer	Jens Axboe <axboe@fb.com>	2016-07-05 13:28:20 -0400
commit	038bd4cb6766c69b5b9c77507f389cc718a36842 (patch)
tree	f551977e651401dadf30bb47bc9756df5f922901
parent	7b89eae29eec4dd9259acd82ed57bec8d9e430ca (diff)
nvme: add keep-alive support
Periodic keep-alive is a mandatory feature in NVMe over Fabrics, and optional in NVMe 1.2.1 for PCIe. This patch adds a periodic keep-alive sent from the host to verify that the controller is still responsive, and vice versa. The keep-alive timeout is user-defined (via the keep_alive_tmo connection parameter) and defaults to 5 seconds.

To avoid a race in which a keep-alive sent by the host competes with the expiration of the target-side keep-alive timer, the host adds a grace period of 10 seconds when publishing the keep-alive timeout to the target.

If a keep-alive fails (or times out), transport-specific error recovery kicks in. For now only NVMe over Fabrics is wired up to support keep-alive, but PCIe support can be added easily once controllers that actually support it become available.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Steve Wise <swise@chelsio.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
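As a standalone check of the timing described above, using the defaults this patch adds (NVME_DEFAULT_KATO = 5, NVME_KATO_GRACE = 10): the host pings every 5 seconds while publishing a 15-second timeout (in milliseconds, per the fabrics connect command) to the target, so a keep-alive already in flight cannot lose the race against the target's timer. A minimal sketch of the arithmetic:

#include <assert.h>

#define NVME_DEFAULT_KATO	5	/* host keep-alive interval, seconds */
#define NVME_KATO_GRACE		10	/* grace period added for the target, seconds */

int main(void)
{
	/* The fabrics connect command carries the timeout in milliseconds. */
	unsigned int published_ms = (NVME_DEFAULT_KATO + NVME_KATO_GRACE) * 1000;

	assert(published_ms == 15000);	/* target waits 15s; host pings every 5s */
	return 0;
}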
-rw-r--r--	drivers/nvme/host/core.c	76
-rw-r--r--	drivers/nvme/host/fabrics.c	33
-rw-r--r--	drivers/nvme/host/fabrics.h	3
-rw-r--r--	drivers/nvme/host/nvme.h	8
4 files changed, 119 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e5c1d752c8f3..c01687d61009 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -30,6 +30,7 @@
 #include <asm/unaligned.h>
 
 #include "nvme.h"
+#include "fabrics.h"
 
 #define NVME_MINORS (1U << MINORBITS)
 
@@ -463,6 +464,74 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 			result, timeout);
 }
 
+static void nvme_keep_alive_end_io(struct request *rq, int error)
+{
+	struct nvme_ctrl *ctrl = rq->end_io_data;
+
+	blk_mq_free_request(rq);
+
+	if (error) {
+		dev_err(ctrl->device,
+			"failed nvme_keep_alive_end_io error=%d\n", error);
+		return;
+	}
+
+	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+}
+
+static int nvme_keep_alive(struct nvme_ctrl *ctrl)
+{
+	struct nvme_command c;
+	struct request *rq;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_keep_alive;
+
+	rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
+			NVME_QID_ANY);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	rq->timeout = ctrl->kato * HZ;
+	rq->end_io_data = ctrl;
+
+	blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io);
+
+	return 0;
+}
+
+static void nvme_keep_alive_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+			struct nvme_ctrl, ka_work);
+
+	if (nvme_keep_alive(ctrl)) {
+		/* allocation failure, reset the controller */
+		dev_err(ctrl->device, "keep-alive failed\n");
+		ctrl->ops->reset_ctrl(ctrl);
+		return;
+	}
+}
+
+void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
+{
+	if (unlikely(ctrl->kato == 0))
+		return;
+
+	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+}
+EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
+
+void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
+{
+	if (unlikely(ctrl->kato == 0))
+		return;
+
+	cancel_delayed_work_sync(&ctrl->ka_work);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
+
 int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
 	struct nvme_command c = { };
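nvme_start_keep_alive() and nvme_stop_keep_alive() above are exported for transport drivers to call. A minimal sketch of how a fabrics transport might bracket controller setup and teardown with them (the nvmf_example_* wrappers are hypothetical illustrations, not part of this patch):

/* Hypothetical transport glue, for illustration only. */
static int nvmf_example_setup_ctrl(struct nvme_ctrl *ctrl)
{
	int ret;

	ret = nvme_init_identify(ctrl);	/* reads id->kas, validates keep-alive */
	if (ret)
		return ret;

	/* Arm the periodic keep-alive once the admin queue is live. */
	nvme_start_keep_alive(ctrl);
	return 0;
}

static void nvmf_example_teardown_ctrl(struct nvme_ctrl *ctrl)
{
	/* Quiesce the keep-alive timer before the admin queue goes away. */
	nvme_stop_keep_alive(ctrl);
}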
@@ -1179,6 +1248,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
 	nvme_set_queue_limits(ctrl, ctrl->admin_q);
 	ctrl->sgls = le32_to_cpu(id->sgls);
+	ctrl->kas = le16_to_cpu(id->kas);
 
 	if (ctrl->ops->is_fabrics) {
 		ctrl->icdoff = le16_to_cpu(id->icdoff);
@@ -1192,6 +1262,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		 */
 		if (ctrl->cntlid != le16_to_cpu(id->cntlid))
 			ret = -EINVAL;
+
+		if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
+			dev_err(ctrl->dev,
+				"keep-alive support is mandatory for fabrics\n");
+			ret = -EINVAL;
+		}
 	} else {
 		ctrl->cntlid = le16_to_cpu(id->cntlid);
 	}
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index efa86d0e4c9f..b86b6379ef0c 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -360,6 +360,12 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	cmd.connect.fctype = nvme_fabrics_type_connect;
 	cmd.connect.qid = 0;
 	cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);
+	/*
+	 * The connect kato field is in milliseconds (kato seconds * 1000);
+	 * add a grace period to cover controller-side kato enforcement.
+	 */
+	cmd.connect.kato = ctrl->opts->discovery_nqn ? 0 :
+		cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000);
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -499,6 +505,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_NR_IO_QUEUES, "nr_io_queues=%d" },
 	{ NVMF_OPT_TL_RETRY_COUNT, "tl_retry_count=%d" },
 	{ NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" },
+	{ NVMF_OPT_KATO, "keep_alive_tmo=%d" },
 	{ NVMF_OPT_HOSTNQN, "hostnqn=%s" },
 	{ NVMF_OPT_ERR, NULL }
 };
@@ -610,6 +617,28 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			}
 			opts->tl_retry_count = token;
 			break;
+		case NVMF_OPT_KATO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (opts->discovery_nqn) {
+				pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token < 0) {
+				pr_err("Invalid keep_alive_tmo %d\n", token);
+				ret = -EINVAL;
+				goto out;
+			} else if (token == 0) {
+				/* Allowed for debug */
+				pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
+			}
+			opts->kato = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -661,6 +690,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	}
 
 out:
+	if (!opts->discovery_nqn && !opts->kato)
+		opts->kato = NVME_DEFAULT_KATO;
 	kfree(options);
 	return ret;
 }
@@ -717,7 +748,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
 
 #define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
-				 NVMF_OPT_HOSTNQN)
+				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
 
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 120383088bc1..b54067404963 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -49,6 +49,7 @@ enum {
 	NVMF_OPT_QUEUE_SIZE = 1 << 4,
 	NVMF_OPT_NR_IO_QUEUES = 1 << 5,
 	NVMF_OPT_TL_RETRY_COUNT = 1 << 6,
+	NVMF_OPT_KATO = 1 << 7,
 	NVMF_OPT_HOSTNQN = 1 << 8,
 	NVMF_OPT_RECONNECT_DELAY = 1 << 9,
 };
@@ -72,6 +73,7 @@ enum {
  *		kicking upper layer(s) error recovery.
  * @reconnect_delay: Time between two consecutive reconnect attempts.
  * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
+ * @kato:	Keep-alive timeout.
  * @host:	Virtual NVMe host, contains the NQN and Host ID.
  */
 struct nvmf_ctrl_options {
@@ -85,6 +87,7 @@ struct nvmf_ctrl_options {
 	unsigned short tl_retry_count;
 	unsigned int reconnect_delay;
 	bool discovery_nqn;
+	unsigned int kato;
 	struct nvmf_host *host;
 };
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ef28d4e33bf1..8d8cbc437699 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -38,6 +38,9 @@ extern unsigned char admin_timeout;
 extern unsigned char shutdown_timeout;
 #define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
 
+#define NVME_DEFAULT_KATO	5
+#define NVME_KATO_GRACE		10
+
 enum {
 	NVME_NS_LBA = 0,
 	NVME_NS_LIGHTNVM = 1,
@@ -109,10 +112,13 @@ struct nvme_ctrl {
 	u8 vwc;
 	u32 vs;
 	u32 sgls;
+	u16 kas;
+	unsigned int kato;
 	bool subsystem;
 	unsigned long quirks;
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
+	struct delayed_work ka_work;
 
 	/* Fabrics only */
 	u16 sqsize;
@@ -273,6 +279,8 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 		dma_addr_t dma_addr, u32 *result);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
+void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
+void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
 
 struct sg_io_hdr;
 