author     Keith Busch <keith.busch@intel.com>    2015-03-31 12:37:17 -0400
committer  Jens Axboe <axboe@fb.com>              2015-03-31 12:39:56 -0400
commit     1efccc9ddb98fd533169669160201b027562af7e
tree       6d7680fe16e50bb0fdd72e41d312accd80634fc7 /drivers/block/nvme-core.c
parent     fda631ffe5422424579e1649e04cc468d0215b85
NVMe: Fix blk-mq hot cpu notification
The driver may issue commands to a device that may never return, so its request_queue could always have active requests while the controller is running. Waiting for the queue to freeze could block forever, which is what blk-mq's hot cpu notification handler was doing when nvme drives were in use.

This change makes the asynchronous event command's tag a reserved tag and no longer keeps the request active. We can't have more than one AEN outstanding, since the request is released back to the request_queue before the command is completed. Having only one avoids potential tag collisions, and reserving the tag for this purpose prevents other admin tasks from reusing it.

I also couldn't think of a scenario where issuing AEN requests one at a time is worse than issuing them in batches, so I don't think we lose anything with this change. As an added bonus, doing it this way removes the "Cancelling I/O" warnings observed when unbinding the nvme driver from a device.

Reported-by: Yigal Korman <yigal@plexistor.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
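For readers skimming the hunks below, here is a condensed sketch of the new submission path in plain C. It is a hypothetical rendering for illustration, not a drop-in piece of the file: the function name and the explicit nvmeq parameter are made up here, while nvme_set_info(), __nvme_submit_cmd(), struct nvme_cmd_info, and the 2015-era blk_mq_alloc_request()/blk_mq_free_hctx_request() calls are the ones visible in the diff.

/*
 * Sketch: submit an NVMe Asynchronous Event Notification using a reserved
 * blk-mq tag. The request exists only long enough to hand out a unique tag;
 * it is returned to the tag set before the command is issued, so the admin
 * request_queue never holds a permanently active request that could block
 * a queue freeze (which is what hung blk-mq's hot cpu notifier).
 */
static int submit_async_event(struct nvme_dev *dev, struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	struct nvme_cmd_info *cmd_info;
	struct request *req;

	/* 'true' asks for one of the reserved tags (admin_tagset.reserved_tags = 1). */
	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->cmd_flags |= REQ_NO_TIMEOUT;
	cmd_info = blk_mq_rq_to_pdu(req);
	/* No request pointer is stashed; the completion handler must not touch it. */
	nvme_set_info(cmd_info, NULL, async_req_completion);

	memset(&c, 0, sizeof(c));
	c.common.opcode = nvme_admin_async_event;
	c.common.command_id = req->tag;	/* only the tag value outlives the request */

	/* Give the request back to the tag set before submitting the command. */
	blk_mq_free_hctx_request(nvmeq->hctx, req);
	return __nvme_submit_cmd(nvmeq, &c);
}

With a single reserved tag there can be at most one AEN outstanding, which is why nvme_dev_start() now initializes dev->event_limit to 1 instead of deriving it from the controller's AERL field in nvme_dev_add().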
Diffstat (limited to 'drivers/block/nvme-core.c')
-rw-r--r--  drivers/block/nvme-core.c  12
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 7ed618125c27..c12c95cf2e55 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -302,8 +302,6 @@ static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn)
 static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
                                                 struct nvme_completion *cqe)
 {
-        struct request *req = ctx;
-
         u32 result = le32_to_cpup(&cqe->result);
         u16 status = le16_to_cpup(&cqe->status) >> 1;
 
@@ -312,8 +310,6 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
         if (status == NVME_SC_SUCCESS)
                 dev_warn(nvmeq->q_dmadev,
                         "async event result %08x\n", result);
-
-        blk_mq_free_hctx_request(nvmeq->hctx, req);
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -1027,18 +1023,19 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
         struct nvme_cmd_info *cmd_info;
         struct request *req;
 
-        req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, false);
+        req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
         if (IS_ERR(req))
                 return PTR_ERR(req);
 
         req->cmd_flags |= REQ_NO_TIMEOUT;
         cmd_info = blk_mq_rq_to_pdu(req);
-        nvme_set_info(cmd_info, req, async_req_completion);
+        nvme_set_info(cmd_info, NULL, async_req_completion);
 
         memset(&c, 0, sizeof(c));
         c.common.opcode = nvme_admin_async_event;
         c.common.command_id = req->tag;
 
+        blk_mq_free_hctx_request(nvmeq->hctx, req);
         return __nvme_submit_cmd(nvmeq, &c);
 }
 
@@ -1583,6 +1580,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
         dev->admin_tagset.ops = &nvme_mq_admin_ops;
         dev->admin_tagset.nr_hw_queues = 1;
         dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
+        dev->admin_tagset.reserved_tags = 1;
         dev->admin_tagset.timeout = ADMIN_TIMEOUT;
         dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
         dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
@@ -2334,7 +2332,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
         dev->oncs = le16_to_cpup(&ctrl->oncs);
         dev->abort_limit = ctrl->acl + 1;
         dev->vwc = ctrl->vwc;
-        dev->event_limit = min(ctrl->aerl + 1, 8);
         memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
         memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
         memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -2881,6 +2878,7 @@ static int nvme_dev_start(struct nvme_dev *dev)
 
         nvme_set_irq_hints(dev);
 
+        dev->event_limit = 1;
         return result;
 
  free_tags: