author		Jens Axboe <axboe@kernel.dk>	2017-11-29 11:21:50 -0500
committer	Jens Axboe <axboe@kernel.dk>	2017-11-29 11:21:50 -0500
commit		ed565371e368f014db237aacf42b27b40b1bd247
tree		3d950c90e216b27175ccfb193002c798af4f5fc2
parent		2967acbb257a6a9bf912f4778b727e00972eac9b
parent		7e5dd57ef3081ff6c03908d786ed5087f6fbb7ae
Merge branch 'nvme-4.15' of git://git.infradead.org/nvme into for-linus
Pull NVMe fixes from Christoph: "A few more nvme updates for 4.15. A single small PCIe fix, and a number of patches for RDMA that are a little larger than what I'd like to see for -rc2, but they fix important issues seen in the wild."
-rw-r--r--	drivers/nvme/host/pci.c  |   1
-rw-r--r--	drivers/nvme/host/rdma.c | 234
2 files changed, 119 insertions, 116 deletions
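Note: the rdma.c hunks below switch the driver from per-request MR allocation to a per-QP MR pool, and only complete a request after both its SEND and RECV (or LOCAL_INV) completions have fired. A minimal sketch of that pattern, using the same kernel APIs the patch itself uses (ib_mr_pool_get/ib_mr_pool_put, refcount_t); the helper names here are illustrative only and are not part of the patch:

	/* illustrative sketch, condensed from the pattern introduced below */
	static int example_setup_request(struct nvme_rdma_queue *queue,
					 struct nvme_rdma_request *req)
	{
		/* one ref for the send completion, one for recv/local-inv */
		refcount_set(&req->ref, 2);

		/* draw a pre-registered MR from the per-QP pool */
		req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
		if (!req->mr)
			return -EAGAIN;
		return 0;
	}

	static void example_complete(struct nvme_rdma_request *req,
				     struct request *rq)
	{
		/* whichever completion fires last ends the request */
		if (refcount_dec_and_test(&req->ref))
			nvme_end_request(rq, req->status, req->result);
	}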
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 617374762b7c..f5800c3c9082 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1759,6 +1759,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 		dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
 		dev->host_mem_descs, dev->host_mem_descs_dma);
 	dev->host_mem_descs = NULL;
+	dev->nr_host_mem_descs = 0;
 }
 
 static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2c597105a6bf..37af56596be6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <rdma/mr_pool.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/atomic.h>
@@ -59,6 +60,9 @@ struct nvme_rdma_request {
 	struct nvme_request	req;
 	struct ib_mr		*mr;
 	struct nvme_rdma_qe	sqe;
+	union nvme_result	result;
+	__le16			status;
+	refcount_t		ref;
 	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
 	u32			num_sge;
 	int			nents;
@@ -73,11 +77,11 @@ struct nvme_rdma_request {
 enum nvme_rdma_queue_flags {
 	NVME_RDMA_Q_ALLOCATED		= 0,
 	NVME_RDMA_Q_LIVE		= 1,
+	NVME_RDMA_Q_TR_READY		= 2,
 };
 
 struct nvme_rdma_queue {
 	struct nvme_rdma_qe	*rsp_ring;
-	atomic_t		sig_count;
 	int			queue_size;
 	size_t			cmnd_capsule_len;
 	struct nvme_rdma_ctrl	*ctrl;
@@ -258,32 +262,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	return ret;
 }
 
-static int nvme_rdma_reinit_request(void *data, struct request *rq)
-{
-	struct nvme_rdma_ctrl *ctrl = data;
-	struct nvme_rdma_device *dev = ctrl->device;
-	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	int ret = 0;
-
-	if (WARN_ON_ONCE(!req->mr))
-		return 0;
-
-	ib_dereg_mr(req->mr);
-
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		req->mr = NULL;
-		goto out;
-	}
-
-	req->mr->need_inval = false;
-
-out:
-	return ret;
-}
-
 static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 		struct request *rq, unsigned int hctx_idx)
 {
@@ -293,9 +271,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 
-	if (req->mr)
-		ib_dereg_mr(req->mr);
-
 	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
 			DMA_TO_DEVICE);
 }
@@ -317,21 +292,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 	if (ret)
 		return ret;
 
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		goto out_free_qe;
-	}
-
 	req->queue = queue;
 
 	return 0;
-
-out_free_qe:
-	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
-	return -ENOMEM;
 }
 
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -428,10 +391,23 @@ out_err:
 
 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
+	struct nvme_rdma_device *dev;
+	struct ib_device *ibdev;
 
-	rdma_destroy_qp(queue->cm_id);
+	if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
+		return;
+
+	dev = queue->device;
+	ibdev = dev->dev;
+
+	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
+
+	/*
+	 * The cm_id object might have been destroyed during RDMA connection
+	 * establishment error flow to avoid getting other cma events, thus
+	 * the destruction of the QP shouldn't use rdma_cm API.
+	 */
+	ib_destroy_qp(queue->qp);
 	ib_free_cq(queue->ib_cq);
 
 	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
@@ -440,6 +416,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	nvme_rdma_dev_put(dev);
 }
 
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+{
+	return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
+		     ibdev->attrs.max_fast_reg_page_list_len);
+}
+
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
 	struct ib_device *ibdev;
@@ -482,8 +464,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 		goto out_destroy_qp;
 	}
 
+	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
+			      queue->queue_size,
+			      IB_MR_TYPE_MEM_REG,
+			      nvme_rdma_get_max_fr_pages(ibdev));
+	if (ret) {
+		dev_err(queue->ctrl->ctrl.device,
+			"failed to initialize MR pool sized %d for QID %d\n",
+			queue->queue_size, idx);
+		goto out_destroy_ring;
+	}
+
+	set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
+
 	return 0;
 
+out_destroy_ring:
+	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
+			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
 out_destroy_qp:
 	rdma_destroy_qp(queue->cm_id);
 out_destroy_ib_cq:
@@ -510,7 +508,6 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
 	queue->queue_size = queue_size;
-	atomic_set(&queue->sig_count, 0);
 
 	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
 			RDMA_PS_TCP, IB_QPT_RC);
@@ -546,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 
 out_destroy_cm_id:
 	rdma_destroy_id(queue->cm_id);
+	nvme_rdma_destroy_queue_ib(queue);
 	return ret;
 }
 
@@ -756,8 +754,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 
 	ctrl->device = ctrl->queues[0].device;
 
-	ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
-		ctrl->device->dev->attrs.max_fast_reg_page_list_len);
+	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
 	if (new) {
 		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
@@ -771,10 +768,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 			error = PTR_ERR(ctrl->ctrl.admin_q);
 			goto out_free_tagset;
 		}
-	} else {
-		error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
-		if (error)
-			goto out_free_queue;
 	}
 
 	error = nvme_rdma_start_queue(ctrl, 0);
@@ -854,10 +847,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 			goto out_free_tag_set;
 		}
 	} else {
-		ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
-		if (ret)
-			goto out_free_io_queues;
-
 		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
 			ctrl->ctrl.queue_count - 1);
 	}
@@ -1018,8 +1007,18 @@ static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
 
 static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
+	struct nvme_rdma_request *req =
+		container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
+		return;
+	}
+
+	if (refcount_dec_and_test(&req->ref))
+		nvme_end_request(rq, req->status, req->result);
+
 }
 
 static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
@@ -1030,7 +1029,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
 		.opcode		    = IB_WR_LOCAL_INV,
 		.next		    = NULL,
 		.num_sge	    = 0,
-		.send_flags	    = 0,
+		.send_flags	    = IB_SEND_SIGNALED,
 		.ex.invalidate_rkey = req->mr->rkey,
 	};
 
@@ -1044,22 +1043,15 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
-	int res;
 
 	if (!blk_rq_bytes(rq))
 		return;
 
-	if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) {
-		res = nvme_rdma_inv_rkey(queue, req);
-		if (unlikely(res < 0)) {
-			dev_err(ctrl->ctrl.device,
-				"Queueing INV WR for rkey %#x failed (%d)\n",
-				req->mr->rkey, res);
-			nvme_rdma_error_recovery(queue->ctrl);
-		}
+	if (req->mr) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
 	}
 
 	ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
@@ -1118,12 +1110,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
 	int nr;
 
+	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
+	if (WARN_ON_ONCE(!req->mr))
+		return -EAGAIN;
+
 	/*
 	 * Align the MR to a 4K page size to match the ctrl page size and
 	 * the block virtual boundary.
 	 */
 	nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
 	if (unlikely(nr < count)) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
 		if (nr < 0)
 			return nr;
 		return -EINVAL;
@@ -1142,8 +1140,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 			IB_ACCESS_REMOTE_READ |
 			IB_ACCESS_REMOTE_WRITE;
 
-	req->mr->need_inval = true;
-
 	sg->addr = cpu_to_le64(req->mr->iova);
 	put_unaligned_le24(req->mr->length, sg->length);
 	put_unaligned_le32(req->mr->rkey, sg->key);
@@ -1163,7 +1159,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 	req->num_sge = 1;
 	req->inline_data = false;
-	req->mr->need_inval = false;
+	refcount_set(&req->ref, 2); /* send and recv completions */
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
@@ -1200,25 +1196,24 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
-		nvme_rdma_wr_error(cq, wc, "SEND");
-}
+	struct nvme_rdma_qe *qe =
+		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
+	struct nvme_rdma_request *req =
+		container_of(qe, struct nvme_rdma_request, sqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
 
-/*
- * We want to signal completion at least every queue depth/2. This returns the
- * largest power of two that is not above half of (queue size + 1) to optimize
- * (avoid divisions).
- */
-static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
-{
-	int limit = 1 << ilog2((queue->queue_size + 1) / 2);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		nvme_rdma_wr_error(cq, wc, "SEND");
+		return;
+	}
 
-	return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
+	if (refcount_dec_and_test(&req->ref))
+		nvme_end_request(rq, req->status, req->result);
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
-		struct ib_send_wr *first, bool flush)
+		struct ib_send_wr *first)
 {
 	struct ib_send_wr wr, *bad_wr;
 	int ret;
@@ -1227,31 +1222,12 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	sge->length = sizeof(struct nvme_command),
 	sge->lkey   = queue->device->pd->local_dma_lkey;
 
-	qe->cqe.done = nvme_rdma_send_done;
-
 	wr.next       = NULL;
 	wr.wr_cqe     = &qe->cqe;
 	wr.sg_list    = sge;
 	wr.num_sge    = num_sge;
 	wr.opcode     = IB_WR_SEND;
-	wr.send_flags = 0;
-
-	/*
-	 * Unsignalled send completions are another giant desaster in the
-	 * IB Verbs spec:  If we don't regularly post signalled sends
-	 * the send queue will fill up and only a QP reset will rescue us.
-	 * Would have been way to obvious to handle this in hardware or
-	 * at least the RDMA stack..
-	 *
-	 * Always signal the flushes. The magic request used for the flush
-	 * sequencer is not allocated in our driver's tagset and it's
-	 * triggered to be freed by blk_cleanup_queue(). So we need to
-	 * always mark it as signaled to ensure that the "wr_cqe", which is
-	 * embedded in request's payload, is not freed when __ib_process_cq()
-	 * calls wr_cqe->done().
-	 */
-	if (nvme_rdma_queue_sig_limit(queue) || flush)
-		wr.send_flags |= IB_SEND_SIGNALED;
+	wr.send_flags = IB_SEND_SIGNALED;
 
 	if (first)
 		first->next = &wr;
@@ -1301,6 +1277,12 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue)
 	return queue->ctrl->tag_set.tags[queue_idx - 1];
 }
 
+static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	if (unlikely(wc->status != IB_WC_SUCCESS))
+		nvme_rdma_wr_error(cq, wc, "ASYNC");
+}
+
 static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
@@ -1319,10 +1301,12 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 	cmd->common.flags |= NVME_CMD_SGL_METABUF;
 	nvme_rdma_set_sg_null(cmd);
 
+	sqe->cqe.done = nvme_rdma_async_done;
+
 	ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
 			DMA_TO_DEVICE);
 
-	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false);
+	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
 	WARN_ON_ONCE(ret);
 }
 
@@ -1343,14 +1327,34 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	}
 	req = blk_mq_rq_to_pdu(rq);
 
-	if (rq->tag == tag)
-		ret = 1;
+	req->status = cqe->status;
+	req->result = cqe->result;
+
+	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+		if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
+			dev_err(queue->ctrl->ctrl.device,
+				"Bogus remote invalidation for rkey %#x\n",
+				req->mr->rkey);
+			nvme_rdma_error_recovery(queue->ctrl);
+		}
+	} else if (req->mr) {
+		ret = nvme_rdma_inv_rkey(queue, req);
+		if (unlikely(ret < 0)) {
+			dev_err(queue->ctrl->ctrl.device,
+				"Queueing INV WR for rkey %#x failed (%d)\n",
+				req->mr->rkey, ret);
+			nvme_rdma_error_recovery(queue->ctrl);
+		}
+		/* the local invalidation completion will end the request */
+		return 0;
+	}
 
-	if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
-	    wc->ex.invalidate_rkey == req->mr->rkey)
-		req->mr->need_inval = false;
+	if (refcount_dec_and_test(&req->ref)) {
+		if (rq->tag == tag)
+			ret = 1;
+		nvme_end_request(rq, req->status, req->result);
+	}
 
-	nvme_end_request(rq, cqe->status, cqe->result);
 	return ret;
 }
 
@@ -1607,7 +1611,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_qe *sqe = &req->sqe;
 	struct nvme_command *c = sqe->data;
-	bool flush = false;
 	struct ib_device *dev;
 	blk_status_t ret;
 	int err;
@@ -1636,13 +1639,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		goto err;
 	}
 
+	sqe->cqe.done = nvme_rdma_send_done;
+
 	ib_dma_sync_single_for_device(dev, sqe->dma,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
-	if (req_op(rq) == REQ_OP_FLUSH)
-		flush = true;
 	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
-			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
+			req->mr ? &req->reg_wr.wr : NULL);
 	if (unlikely(err)) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
@@ -1790,7 +1793,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
 	.get_address		= nvmf_get_address,
-	.reinit_request		= nvme_rdma_reinit_request,
 };
 
 static inline bool