-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 421
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h |  75
2 files changed, 419 insertions(+), 77 deletions(-)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index c9b3b9e0679f..b42f1323ea56 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -66,6 +66,7 @@ static unsigned int srp_sg_tablesize;
 static unsigned int cmd_sg_entries;
 static unsigned int indirect_sg_entries;
 static bool allow_ext_sg;
+static bool prefer_fr;
 static bool register_always;
 static int topspin_workarounds = 1;
 
@@ -88,6 +89,10 @@ module_param(topspin_workarounds, int, 0444);
 MODULE_PARM_DESC(topspin_workarounds,
 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
 
+module_param(prefer_fr, bool, 0444);
+MODULE_PARM_DESC(prefer_fr,
+		 "Whether to use fast registration if both FMR and fast registration are supported");
+
 module_param(register_always, bool, 0444);
 MODULE_PARM_DESC(register_always,
 		 "Use memory registration even for contiguous memory regions");
@@ -311,6 +316,132 @@ static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
 	return ib_create_fmr_pool(dev->pd, &fmr_param);
 }
 
+/**
+ * srp_destroy_fr_pool() - free the resources owned by a pool
+ * @pool: Fast registration pool to be destroyed.
+ */
+static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
+{
+	int i;
+	struct srp_fr_desc *d;
+
+	if (!pool)
+		return;
+
+	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
+		if (d->frpl)
+			ib_free_fast_reg_page_list(d->frpl);
+		if (d->mr)
+			ib_dereg_mr(d->mr);
+	}
+	kfree(pool);
+}
+
+/**
+ * srp_create_fr_pool() - allocate and initialize a pool for fast registration
+ * @device: IB device to allocate fast registration descriptors for.
+ * @pd: Protection domain associated with the FR descriptors.
+ * @pool_size: Number of descriptors to allocate.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ */
+static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
+					      struct ib_pd *pd, int pool_size,
+					      int max_page_list_len)
+{
+	struct srp_fr_pool *pool;
+	struct srp_fr_desc *d;
+	struct ib_mr *mr;
+	struct ib_fast_reg_page_list *frpl;
+	int i, ret = -EINVAL;
+
+	if (pool_size <= 0)
+		goto err;
+	ret = -ENOMEM;
+	pool = kzalloc(sizeof(struct srp_fr_pool) +
+		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
+	if (!pool)
+		goto err;
+	pool->size = pool_size;
+	pool->max_page_list_len = max_page_list_len;
+	spin_lock_init(&pool->lock);
+	INIT_LIST_HEAD(&pool->free_list);
+
+	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
+		mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
+		if (IS_ERR(mr)) {
+			ret = PTR_ERR(mr);
+			goto destroy_pool;
+		}
+		d->mr = mr;
+		frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
+		if (IS_ERR(frpl)) {
+			ret = PTR_ERR(frpl);
+			goto destroy_pool;
+		}
+		d->frpl = frpl;
+		list_add_tail(&d->entry, &pool->free_list);
+	}
+
+out:
+	return pool;
+
+destroy_pool:
+	srp_destroy_fr_pool(pool);
+
+err:
+	pool = ERR_PTR(ret);
+	goto out;
+}
+
+/**
+ * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
+ * @pool: Pool to obtain descriptor from.
+ */
+static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
+{
+	struct srp_fr_desc *d = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	if (!list_empty(&pool->free_list)) {
+		d = list_first_entry(&pool->free_list, typeof(*d), entry);
+		list_del(&d->entry);
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	return d;
+}
+
+/**
+ * srp_fr_pool_put() - put an FR descriptor back in the free list
+ * @pool: Pool the descriptor was allocated from.
+ * @desc: Pointer to an array of fast registration descriptor pointers.
+ * @n: Number of descriptors to put back.
+ *
+ * Note: The caller must already have queued an invalidation request for
+ * desc->mr->rkey before calling this function.
+ */
+static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
+			    int n)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	for (i = 0; i < n; i++)
+		list_add(&desc[i]->entry, &pool->free_list);
+	spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
+{
+	struct srp_device *dev = target->srp_host->srp_dev;
+
+	return srp_create_fr_pool(dev->dev, dev->pd,
+				  target->scsi_host->can_queue,
+				  dev->max_pages_per_mr);
+}
+
 static int srp_create_target_ib(struct srp_target_port *target)
 {
 	struct srp_device *dev = target->srp_host->srp_dev;
@@ -318,6 +449,8 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	struct ib_cq *recv_cq, *send_cq;
 	struct ib_qp *qp;
 	struct ib_fmr_pool *fmr_pool = NULL;
+	struct srp_fr_pool *fr_pool = NULL;
+	const int m = 1 + dev->use_fast_reg;
 	int ret;
 
 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
@@ -332,7 +465,7 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	}
 
 	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target,
-			       target->queue_size, target->comp_vector);
+			       m * target->queue_size, target->comp_vector);
 	if (IS_ERR(send_cq)) {
 		ret = PTR_ERR(send_cq);
 		goto err_recv_cq;
@@ -341,11 +474,11 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
 
 	init_attr->event_handler = srp_qp_event;
-	init_attr->cap.max_send_wr = target->queue_size;
+	init_attr->cap.max_send_wr = m * target->queue_size;
 	init_attr->cap.max_recv_wr = target->queue_size;
 	init_attr->cap.max_recv_sge = 1;
 	init_attr->cap.max_send_sge = 1;
-	init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+	init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
 	init_attr->qp_type = IB_QPT_RC;
 	init_attr->send_cq = send_cq;
 	init_attr->recv_cq = recv_cq;
@@ -360,7 +493,18 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	if (ret)
 		goto err_qp;
 
-	if (dev->has_fmr) {
+	if (dev->use_fast_reg && dev->has_fr) {
+		fr_pool = srp_alloc_fr_pool(target);
+		if (IS_ERR(fr_pool)) {
+			ret = PTR_ERR(fr_pool);
+			shost_printk(KERN_WARNING, target->scsi_host, PFX
+				     "FR pool allocation failed (%d)\n", ret);
+			goto err_qp;
+		}
+		if (target->fr_pool)
+			srp_destroy_fr_pool(target->fr_pool);
+		target->fr_pool = fr_pool;
+	} else if (!dev->use_fast_reg && dev->has_fmr) {
 		fmr_pool = srp_alloc_fmr_pool(target);
 		if (IS_ERR(fmr_pool)) {
 			ret = PTR_ERR(fmr_pool);
@@ -407,10 +551,16 @@ err:
  */
 static void srp_free_target_ib(struct srp_target_port *target)
 {
+	struct srp_device *dev = target->srp_host->srp_dev;
 	int i;
 
-	if (target->fmr_pool)
-		ib_destroy_fmr_pool(target->fmr_pool);
+	if (dev->use_fast_reg) {
+		if (target->fr_pool)
+			srp_destroy_fr_pool(target->fr_pool);
+	} else {
+		if (target->fmr_pool)
+			ib_destroy_fmr_pool(target->fmr_pool);
+	}
 	ib_destroy_qp(target->qp);
 	ib_destroy_cq(target->send_cq);
 	ib_destroy_cq(target->recv_cq);
@@ -615,7 +765,8 @@ static void srp_disconnect_target(struct srp_target_port *target)
 
 static void srp_free_req_data(struct srp_target_port *target)
 {
-	struct ib_device *ibdev = target->srp_host->srp_dev->dev;
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_device *ibdev = dev->dev;
 	struct srp_request *req;
 	int i;
 
@@ -624,7 +775,10 @@ static void srp_free_req_data(struct srp_target_port *target)
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		req = &target->req_ring[i];
-		kfree(req->fmr_list);
+		if (dev->use_fast_reg)
+			kfree(req->fr_list);
+		else
+			kfree(req->fmr_list);
 		kfree(req->map_page);
 		if (req->indirect_dma_addr) {
 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
@@ -643,6 +797,7 @@ static int srp_alloc_req_data(struct srp_target_port *target)
 	struct srp_device *srp_dev = target->srp_host->srp_dev;
 	struct ib_device *ibdev = srp_dev->dev;
 	struct srp_request *req;
+	void *mr_list;
 	dma_addr_t dma_addr;
 	int i, ret = -ENOMEM;
 
@@ -655,12 +810,20 @@ static int srp_alloc_req_data(struct srp_target_port *target)
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		req = &target->req_ring[i];
-		req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
 					GFP_KERNEL);
+		if (!mr_list)
+			goto out;
+		if (srp_dev->use_fast_reg)
+			req->fr_list = mr_list;
+		else
+			req->fmr_list = mr_list;
 		req->map_page = kmalloc(srp_dev->max_pages_per_mr *
 					sizeof(void *), GFP_KERNEL);
+		if (!req->map_page)
+			goto out;
 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
-		if (!req->fmr_list || !req->map_page || !req->indirect_desc)
+		if (!req->indirect_desc)
 			goto out;
 
 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
@@ -797,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target)
 	}
 }
 
+static int srp_inv_rkey(struct srp_target_port *target, u32 rkey)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_send_wr wr = {
+		.opcode = IB_WR_LOCAL_INV,
+		.wr_id = LOCAL_INV_WR_ID_MASK,
+		.next = NULL,
+		.num_sge = 0,
+		.send_flags = 0,
+		.ex.invalidate_rkey = rkey,
+	};
+
+	return ib_post_send(target->qp, &wr, &bad_wr);
+}
+
 static void srp_unmap_data(struct scsi_cmnd *scmnd,
 			   struct srp_target_port *target,
 			   struct srp_request *req)
 {
-	struct ib_device *ibdev = target->srp_host->srp_dev->dev;
-	struct ib_pool_fmr **pfmr;
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_device *ibdev = dev->dev;
+	int i, res;
 
 	if (!scsi_sglist(scmnd) ||
 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
 		return;
 
-	pfmr = req->fmr_list;
-	while (req->nmdesc--)
-		ib_fmr_pool_unmap(*pfmr++);
+	if (dev->use_fast_reg) {
+		struct srp_fr_desc **pfr;
+
+		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
+			res = srp_inv_rkey(target, (*pfr)->mr->rkey);
+			if (res < 0) {
+				shost_printk(KERN_ERR, target->scsi_host, PFX
+				  "Queueing INV WR for rkey %#x failed (%d)\n",
+				  (*pfr)->mr->rkey, res);
+				queue_work(system_long_wq,
+					   &target->tl_err_work);
+			}
+		}
+		if (req->nmdesc)
+			srp_fr_pool_put(target->fr_pool, req->fr_list,
+					req->nmdesc);
+	} else {
+		struct ib_pool_fmr **pfmr;
+
+		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
+			ib_fmr_pool_unmap(*pfmr);
+	}
 
 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
 			scmnd->sc_data_direction);
@@ -924,21 +1122,19 @@ static int srp_rport_reconnect(struct srp_rport *rport)
 	 * callbacks will have finished before a new QP is allocated.
 	 */
 	ret = srp_new_cm_id(target);
-	/*
-	 * Whether or not creating a new CM ID succeeded, create a new
-	 * QP. This guarantees that all completion callback function
-	 * invocations have finished before request resetting starts.
-	 */
-	if (ret == 0)
-		ret = srp_create_target_ib(target);
-	else
-		srp_create_target_ib(target);
 
 	for (i = 0; i < target->req_ring_size; ++i) {
 		struct srp_request *req = &target->req_ring[i];
 		srp_finish_req(target, req, NULL, DID_RESET << 16);
 	}
 
+	/*
+	 * Whether or not creating a new CM ID succeeded, create a new
+	 * QP. This guarantees that all callback functions for the old QP have
+	 * finished before any send requests are posted on the new QP.
+	 */
+	ret += srp_create_target_ib(target);
+
 	INIT_LIST_HEAD(&target->free_tx);
 	for (i = 0; i < target->queue_size; ++i)
 		list_add(&target->tx_ring[i]->list, &target->free_tx);
@@ -986,6 +1182,47 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
 	return 0;
 }
 
+static int srp_map_finish_fr(struct srp_map_state *state,
+			     struct srp_target_port *target)
+{
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_send_wr *bad_wr;
+	struct ib_send_wr wr;
+	struct srp_fr_desc *desc;
+	u32 rkey;
+
+	desc = srp_fr_pool_get(target->fr_pool);
+	if (!desc)
+		return -ENOMEM;
+
+	rkey = ib_inc_rkey(desc->mr->rkey);
+	ib_update_fast_reg_key(desc->mr, rkey);
+
+	memcpy(desc->frpl->page_list, state->pages,
+	       sizeof(state->pages[0]) * state->npages);
+
+	memset(&wr, 0, sizeof(wr));
+	wr.opcode = IB_WR_FAST_REG_MR;
+	wr.wr_id = FAST_REG_WR_ID_MASK;
+	wr.wr.fast_reg.iova_start = state->base_dma_addr;
+	wr.wr.fast_reg.page_list = desc->frpl;
+	wr.wr.fast_reg.page_list_len = state->npages;
+	wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
+	wr.wr.fast_reg.length = state->dma_len;
+	wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+				       IB_ACCESS_REMOTE_READ |
+				       IB_ACCESS_REMOTE_WRITE);
+	wr.wr.fast_reg.rkey = desc->mr->lkey;
+
+	*state->next_fr++ = desc;
+	state->nmdesc++;
+
+	srp_map_desc(state, state->base_dma_addr, state->dma_len,
+		     desc->mr->rkey);
+
+	return ib_post_send(target->qp, &wr, &bad_wr);
+}
+
 static int srp_finish_mapping(struct srp_map_state *state,
 			      struct srp_target_port *target)
 {
@@ -998,7 +1235,9 @@ static int srp_finish_mapping(struct srp_map_state *state,
 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
 			     target->rkey);
 	else
-		ret = srp_map_finish_fmr(state, target);
+		ret = target->srp_host->srp_dev->use_fast_reg ?
+			srp_map_finish_fr(state, target) :
+			srp_map_finish_fmr(state, target);
 
 	if (ret == 0) {
 		state->npages = 0;
@@ -1020,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state,
 static int srp_map_sg_entry(struct srp_map_state *state,
 			    struct srp_target_port *target,
 			    struct scatterlist *sg, int sg_index,
-			    int use_fmr)
+			    bool use_mr)
 {
 	struct srp_device *dev = target->srp_host->srp_dev;
 	struct ib_device *ibdev = dev->dev;
@@ -1032,22 +1271,24 @@ static int srp_map_sg_entry(struct srp_map_state *state,
 	if (!dma_len)
 		return 0;
 
-	if (use_fmr == SRP_MAP_NO_FMR) {
-		/* Once we're in direct map mode for a request, we don't
-		 * go back to FMR mode, so no need to update anything
+	if (!use_mr) {
+		/*
+		 * Once we're in direct map mode for a request, we don't
+		 * go back to FMR or FR mode, so no need to update anything
 		 * other than the descriptor.
 		 */
 		srp_map_desc(state, dma_addr, dma_len, target->rkey);
 		return 0;
 	}
 
-	/* If we start at an offset into the FMR page, don't merge into
-	 * the current FMR. Finish it out, and use the kernel's MR for this
-	 * sg entry. This is to avoid potential bugs on some SRP targets
-	 * that were never quite defined, but went away when the initiator
-	 * avoided using FMR on such page fragments.
+	/*
+	 * Since not all RDMA HW drivers support non-zero page offsets for
+	 * FMR, if we start at an offset into a page, don't merge into the
+	 * current FMR mapping. Finish it out, and use the kernel's MR for
+	 * this sg entry.
 	 */
-	if (dma_addr & ~dev->mr_page_mask || dma_len > dev->mr_max_size) {
+	if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
+	    dma_len > dev->mr_max_size) {
 		ret = srp_finish_mapping(state, target);
 		if (ret)
 			return ret;
@@ -1057,16 +1298,18 @@ static int srp_map_sg_entry(struct srp_map_state *state,
 		return 0;
 	}
 
-	/* If this is the first sg to go into the FMR, save our position.
-	 * We need to know the first unmapped entry, its index, and the
-	 * first unmapped address within that entry to be able to restart
-	 * mapping after an error.
+	/*
+	 * If this is the first sg that will be mapped via FMR or via FR, save
+	 * our position. We need to know the first unmapped entry, its index,
+	 * and the first unmapped address within that entry to be able to
+	 * restart mapping after an error.
 	 */
 	if (!state->unmapped_sg)
 		srp_map_update_start(state, sg, sg_index, dma_addr);
 
 	while (dma_len) {
-		if (state->npages == dev->max_pages_per_mr) {
+		unsigned offset = dma_addr & ~dev->mr_page_mask;
+		if (state->npages == dev->max_pages_per_mr || offset != 0) {
 			ret = srp_finish_mapping(state, target);
 			if (ret)
 				return ret;
@@ -1074,17 +1317,18 @@ static int srp_map_sg_entry(struct srp_map_state *state,
 			srp_map_update_start(state, sg, sg_index, dma_addr);
 		}
 
-		len = min_t(unsigned int, dma_len, dev->mr_page_size);
+		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
 
 		if (!state->npages)
 			state->base_dma_addr = dma_addr;
-		state->pages[state->npages++] = dma_addr;
+		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
 		state->dma_len += len;
 		dma_addr += len;
 		dma_len -= len;
 	}
 
-	/* If the last entry of the FMR wasn't a full page, then we need to
+	/*
+	 * If the last entry of the MR wasn't a full page, then we need to
 	 * close it out and start a new one -- we can only merge at page
 	 * boundries.
 	 */
@@ -1097,25 +1341,32 @@ static int srp_map_sg_entry(struct srp_map_state *state,
 	return ret;
 }
 
-static void srp_map_fmr(struct srp_map_state *state,
+static int srp_map_sg(struct srp_map_state *state,
 			struct srp_target_port *target, struct srp_request *req,
 			struct scatterlist *scat, int count)
 {
 	struct srp_device *dev = target->srp_host->srp_dev;
 	struct ib_device *ibdev = dev->dev;
 	struct scatterlist *sg;
-	int i, use_fmr;
+	int i;
+	bool use_mr;
 
 	state->desc = req->indirect_desc;
 	state->pages = req->map_page;
-	state->next_fmr = req->fmr_list;
-
-	use_fmr = target->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
+	if (dev->use_fast_reg) {
+		state->next_fr = req->fr_list;
+		use_mr = !!target->fr_pool;
+	} else {
+		state->next_fmr = req->fmr_list;
+		use_mr = !!target->fmr_pool;
+	}
 
 	for_each_sg(scat, sg, count, i) {
-		if (srp_map_sg_entry(state, target, sg, i, use_fmr)) {
-			/* FMR mapping failed, so backtrack to the first
-			 * unmapped entry and continue on without using FMR.
+		if (srp_map_sg_entry(state, target, sg, i, use_mr)) {
+			/*
+			 * Memory registration failed, so backtrack to the
+			 * first unmapped entry and continue on without using
+			 * memory registration.
 			 */
 			dma_addr_t dma_addr;
 			unsigned int dma_len;
@@ -1128,15 +1379,17 @@ backtrack:
 			dma_len = ib_sg_dma_len(ibdev, sg);
 			dma_len -= (state->unmapped_addr - dma_addr);
 			dma_addr = state->unmapped_addr;
-			use_fmr = SRP_MAP_NO_FMR;
+			use_mr = false;
 			srp_map_desc(state, dma_addr, dma_len, target->rkey);
 		}
 	}
 
-	if (use_fmr == SRP_MAP_ALLOW_FMR && srp_finish_mapping(state, target))
+	if (use_mr && srp_finish_mapping(state, target))
 		goto backtrack;
 
 	req->nmdesc = state->nmdesc;
+
+	return 0;
 }
 
 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
@@ -1193,9 +1446,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
 		goto map_complete;
 	}
 
-	/* We have more than one scatter/gather entry, so build our indirect
-	 * descriptor table, trying to merge as many entries with FMR as we
-	 * can.
+	/*
+	 * We have more than one scatter/gather entry, so build our indirect
+	 * descriptor table, trying to merge as many entries as we can.
 	 */
 	indirect_hdr = (void *) cmd->add_data;
 
@@ -1203,7 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
 				   target->indirect_size, DMA_TO_DEVICE);
 
 	memset(&state, 0, sizeof(state));
-	srp_map_fmr(&state, target, req, scat, count);
+	srp_map_sg(&state, target, req, scat, count);
 
 	/* We've mapped the request, now pull as much of the indirect
 	 * descriptor table as we can into the command buffer. If this
@@ -1212,7 +1465,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
 	 * give us more S/G entries than we allow.
 	 */
 	if (state.ndesc == 1) {
-		/* FMR mapping was able to collapse this to one entry,
+		/*
+		 * Memory registration collapsed the sg-list into one entry,
 		 * so use a direct descriptor.
 		 */
 		struct srp_direct_buf *buf = (void *) cmd->add_data;
@@ -1535,14 +1789,24 @@ static void srp_tl_err_work(struct work_struct *work)
 	srp_start_tl_fail_timers(target->rport);
 }
 
-static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err,
-			      struct srp_target_port *target)
+static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
+			      bool send_err, struct srp_target_port *target)
 {
 	if (target->connected && !target->qp_in_error) {
-		shost_printk(KERN_ERR, target->scsi_host,
-			     PFX "failed %s status %d\n",
-			     send_err ? "send" : "receive",
+		if (wr_id & LOCAL_INV_WR_ID_MASK) {
+			shost_printk(KERN_ERR, target->scsi_host, PFX
+				     "LOCAL_INV failed with status %d\n",
 			     wc_status);
+		} else if (wr_id & FAST_REG_WR_ID_MASK) {
+			shost_printk(KERN_ERR, target->scsi_host, PFX
+				     "FAST_REG_MR failed status %d\n",
+				     wc_status);
+		} else {
+			shost_printk(KERN_ERR, target->scsi_host,
+				     PFX "failed %s status %d for iu %p\n",
+				     send_err ? "send" : "receive",
+				     wc_status, (void *)(uintptr_t)wr_id);
+		}
 		queue_work(system_long_wq, &target->tl_err_work);
 	}
 	target->qp_in_error = true;
@@ -1558,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
 		if (likely(wc.status == IB_WC_SUCCESS)) {
 			srp_handle_recv(target, &wc);
 		} else {
-			srp_handle_qp_err(wc.status, false, target);
+			srp_handle_qp_err(wc.wr_id, wc.status, false, target);
 		}
 	}
 }
@@ -1574,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
 			iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
 			list_add(&iu->list, &target->free_tx);
 		} else {
-			srp_handle_qp_err(wc.status, true, target);
+			srp_handle_qp_err(wc.wr_id, wc.status, true, target);
 		}
 	}
 }
@@ -2737,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev,
 		goto err;
 	}
 
-	if (!srp_dev->has_fmr && !target->allow_ext_sg &&
+	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
 	    target->cmd_sg_cnt < target->sg_tablesize) {
-		pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
+		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
 		target->sg_tablesize = target->cmd_sg_cnt;
 	}
 
@@ -2896,6 +3160,13 @@ static void srp_add_one(struct ib_device *device)
 
 	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
 			    device->map_phys_fmr && device->unmap_fmr);
+	srp_dev->has_fr = (dev_attr->device_cap_flags &
+			   IB_DEVICE_MEM_MGT_EXTENSIONS);
+	if (!srp_dev->has_fmr && !srp_dev->has_fr)
+		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+
+	srp_dev->use_fast_reg = (srp_dev->has_fr &&
+				 (!srp_dev->has_fmr || prefer_fr));
 
 	/*
 	 * Use the smallest page size supported by the HCA, down to a
@@ -2909,10 +3180,16 @@ static void srp_add_one(struct ib_device *device)
 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
 					  max_pages_per_mr);
+	if (srp_dev->use_fast_reg) {
+		srp_dev->max_pages_per_mr =
+			min_t(u32, srp_dev->max_pages_per_mr,
+			      dev_attr->max_fast_reg_page_list_len);
+	}
 	srp_dev->mr_max_size = srp_dev->mr_page_size *
 			       srp_dev->max_pages_per_mr;
-	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, max_pages_per_mr = %d, mr_max_size = %#x\n",
+	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
 		 device->name, mr_page_shift, dev_attr->max_mr_size,
+		 dev_attr->max_fast_reg_page_list_len,
 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
 	INIT_LIST_HEAD(&srp_dev->dev_list);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index eb130486b1c8..e46ecb15aa0d 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -68,8 +68,8 @@ enum {
 
 	SRP_MAX_PAGES_PER_MR = 512,
 
-	SRP_MAP_ALLOW_FMR = 0,
-	SRP_MAP_NO_FMR = 1,
+	LOCAL_INV_WR_ID_MASK = 1,
+	FAST_REG_WR_ID_MASK = 2,
 };
 
 enum srp_target_state {
@@ -83,6 +83,12 @@ enum srp_iu_type {
 	SRP_IU_RSP,
 };
 
+/*
+ * @mr_page_mask: HCA memory registration page mask.
+ * @mr_page_size: HCA memory registration page size.
+ * @mr_max_size: Maximum size in bytes of a single FMR / FR registration
+ *   request.
+ */
 struct srp_device {
 	struct list_head dev_list;
 	struct ib_device *dev;
@@ -93,6 +99,8 @@ struct srp_device {
 	int mr_max_size;
 	int max_pages_per_mr;
 	bool has_fmr;
+	bool has_fr;
+	bool use_fast_reg;
 };
 
 struct srp_host {
@@ -110,7 +118,10 @@ struct srp_request {
 	struct list_head list;
 	struct scsi_cmnd *scmnd;
 	struct srp_iu *cmd;
-	struct ib_pool_fmr **fmr_list;
+	union {
+		struct ib_pool_fmr **fmr_list;
+		struct srp_fr_desc **fr_list;
+	};
 	u64 *map_page;
 	struct srp_direct_buf *indirect_desc;
 	dma_addr_t indirect_dma_addr;
@@ -129,7 +140,10 @@ struct srp_target_port {
 	struct ib_cq *send_cq ____cacheline_aligned_in_smp;
 	struct ib_cq *recv_cq;
 	struct ib_qp *qp;
-	struct ib_fmr_pool *fmr_pool;
+	union {
+		struct ib_fmr_pool *fmr_pool;
+		struct srp_fr_pool *fr_pool;
+	};
 	u32 lkey;
 	u32 rkey;
 	enum srp_target_state state;
@@ -196,8 +210,59 @@ struct srp_iu {
 	enum dma_data_direction direction;
 };
 
+/**
+ * struct srp_fr_desc - fast registration work request arguments
+ * @entry: Entry in srp_fr_pool.free_list.
+ * @mr: Memory region.
+ * @frpl: Fast registration page list.
+ */
+struct srp_fr_desc {
+	struct list_head entry;
+	struct ib_mr *mr;
+	struct ib_fast_reg_page_list *frpl;
+};
+
+/**
+ * struct srp_fr_pool - pool of fast registration descriptors
+ *
+ * An entry is available for allocation if and only if it occurs in @free_list.
+ *
+ * @size: Number of descriptors in this pool.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ * @lock: Protects free_list.
+ * @free_list: List of free descriptors.
+ * @desc: Fast registration descriptor pool.
+ */
+struct srp_fr_pool {
+	int size;
+	int max_page_list_len;
+	spinlock_t lock;
+	struct list_head free_list;
+	struct srp_fr_desc desc[0];
+};
+
+/**
+ * struct srp_map_state - per-request DMA memory mapping state
+ * @desc: Pointer to the element of the SRP buffer descriptor array
+ *   that is being filled in.
+ * @pages: Array with DMA addresses of pages being considered for
+ *   memory registration.
+ * @base_dma_addr: DMA address of the first page that has not yet been mapped.
+ * @dma_len: Number of bytes that will be registered with the next
+ *   FMR or FR memory registration call.
+ * @total_len: Total number of bytes in the sg-list being mapped.
+ * @npages: Number of page addresses in the pages[] array.
+ * @nmdesc: Number of FMR or FR memory descriptors used for mapping.
+ * @ndesc: Number of SRP buffer descriptors that have been filled in.
+ * @unmapped_sg: First element of the sg-list that is mapped via FMR or FR.
+ * @unmapped_index: Index of the first element mapped via FMR or FR.
+ * @unmapped_addr: DMA address of the first element mapped via FMR or FR.
+ */
 struct srp_map_state {
-	struct ib_pool_fmr **next_fmr;
+	union {
+		struct ib_pool_fmr **next_fmr;
+		struct srp_fr_desc **next_fr;
+	};
 	struct srp_direct_buf *desc;
 	u64 *pages;
 	dma_addr_t base_dma_addr;