author		Bart Van Assche <bvanassche@acm.org>	2014-05-20 09:07:20 -0400
committer	Roland Dreier <roland@purestorage.com>	2014-05-20 12:20:52 -0400
commit		d1b4289e16477fe13e95b88ffb7067c87b10ab6e
tree		674cb79bacd5ea13e87ba6fd4bf6cdaddd76a05a
parent		b1b8854d1622b99b64cd98ed307ffd168c6d3ebd
IB/srp: One FMR pool per SRP connection
Allocate one FMR pool per SRP connection instead of one FMR pool per HCA. This improves scalability of the SRP initiator.

Only request the SCSI mid-layer to retry a SCSI command after a temporary mapping failure (-ENOMEM) but not after a permanent mapping failure. This avoids SCSI commands being retried indefinitely if a permanent memory mapping failure occurs.

Tell the SCSI mid-layer to reduce the queue depth temporarily in the unlikely case where an application queues many requests with more than max_pages_per_fmr sg-list elements.

For FMR pool allocation, base the max_pages_per_fmr parameter on the HCA memory registration limit. Only try to allocate an FMR pool if FMR is supported.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.c	129
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.h	7
2 files changed, 84 insertions(+), 52 deletions(-)
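The srp_add_one() hunks below replace the fixed SRP_FMR_SIZE-based limit with a value derived from the HCA's max_mr_size attribute. The following standalone userspace C sketch is not part of the patch; the max_mr_size and page-size values are made up for illustration, and it merely mirrors the computation done in the diff:

/*
 * Standalone userspace sketch (not kernel code) of how the patch derives
 * max_pages_per_fmr: cap at SRP_FMR_SIZE pages per FMR, reduced further if
 * the HCA's max_mr_size allows fewer pages. The max_mr_size and page-size
 * values below are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define SRP_FMR_SIZE 512			/* same cap as in ib_srp.h */

int main(void)
{
	uint64_t max_mr_size = 1ULL << 31;	/* hypothetical HCA registration limit */
	unsigned int fmr_page_size = 4096;	/* smallest page size the driver uses */

	uint64_t pages = max_mr_size / fmr_page_size;	/* do_div() in the kernel */
	unsigned int max_pages_per_fmr =
		pages < SRP_FMR_SIZE ? (unsigned int)pages : SRP_FMR_SIZE;
	unsigned int fmr_max_size = fmr_page_size * max_pages_per_fmr;

	printf("max_pages_per_fmr = %u, fmr_max_size = %#x\n",
	       max_pages_per_fmr, fmr_max_size);
	return 0;
}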
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 77ba96553b27..80dfe173deac 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -293,12 +293,31 @@ static int srp_new_cm_id(struct srp_target_port *target)
 	return 0;
 }
 
+static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
+{
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_fmr_pool_param fmr_param;
+
+	memset(&fmr_param, 0, sizeof(fmr_param));
+	fmr_param.pool_size = target->scsi_host->can_queue;
+	fmr_param.dirty_watermark = fmr_param.pool_size / 4;
+	fmr_param.cache = 1;
+	fmr_param.max_pages_per_fmr = dev->max_pages_per_fmr;
+	fmr_param.page_shift = ilog2(dev->fmr_page_size);
+	fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
+			    IB_ACCESS_REMOTE_WRITE |
+			    IB_ACCESS_REMOTE_READ);
+
+	return ib_create_fmr_pool(dev->pd, &fmr_param);
+}
+
 static int srp_create_target_ib(struct srp_target_port *target)
 {
 	struct srp_device *dev = target->srp_host->srp_dev;
 	struct ib_qp_init_attr *init_attr;
 	struct ib_cq *recv_cq, *send_cq;
 	struct ib_qp *qp;
+	struct ib_fmr_pool *fmr_pool = NULL;
 	int ret;
 
 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
@@ -341,6 +360,19 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	if (ret)
 		goto err_qp;
 
+	if (dev->has_fmr) {
+		fmr_pool = srp_alloc_fmr_pool(target);
+		if (IS_ERR(fmr_pool)) {
+			ret = PTR_ERR(fmr_pool);
+			shost_printk(KERN_WARNING, target->scsi_host, PFX
+				     "FMR pool allocation failed (%d)\n", ret);
+			goto err_qp;
+		}
+		if (target->fmr_pool)
+			ib_destroy_fmr_pool(target->fmr_pool);
+		target->fmr_pool = fmr_pool;
+	}
+
 	if (target->qp)
 		ib_destroy_qp(target->qp);
 	if (target->recv_cq)
@@ -377,6 +409,8 @@ static void srp_free_target_ib(struct srp_target_port *target)
 {
 	int i;
 
+	if (target->fmr_pool)
+		ib_destroy_fmr_pool(target->fmr_pool);
 	ib_destroy_qp(target->qp);
 	ib_destroy_cq(target->send_cq);
 	ib_destroy_cq(target->recv_cq);
@@ -623,8 +657,8 @@ static int srp_alloc_req_data(struct srp_target_port *target)
 		req = &target->req_ring[i];
 		req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
 					GFP_KERNEL);
-		req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
-					GFP_KERNEL);
+		req->map_page = kmalloc(srp_dev->max_pages_per_fmr *
+					sizeof(void *), GFP_KERNEL);
 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
 		if (!req->fmr_list || !req->map_page || !req->indirect_desc)
 			goto out;
@@ -936,11 +970,10 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
 static int srp_map_finish_fmr(struct srp_map_state *state,
 			      struct srp_target_port *target)
 {
-	struct srp_device *dev = target->srp_host->srp_dev;
 	struct ib_pool_fmr *fmr;
 	u64 io_addr = 0;
 
-	fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
+	fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,
 				   state->npages, io_addr);
 	if (IS_ERR(fmr))
 		return PTR_ERR(fmr);
@@ -1033,7 +1066,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
 	srp_map_update_start(state, sg, sg_index, dma_addr);
 
 	while (dma_len) {
-		if (state->npages == SRP_FMR_SIZE) {
+		if (state->npages == dev->max_pages_per_fmr) {
 			ret = srp_finish_mapping(state, target);
 			if (ret)
 				return ret;
@@ -1077,7 +1110,7 @@ static void srp_map_fmr(struct srp_map_state *state,
 	state->pages = req->map_page;
 	state->next_fmr = req->fmr_list;
 
-	use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
+	use_fmr = target->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
 
 	for_each_sg(scat, sg, count, i) {
 		if (srp_map_sg_entry(state, target, sg, i, use_fmr)) {
@@ -1555,7 +1588,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	struct srp_cmd *cmd;
 	struct ib_device *dev;
 	unsigned long flags;
-	int len, result;
+	int len, ret;
 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
 
 	/*
@@ -1567,12 +1600,9 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	if (in_scsi_eh)
 		mutex_lock(&rport->mutex);
 
-	result = srp_chkready(target->rport);
-	if (unlikely(result)) {
-		scmnd->result = result;
-		scmnd->scsi_done(scmnd);
-		goto unlock_rport;
-	}
+	scmnd->result = srp_chkready(target->rport);
+	if (unlikely(scmnd->result))
+		goto err;
 
 	spin_lock_irqsave(&target->lock, flags);
 	iu = __srp_get_tx_iu(target, SRP_IU_CMD);
@@ -1587,7 +1617,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
 				   DMA_TO_DEVICE);
 
-	scmnd->result = 0;
 	scmnd->host_scribble = (void *) req;
 
 	cmd = iu->buf;
@@ -1604,7 +1633,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	len = srp_map_data(scmnd, target, req);
 	if (len < 0) {
 		shost_printk(KERN_ERR, target->scsi_host,
-			     PFX "Failed to map data\n");
+			     PFX "Failed to map data (%d)\n", len);
+		/*
+		 * If we ran out of memory descriptors (-ENOMEM) because an
+		 * application is queuing many requests with more than
+		 * max_pages_per_fmr sg-list elements, tell the SCSI mid-layer
+		 * to reduce queue depth temporarily.
+		 */
+		scmnd->result = len == -ENOMEM ?
+			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
 		goto err_iu;
 	}
 
@@ -1616,11 +1653,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 		goto err_unmap;
 	}
 
+	ret = 0;
+
 unlock_rport:
 	if (in_scsi_eh)
 		mutex_unlock(&rport->mutex);
 
-	return 0;
+	return ret;
 
 err_unmap:
 	srp_unmap_data(scmnd, target, req);
@@ -1640,10 +1679,15 @@ err_iu:
 err_unlock:
 	spin_unlock_irqrestore(&target->lock, flags);
 
-	if (in_scsi_eh)
-		mutex_unlock(&rport->mutex);
+err:
+	if (scmnd->result) {
+		scmnd->scsi_done(scmnd);
+		ret = 0;
+	} else {
+		ret = SCSI_MLQUEUE_HOST_BUSY;
+	}
 
-	return SCSI_MLQUEUE_HOST_BUSY;
+	goto unlock_rport;
 }
 
 /*
@@ -2647,7 +2691,8 @@ static ssize_t srp_create_target(struct device *dev,
 		container_of(dev, struct srp_host, dev);
 	struct Scsi_Host *target_host;
 	struct srp_target_port *target;
-	struct ib_device *ibdev = host->srp_dev->dev;
+	struct srp_device *srp_dev = host->srp_dev;
+	struct ib_device *ibdev = srp_dev->dev;
 	int ret;
 
 	target_host = scsi_host_alloc(&srp_template,
@@ -2692,8 +2737,8 @@ static ssize_t srp_create_target(struct device *dev,
 		goto err;
 	}
 
-	if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
+	if (!srp_dev->has_fmr && !target->allow_ext_sg &&
 	    target->cmd_sg_cnt < target->sg_tablesize) {
 		pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
 		target->sg_tablesize = target->cmd_sg_cnt;
 	}
@@ -2832,9 +2877,9 @@ static void srp_add_one(struct ib_device *device)
 {
 	struct srp_device *srp_dev;
 	struct ib_device_attr *dev_attr;
-	struct ib_fmr_pool_param fmr_param;
 	struct srp_host *host;
-	int max_pages_per_fmr, fmr_page_shift, s, e, p;
+	int fmr_page_shift, s, e, p;
+	u64 max_pages_per_fmr;
 
 	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
 	if (!dev_attr)
@@ -2849,6 +2894,9 @@ static void srp_add_one(struct ib_device *device)
 	if (!srp_dev)
 		goto free_attr;
 
+	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+			    device->map_phys_fmr && device->unmap_fmr);
+
 	/*
 	 * Use the smallest page size supported by the HCA, down to a
 	 * minimum of 4096 bytes. We're unlikely to build large sglists
@@ -2857,7 +2905,15 @@ static void srp_add_one(struct ib_device *device)
 	fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
 	srp_dev->fmr_page_size = 1 << fmr_page_shift;
 	srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1);
-	srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE;
+	max_pages_per_fmr = dev_attr->max_mr_size;
+	do_div(max_pages_per_fmr, srp_dev->fmr_page_size);
+	srp_dev->max_pages_per_fmr = min_t(u64, SRP_FMR_SIZE,
+					   max_pages_per_fmr);
+	srp_dev->fmr_max_size = srp_dev->fmr_page_size *
+				srp_dev->max_pages_per_fmr;
+	pr_debug("%s: fmr_page_shift = %d, dev_attr->max_mr_size = %#llx, max_pages_per_fmr = %d, fmr_max_size = %#x\n",
+		 device->name, fmr_page_shift, dev_attr->max_mr_size,
+		 srp_dev->max_pages_per_fmr, srp_dev->fmr_max_size);
 
 	INIT_LIST_HEAD(&srp_dev->dev_list);
 
@@ -2873,27 +2929,6 @@ static void srp_add_one(struct ib_device *device)
 	if (IS_ERR(srp_dev->mr))
 		goto err_pd;
 
-	for (max_pages_per_fmr = SRP_FMR_SIZE;
-	     max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
-	     max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
-		memset(&fmr_param, 0, sizeof fmr_param);
-		fmr_param.pool_size = SRP_FMR_POOL_SIZE;
-		fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
-		fmr_param.cache = 1;
-		fmr_param.max_pages_per_fmr = max_pages_per_fmr;
-		fmr_param.page_shift = fmr_page_shift;
-		fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
-				    IB_ACCESS_REMOTE_WRITE |
-				    IB_ACCESS_REMOTE_READ);
-
-		srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
-		if (!IS_ERR(srp_dev->fmr_pool))
-			break;
-	}
-
-	if (IS_ERR(srp_dev->fmr_pool))
-		srp_dev->fmr_pool = NULL;
-
 	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
@@ -2956,8 +2991,6 @@ static void srp_remove_one(struct ib_device *device)
 		kfree(host);
 	}
 
-	if (srp_dev->fmr_pool)
-		ib_destroy_fmr_pool(srp_dev->fmr_pool);
 	ib_dereg_mr(srp_dev->mr);
 	ib_dealloc_pd(srp_dev->pd);
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index aad27b7b4a46..2d99e52f2f5c 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -67,9 +67,6 @@ enum {
 	SRP_TAG_TSK_MGMT = 1U << 31,
 
 	SRP_FMR_SIZE = 512,
-	SRP_FMR_MIN_SIZE = 128,
-	SRP_FMR_POOL_SIZE = 1024,
-	SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4,
 
 	SRP_MAP_ALLOW_FMR = 0,
 	SRP_MAP_NO_FMR = 1,
@@ -91,10 +88,11 @@ struct srp_device {
 	struct ib_device *dev;
 	struct ib_pd *pd;
 	struct ib_mr *mr;
-	struct ib_fmr_pool *fmr_pool;
 	u64 fmr_page_mask;
 	int fmr_page_size;
 	int fmr_max_size;
+	int max_pages_per_fmr;
+	bool has_fmr;
 };
 
 struct srp_host {
@@ -131,6 +129,7 @@ struct srp_target_port {
 	struct ib_cq *send_cq ____cacheline_aligned_in_smp;
 	struct ib_cq *recv_cq;
 	struct ib_qp *qp;
+	struct ib_fmr_pool *fmr_pool;
 	u32 lkey;
 	u32 rkey;
 	enum srp_target_state state;