author    Linus Torvalds <torvalds@linux-foundation.org>  2016-05-20 17:35:07 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-05-20 17:35:07 -0400
commit    76b584d3125a1f7d8b64e9c522a4555bc2844bde (patch)
tree      c75dc6b134eeae650372df7c6179f1e43a95953b
parent    7992893c5a9fdffa42117f6f749359466e06bdf6 (diff)
parent    c16d2750a08c8ccaf98d65f287a8aec91bb9610d (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
 "Primary 4.7 merge window changes

   - Updates to the new Intel X722 iWARP driver
   - Updates to the hfi1 driver
   - Fixes for the iw_cxgb4 driver
   - Misc core fixes
   - Generic RDMA READ/WRITE API addition
   - SRP updates
   - Misc ipoib updates
   - Minor mlx5 updates"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (148 commits)
  IB/mlx5: Fire the CQ completion handler from tasklet
  net/mlx5_core: Use tasklet for user-space CQ completion events
  IB/core: Do not require CAP_NET_ADMIN for packet sniffing
  IB/mlx4: Fix unaligned access in send_reply_to_slave
  IB/mlx5: Report Scatter FCS device capability when supported
  IB/mlx5: Add Scatter FCS support for Raw Packet QP
  IB/core: Add Scatter FCS create flag
  IB/core: Add Raw Scatter FCS device capability
  IB/core: Add extended device capability flags
  i40iw: pass hw_stats by reference rather than by value
  i40iw: Remove unnecessary synchronize_irq() before free_irq()
  i40iw: constify i40iw_vf_cqp_ops structure
  IB/mlx5: Add UARs write-combining and non-cached mapping
  IB/mlx5: Allow mapping the free running counter on PROT_EXEC
  IB/mlx4: Use list_for_each_entry_safe
  IB/SA: Use correct free function
  IB/core: Fix a potential array overrun in CMA and SA agent
  IB/core: Remove unnecessary check in ibnl_rcv_msg
  IB/IWPM: Fix a potential skb leak
  RDMA/nes: replace custom print_hex_dump()
  ...
-rw-r--r-- drivers/infiniband/core/Makefile | 4
-rw-r--r-- drivers/infiniband/core/cma.c | 4
-rw-r--r-- drivers/infiniband/core/iwcm.c | 4
-rw-r--r-- drivers/infiniband/core/iwpm_util.c | 1
-rw-r--r-- drivers/infiniband/core/mr_pool.c | 86
-rw-r--r-- drivers/infiniband/core/netlink.c | 5
-rw-r--r-- drivers/infiniband/core/rw.c | 727
-rw-r--r-- drivers/infiniband/core/sa_query.c | 4
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c | 11
-rw-r--r-- drivers/infiniband/core/verbs.c | 172
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 7
-rw-r--r-- drivers/infiniband/hw/cxgb4/cm.c | 611
-rw-r--r-- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 14
-rw-r--r-- drivers/infiniband/hw/cxgb4/mem.c | 12
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw.h | 7
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_cm.c | 148
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_cm.h | 10
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 185
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_d.h | 4
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_hw.c | 14
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_main.c | 56
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_osdep.h | 1
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_pble.c | 9
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_puda.c | 2
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_status.h | 1
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_type.h | 14
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_uk.c | 106
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_user.h | 36
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_utils.c | 47
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 294
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_verbs.h | 3
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_vf.c | 2
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_vf.h | 2
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 102
-rw-r--r-- drivers/infiniband/hw/mlx4/mcg.c | 9
-rw-r--r-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 5
-rw-r--r-- drivers/infiniband/hw/mlx4/mr.c | 7
-rw-r--r-- drivers/infiniband/hw/mlx5/cq.c | 5
-rw-r--r-- drivers/infiniband/hw/mlx5/main.c | 102
-rw-r--r-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 8
-rw-r--r-- drivers/infiniband/hw/mlx5/mr.c | 25
-rw-r--r-- drivers/infiniband/hw/mlx5/qp.c | 20
-rw-r--r-- drivers/infiniband/hw/nes/nes_utils.c | 60
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.c | 43
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.h | 2
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 7
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 5
-rw-r--r-- drivers/infiniband/hw/qib/qib_init.c | 4
-rw-r--r-- drivers/infiniband/hw/qib/qib_rc.c | 2
-rw-r--r-- drivers/infiniband/hw/qib/qib_ruc.c | 4
-rw-r--r-- drivers/infiniband/hw/qib/qib_uc.c | 2
-rw-r--r-- drivers/infiniband/hw/qib/qib_ud.c | 10
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.h | 6
-rw-r--r-- drivers/infiniband/sw/rdmavt/qp.c | 6
-rw-r--r-- drivers/infiniband/sw/rdmavt/vt.c | 13
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 67
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2
-rw-r--r-- drivers/infiniband/ulp/iser/iser_memory.c | 4
-rw-r--r-- drivers/infiniband/ulp/isert/ib_isert.c | 841
-rw-r--r-- drivers/infiniband/ulp/isert/ib_isert.h | 69
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c | 229
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.h | 2
-rw-r--r-- drivers/infiniband/ulp/srpt/ib_srpt.c | 729
-rw-r--r-- drivers/infiniband/ulp/srpt/ib_srpt.h | 31
-rw-r--r-- drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 4
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 59
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 12
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 17
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2
-rw-r--r-- drivers/staging/rdma/hfi1/affinity.c | 93
-rw-r--r-- drivers/staging/rdma/hfi1/affinity.h | 19
-rw-r--r-- drivers/staging/rdma/hfi1/chip.c | 647
-rw-r--r-- drivers/staging/rdma/hfi1/chip.h | 7
-rw-r--r-- drivers/staging/rdma/hfi1/chip_registers.h | 1
-rw-r--r-- drivers/staging/rdma/hfi1/diag.c | 3
-rw-r--r-- drivers/staging/rdma/hfi1/driver.c | 3
-rw-r--r-- drivers/staging/rdma/hfi1/firmware.c | 9
-rw-r--r-- drivers/staging/rdma/hfi1/hfi.h | 11
-rw-r--r-- drivers/staging/rdma/hfi1/init.c | 25
-rw-r--r-- drivers/staging/rdma/hfi1/mad.c | 16
-rw-r--r-- drivers/staging/rdma/hfi1/mmu_rb.c | 35
-rw-r--r-- drivers/staging/rdma/hfi1/mmu_rb.h | 2
-rw-r--r-- drivers/staging/rdma/hfi1/pio.c | 52
-rw-r--r-- drivers/staging/rdma/hfi1/pio.h | 4
-rw-r--r-- drivers/staging/rdma/hfi1/platform.c | 99
-rw-r--r-- drivers/staging/rdma/hfi1/qp.c | 6
-rw-r--r-- drivers/staging/rdma/hfi1/qsfp.c | 58
-rw-r--r-- drivers/staging/rdma/hfi1/qsfp.h | 15
-rw-r--r-- drivers/staging/rdma/hfi1/rc.c | 9
-rw-r--r-- drivers/staging/rdma/hfi1/ruc.c | 20
-rw-r--r-- drivers/staging/rdma/hfi1/sysfs.c | 4
-rw-r--r-- drivers/staging/rdma/hfi1/ud.c | 8
-rw-r--r-- drivers/staging/rdma/hfi1/user_exp_rcv.c | 7
-rw-r--r-- drivers/staging/rdma/hfi1/user_sdma.c | 97
-rw-r--r-- drivers/staging/rdma/hfi1/verbs.c | 108
-rw-r--r-- drivers/staging/rdma/hfi1/verbs.h | 4
-rw-r--r-- drivers/target/target_core_transport.c | 32
-rw-r--r-- drivers/target/target_core_xcopy.c | 2
-rw-r--r-- include/linux/mlx5/cq.h | 5
-rw-r--r-- include/linux/mlx5/driver.h | 10
-rw-r--r-- include/rdma/ib_verbs.h | 61
-rw-r--r-- include/rdma/mr_pool.h | 25
-rw-r--r-- include/rdma/rdma_vt.h | 1
-rw-r--r-- include/rdma/rdmavt_qp.h | 5
-rw-r--r-- include/rdma/rw.h | 88
-rw-r--r-- include/target/target_core_backend.h | 1
-rw-r--r-- include/target/target_core_fabric.h | 4
-rw-r--r-- include/uapi/rdma/ib_user_verbs.h | 1
-rw-r--r-- net/rds/ib_frmr.c | 2
-rw-r--r-- net/sunrpc/xprtrdma/frwr_ops.c | 2
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2
111 files changed, 4116 insertions(+), 2605 deletions(-)
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index f818538a7f4e..26987d9d7e1c 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,9 +8,9 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
8obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ 8obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
9 $(user_access-y) 9 $(user_access-y)
10 10
11ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \ 11ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
12 device.o fmr_pool.o cache.o netlink.o \ 12 device.o fmr_pool.o cache.o netlink.o \
13 roce_gid_mgmt.o 13 roce_gid_mgmt.o mr_pool.o
14ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o 14ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
15ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o 15ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
16 16
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93ab0ae97208..f0c91ba3178a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -800,6 +800,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
800 if (id->device != pd->device) 800 if (id->device != pd->device)
801 return -EINVAL; 801 return -EINVAL;
802 802
803 qp_init_attr->port_num = id->port_num;
803 qp = ib_create_qp(pd, qp_init_attr); 804 qp = ib_create_qp(pd, qp_init_attr);
804 if (IS_ERR(qp)) 805 if (IS_ERR(qp))
805 return PTR_ERR(qp); 806 return PTR_ERR(qp);
@@ -4294,7 +4295,8 @@ static int __init cma_init(void)
4294 if (ret) 4295 if (ret)
4295 goto err; 4296 goto err;
4296 4297
4297 if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table)) 4298 if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table),
4299 cma_cb_table))
4298 pr_warn("RDMA CMA: failed to add netlink callback\n"); 4300 pr_warn("RDMA CMA: failed to add netlink callback\n");
4299 cma_configfs_init(); 4301 cma_configfs_init();
4300 4302
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index e28a160cdab0..f0572049d291 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -459,7 +459,7 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
459 if (pm_addr->ss_family == AF_INET) { 459 if (pm_addr->ss_family == AF_INET) {
460 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 460 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr;
461 461
462 if (pm4_addr->sin_addr.s_addr == INADDR_ANY) { 462 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) {
463 struct sockaddr_in *cm4_addr = 463 struct sockaddr_in *cm4_addr =
464 (struct sockaddr_in *)cm_addr; 464 (struct sockaddr_in *)cm_addr;
465 struct sockaddr_in *cm4_outaddr = 465 struct sockaddr_in *cm4_outaddr =
@@ -1175,7 +1175,7 @@ static int __init iw_cm_init(void)
1175 if (ret) 1175 if (ret)
1176 pr_err("iw_cm: couldn't init iwpm\n"); 1176 pr_err("iw_cm: couldn't init iwpm\n");
1177 1177
1178 ret = ibnl_add_client(RDMA_NL_IWCM, RDMA_NL_IWPM_NUM_OPS, 1178 ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table),
1179 iwcm_nl_cb_table); 1179 iwcm_nl_cb_table);
1180 if (ret) 1180 if (ret)
1181 pr_err("iw_cm: couldn't register netlink callbacks\n"); 1181 pr_err("iw_cm: couldn't register netlink callbacks\n");
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 9b2bf2fb2b00..b65e06c560d7 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -634,6 +634,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
634 if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, 634 if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
635 RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { 635 RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
636 pr_warn("%s Unable to put NLMSG_DONE\n", __func__); 636 pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
637 dev_kfree_skb(skb);
637 return -ENOMEM; 638 return -ENOMEM;
638 } 639 }
639 nlh->nlmsg_type = NLMSG_DONE; 640 nlh->nlmsg_type = NLMSG_DONE;
diff --git a/drivers/infiniband/core/mr_pool.c b/drivers/infiniband/core/mr_pool.c
new file mode 100644
index 000000000000..49d478b2ea94
--- /dev/null
+++ b/drivers/infiniband/core/mr_pool.c
@@ -0,0 +1,86 @@
1/*
2 * Copyright (c) 2016 HGST, a Western Digital Company.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#include <rdma/ib_verbs.h>
14#include <rdma/mr_pool.h>
15
16struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list)
17{
18 struct ib_mr *mr;
19 unsigned long flags;
20
21 spin_lock_irqsave(&qp->mr_lock, flags);
22 mr = list_first_entry_or_null(list, struct ib_mr, qp_entry);
23 if (mr) {
24 list_del(&mr->qp_entry);
25 qp->mrs_used++;
26 }
27 spin_unlock_irqrestore(&qp->mr_lock, flags);
28
29 return mr;
30}
31EXPORT_SYMBOL(ib_mr_pool_get);
32
33void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr)
34{
35 unsigned long flags;
36
37 spin_lock_irqsave(&qp->mr_lock, flags);
38 list_add(&mr->qp_entry, list);
39 qp->mrs_used--;
40 spin_unlock_irqrestore(&qp->mr_lock, flags);
41}
42EXPORT_SYMBOL(ib_mr_pool_put);
43
44int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
45 enum ib_mr_type type, u32 max_num_sg)
46{
47 struct ib_mr *mr;
48 unsigned long flags;
49 int ret, i;
50
51 for (i = 0; i < nr; i++) {
52 mr = ib_alloc_mr(qp->pd, type, max_num_sg);
53 if (IS_ERR(mr)) {
54 ret = PTR_ERR(mr);
55 goto out;
56 }
57
58 spin_lock_irqsave(&qp->mr_lock, flags);
59 list_add_tail(&mr->qp_entry, list);
60 spin_unlock_irqrestore(&qp->mr_lock, flags);
61 }
62
63 return 0;
64out:
65 ib_mr_pool_destroy(qp, list);
66 return ret;
67}
68EXPORT_SYMBOL(ib_mr_pool_init);
69
70void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list)
71{
72 struct ib_mr *mr;
73 unsigned long flags;
74
75 spin_lock_irqsave(&qp->mr_lock, flags);
76 while (!list_empty(list)) {
77 mr = list_first_entry(list, struct ib_mr, qp_entry);
78 list_del(&mr->qp_entry);
79
80 spin_unlock_irqrestore(&qp->mr_lock, flags);
81 ib_dereg_mr(mr);
82 spin_lock_irqsave(&qp->mr_lock, flags);
83 }
84 spin_unlock_irqrestore(&qp->mr_lock, flags);
85}
86EXPORT_SYMBOL(ib_mr_pool_destroy);
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index d47df9356779..9b8c20c8209b 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -151,12 +151,11 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
151 struct ibnl_client *client; 151 struct ibnl_client *client;
152 int type = nlh->nlmsg_type; 152 int type = nlh->nlmsg_type;
153 int index = RDMA_NL_GET_CLIENT(type); 153 int index = RDMA_NL_GET_CLIENT(type);
154 int op = RDMA_NL_GET_OP(type); 154 unsigned int op = RDMA_NL_GET_OP(type);
155 155
156 list_for_each_entry(client, &client_list, list) { 156 list_for_each_entry(client, &client_list, list) {
157 if (client->index == index) { 157 if (client->index == index) {
158 if (op < 0 || op >= client->nops || 158 if (op >= client->nops || !client->cb_table[op].dump)
159 !client->cb_table[op].dump)
160 return -EINVAL; 159 return -EINVAL;
161 160
162 /* 161 /*
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
new file mode 100644
index 000000000000..1eb9b1294a63
--- /dev/null
+++ b/drivers/infiniband/core/rw.c
@@ -0,0 +1,727 @@
1/*
2 * Copyright (c) 2016 HGST, a Western Digital Company.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#include <linux/moduleparam.h>
14#include <linux/slab.h>
15#include <rdma/mr_pool.h>
16#include <rdma/rw.h>
17
18enum {
19 RDMA_RW_SINGLE_WR,
20 RDMA_RW_MULTI_WR,
21 RDMA_RW_MR,
22 RDMA_RW_SIG_MR,
23};
24
25static bool rdma_rw_force_mr;
26module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
27MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
28
29/*
30 * Check if the device might use memory registration. This is currently only
31 * true for iWarp devices. In the future we can hopefully fine tune this based
32 * on HCA driver input.
33 */
34static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
35{
36 if (rdma_protocol_iwarp(dev, port_num))
37 return true;
38 if (unlikely(rdma_rw_force_mr))
39 return true;
40 return false;
41}
42
43/*
44 * Check if the device will use memory registration for this RW operation.
45 * We currently always use memory registrations for iWarp RDMA READs, and
46 * have a debug option to force usage of MRs.
47 *
48 * XXX: In the future we can hopefully fine tune this based on HCA driver
49 * input.
50 */
51static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
52 enum dma_data_direction dir, int dma_nents)
53{
54 if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
55 return true;
56 if (unlikely(rdma_rw_force_mr))
57 return true;
58 return false;
59}
60
61static inline u32 rdma_rw_max_sge(struct ib_device *dev,
62 enum dma_data_direction dir)
63{
64 return dir == DMA_TO_DEVICE ?
65 dev->attrs.max_sge : dev->attrs.max_sge_rd;
66}
67
68static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
69{
70 /* arbitrary limit to avoid allocating gigantic resources */
71 return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
72}
73
74static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
75 struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
76 u32 sg_cnt, u32 offset)
77{
78 u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
79 u32 nents = min(sg_cnt, pages_per_mr);
80 int count = 0, ret;
81
82 reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
83 if (!reg->mr)
84 return -EAGAIN;
85
86 if (reg->mr->need_inval) {
87 reg->inv_wr.opcode = IB_WR_LOCAL_INV;
88 reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
89 reg->inv_wr.next = &reg->reg_wr.wr;
90 count++;
91 } else {
92 reg->inv_wr.next = NULL;
93 }
94
95 ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
96 if (ret < nents) {
97 ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
98 return -EINVAL;
99 }
100
101 reg->reg_wr.wr.opcode = IB_WR_REG_MR;
102 reg->reg_wr.mr = reg->mr;
103 reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
104 if (rdma_protocol_iwarp(qp->device, port_num))
105 reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
106 count++;
107
108 reg->sge.addr = reg->mr->iova;
109 reg->sge.length = reg->mr->length;
110 return count;
111}
112
113static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
114 u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
115 u64 remote_addr, u32 rkey, enum dma_data_direction dir)
116{
117 u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
118 int i, j, ret = 0, count = 0;
119
120 ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
121 ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
122 if (!ctx->reg) {
123 ret = -ENOMEM;
124 goto out;
125 }
126
127 for (i = 0; i < ctx->nr_ops; i++) {
128 struct rdma_rw_reg_ctx *prev = i ? &ctx->reg[i - 1] : NULL;
129 struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
130 u32 nents = min(sg_cnt, pages_per_mr);
131
132 ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
133 offset);
134 if (ret < 0)
135 goto out_free;
136 count += ret;
137
138 if (prev) {
139 if (reg->mr->need_inval)
140 prev->wr.wr.next = &reg->inv_wr;
141 else
142 prev->wr.wr.next = &reg->reg_wr.wr;
143 }
144
145 reg->reg_wr.wr.next = &reg->wr.wr;
146
147 reg->wr.wr.sg_list = &reg->sge;
148 reg->wr.wr.num_sge = 1;
149 reg->wr.remote_addr = remote_addr;
150 reg->wr.rkey = rkey;
151 if (dir == DMA_TO_DEVICE) {
152 reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
153 } else if (!rdma_cap_read_inv(qp->device, port_num)) {
154 reg->wr.wr.opcode = IB_WR_RDMA_READ;
155 } else {
156 reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
157 reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
158 }
159 count++;
160
161 remote_addr += reg->sge.length;
162 sg_cnt -= nents;
163 for (j = 0; j < nents; j++)
164 sg = sg_next(sg);
165 offset = 0;
166 }
167
168 ctx->type = RDMA_RW_MR;
169 return count;
170
171out_free:
172 while (--i >= 0)
173 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
174 kfree(ctx->reg);
175out:
176 return ret;
177}
178
179static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
180 struct scatterlist *sg, u32 sg_cnt, u32 offset,
181 u64 remote_addr, u32 rkey, enum dma_data_direction dir)
182{
183 struct ib_device *dev = qp->pd->device;
184 u32 max_sge = rdma_rw_max_sge(dev, dir);
185 struct ib_sge *sge;
186 u32 total_len = 0, i, j;
187
188 ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);
189
190 ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
191 if (!ctx->map.sges)
192 goto out;
193
194 ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
195 if (!ctx->map.wrs)
196 goto out_free_sges;
197
198 for (i = 0; i < ctx->nr_ops; i++) {
199 struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
200 u32 nr_sge = min(sg_cnt, max_sge);
201
202 if (dir == DMA_TO_DEVICE)
203 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
204 else
205 rdma_wr->wr.opcode = IB_WR_RDMA_READ;
206 rdma_wr->remote_addr = remote_addr + total_len;
207 rdma_wr->rkey = rkey;
208 rdma_wr->wr.sg_list = sge;
209
210 for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
211 rdma_wr->wr.num_sge++;
212
213 sge->addr = ib_sg_dma_address(dev, sg) + offset;
214 sge->length = ib_sg_dma_len(dev, sg) - offset;
215 sge->lkey = qp->pd->local_dma_lkey;
216
217 total_len += sge->length;
218 sge++;
219 sg_cnt--;
220 offset = 0;
221 }
222
223 if (i + 1 < ctx->nr_ops)
224 rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr;
225 }
226
227 ctx->type = RDMA_RW_MULTI_WR;
228 return ctx->nr_ops;
229
230out_free_sges:
231 kfree(ctx->map.sges);
232out:
233 return -ENOMEM;
234}
235
236static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
237 struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
238 enum dma_data_direction dir)
239{
240 struct ib_device *dev = qp->pd->device;
241 struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
242
243 ctx->nr_ops = 1;
244
245 ctx->single.sge.lkey = qp->pd->local_dma_lkey;
246 ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset;
247 ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset;
248
249 memset(rdma_wr, 0, sizeof(*rdma_wr));
250 if (dir == DMA_TO_DEVICE)
251 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
252 else
253 rdma_wr->wr.opcode = IB_WR_RDMA_READ;
254 rdma_wr->wr.sg_list = &ctx->single.sge;
255 rdma_wr->wr.num_sge = 1;
256 rdma_wr->remote_addr = remote_addr;
257 rdma_wr->rkey = rkey;
258
259 ctx->type = RDMA_RW_SINGLE_WR;
260 return 1;
261}
262
263/**
264 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
265 * @ctx: context to initialize
266 * @qp: queue pair to operate on
267 * @port_num: port num to which the connection is bound
268 * @sg: scatterlist to READ/WRITE from/to
269 * @sg_cnt: number of entries in @sg
270 * @sg_offset: current byte offset into @sg
271 * @remote_addr:remote address to read/write (relative to @rkey)
272 * @rkey: remote key to operate on
273 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
274 *
275 * Returns the number of WQEs that will be needed on the workqueue if
276 * successful, or a negative error code.
277 */
278int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
279 struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
280 u64 remote_addr, u32 rkey, enum dma_data_direction dir)
281{
282 struct ib_device *dev = qp->pd->device;
283 int ret;
284
285 ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
286 if (!ret)
287 return -ENOMEM;
288 sg_cnt = ret;
289
290 /*
291 * Skip to the S/G entry that sg_offset falls into:
292 */
293 for (;;) {
294 u32 len = ib_sg_dma_len(dev, sg);
295
296 if (sg_offset < len)
297 break;
298
299 sg = sg_next(sg);
300 sg_offset -= len;
301 sg_cnt--;
302 }
303
304 ret = -EIO;
305 if (WARN_ON_ONCE(sg_cnt == 0))
306 goto out_unmap_sg;
307
308 if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
309 ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
310 sg_offset, remote_addr, rkey, dir);
311 } else if (sg_cnt > 1) {
312 ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
313 remote_addr, rkey, dir);
314 } else {
315 ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
316 remote_addr, rkey, dir);
317 }
318
319 if (ret < 0)
320 goto out_unmap_sg;
321 return ret;
322
323out_unmap_sg:
324 ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
325 return ret;
326}
327EXPORT_SYMBOL(rdma_rw_ctx_init);
328
329/**
330 * rdma_rw_ctx_signature init - initialize a RW context with signature offload
331 * @ctx: context to initialize
332 * @qp: queue pair to operate on
333 * @port_num: port num to which the connection is bound
334 * @sg: scatterlist to READ/WRITE from/to
335 * @sg_cnt: number of entries in @sg
336 * @prot_sg: scatterlist to READ/WRITE protection information from/to
337 * @prot_sg_cnt: number of entries in @prot_sg
338 * @sig_attrs: signature offloading algorithms
339 * @remote_addr:remote address to read/write (relative to @rkey)
340 * @rkey: remote key to operate on
341 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
342 *
343 * Returns the number of WQEs that will be needed on the workqueue if
344 * successful, or a negative error code.
345 */
346int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
347 u8 port_num, struct scatterlist *sg, u32 sg_cnt,
348 struct scatterlist *prot_sg, u32 prot_sg_cnt,
349 struct ib_sig_attrs *sig_attrs,
350 u64 remote_addr, u32 rkey, enum dma_data_direction dir)
351{
352 struct ib_device *dev = qp->pd->device;
353 u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
354 struct ib_rdma_wr *rdma_wr;
355 struct ib_send_wr *prev_wr = NULL;
356 int count = 0, ret;
357
358 if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
359 pr_err("SG count too large\n");
360 return -EINVAL;
361 }
362
363 ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
364 if (!ret)
365 return -ENOMEM;
366 sg_cnt = ret;
367
368 ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
369 if (!ret) {
370 ret = -ENOMEM;
371 goto out_unmap_sg;
372 }
373 prot_sg_cnt = ret;
374
375 ctx->type = RDMA_RW_SIG_MR;
376 ctx->nr_ops = 1;
377 ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL);
378 if (!ctx->sig) {
379 ret = -ENOMEM;
380 goto out_unmap_prot_sg;
381 }
382
383 ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0);
384 if (ret < 0)
385 goto out_free_ctx;
386 count += ret;
387 prev_wr = &ctx->sig->data.reg_wr.wr;
388
389 if (prot_sg_cnt) {
390 ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
391 prot_sg, prot_sg_cnt, 0);
392 if (ret < 0)
393 goto out_destroy_data_mr;
394 count += ret;
395
396 if (ctx->sig->prot.inv_wr.next)
397 prev_wr->next = &ctx->sig->prot.inv_wr;
398 else
399 prev_wr->next = &ctx->sig->prot.reg_wr.wr;
400 prev_wr = &ctx->sig->prot.reg_wr.wr;
401 } else {
402 ctx->sig->prot.mr = NULL;
403 }
404
405 ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
406 if (!ctx->sig->sig_mr) {
407 ret = -EAGAIN;
408 goto out_destroy_prot_mr;
409 }
410
411 if (ctx->sig->sig_mr->need_inval) {
412 memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr));
413
414 ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV;
415 ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey;
416
417 prev_wr->next = &ctx->sig->sig_inv_wr;
418 prev_wr = &ctx->sig->sig_inv_wr;
419 }
420
421 ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
422 ctx->sig->sig_wr.wr.wr_cqe = NULL;
423 ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge;
424 ctx->sig->sig_wr.wr.num_sge = 1;
425 ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
426 ctx->sig->sig_wr.sig_attrs = sig_attrs;
427 ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr;
428 if (prot_sg_cnt)
429 ctx->sig->sig_wr.prot = &ctx->sig->prot.sge;
430 prev_wr->next = &ctx->sig->sig_wr.wr;
431 prev_wr = &ctx->sig->sig_wr.wr;
432 count++;
433
434 ctx->sig->sig_sge.addr = 0;
435 ctx->sig->sig_sge.length = ctx->sig->data.sge.length;
436 if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE)
437 ctx->sig->sig_sge.length += ctx->sig->prot.sge.length;
438
439 rdma_wr = &ctx->sig->data.wr;
440 rdma_wr->wr.sg_list = &ctx->sig->sig_sge;
441 rdma_wr->wr.num_sge = 1;
442 rdma_wr->remote_addr = remote_addr;
443 rdma_wr->rkey = rkey;
444 if (dir == DMA_TO_DEVICE)
445 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
446 else
447 rdma_wr->wr.opcode = IB_WR_RDMA_READ;
448 prev_wr->next = &rdma_wr->wr;
449 prev_wr = &rdma_wr->wr;
450 count++;
451
452 return count;
453
454out_destroy_prot_mr:
455 if (prot_sg_cnt)
456 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
457out_destroy_data_mr:
458 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
459out_free_ctx:
460 kfree(ctx->sig);
461out_unmap_prot_sg:
462 ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
463out_unmap_sg:
464 ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
465 return ret;
466}
467EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
468
469/*
470 * Now that we are going to post the WRs we can update the lkey and need_inval
471 * state on the MRs. If we were doing this at init time, we would get double
472 * or missing invalidations if a context was initialized but not actually
473 * posted.
474 */
475static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
476{
477 reg->mr->need_inval = need_inval;
478 ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
479 reg->reg_wr.key = reg->mr->lkey;
480 reg->sge.lkey = reg->mr->lkey;
481}
482
483/**
484 * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
485 * @ctx: context to operate on
486 * @qp: queue pair to operate on
487 * @port_num: port num to which the connection is bound
488 * @cqe: completion queue entry for the last WR
489 * @chain_wr: WR to append to the posted chain
490 *
491 * Return the WR chain for the set of RDMA READ/WRITE operations described by
492 * @ctx, as well as any memory registration operations needed. If @chain_wr
493 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
494 * If @chain_wr is not set @cqe must be set so that the caller gets a
495 * completion notification.
496 */
497struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
498 u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
499{
500 struct ib_send_wr *first_wr, *last_wr;
501 int i;
502
503 switch (ctx->type) {
504 case RDMA_RW_SIG_MR:
505 rdma_rw_update_lkey(&ctx->sig->data, true);
506 if (ctx->sig->prot.mr)
507 rdma_rw_update_lkey(&ctx->sig->prot, true);
508
509 ctx->sig->sig_mr->need_inval = true;
510 ib_update_fast_reg_key(ctx->sig->sig_mr,
511 ib_inc_rkey(ctx->sig->sig_mr->lkey));
512 ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey;
513
514 if (ctx->sig->data.inv_wr.next)
515 first_wr = &ctx->sig->data.inv_wr;
516 else
517 first_wr = &ctx->sig->data.reg_wr.wr;
518 last_wr = &ctx->sig->data.wr.wr;
519 break;
520 case RDMA_RW_MR:
521 for (i = 0; i < ctx->nr_ops; i++) {
522 rdma_rw_update_lkey(&ctx->reg[i],
523 ctx->reg[i].wr.wr.opcode !=
524 IB_WR_RDMA_READ_WITH_INV);
525 }
526
527 if (ctx->reg[0].inv_wr.next)
528 first_wr = &ctx->reg[0].inv_wr;
529 else
530 first_wr = &ctx->reg[0].reg_wr.wr;
531 last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
532 break;
533 case RDMA_RW_MULTI_WR:
534 first_wr = &ctx->map.wrs[0].wr;
535 last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
536 break;
537 case RDMA_RW_SINGLE_WR:
538 first_wr = &ctx->single.wr.wr;
539 last_wr = &ctx->single.wr.wr;
540 break;
541 default:
542 BUG();
543 }
544
545 if (chain_wr) {
546 last_wr->next = chain_wr;
547 } else {
548 last_wr->wr_cqe = cqe;
549 last_wr->send_flags |= IB_SEND_SIGNALED;
550 }
551
552 return first_wr;
553}
554EXPORT_SYMBOL(rdma_rw_ctx_wrs);
555
556/**
557 * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation
558 * @ctx: context to operate on
559 * @qp: queue pair to operate on
560 * @port_num: port num to which the connection is bound
561 * @cqe: completion queue entry for the last WR
562 * @chain_wr: WR to append to the posted chain
563 *
564 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
565 * any memory registration operations needed. If @chain_wr is non-NULL the
566 * WR it points to will be appended to the chain of WRs posted. If @chain_wr
567 * is not set @cqe must be set so that the caller gets a completion
568 * notification.
569 */
570int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
571 struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
572{
573 struct ib_send_wr *first_wr, *bad_wr;
574
575 first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
576 return ib_post_send(qp, first_wr, &bad_wr);
577}
578EXPORT_SYMBOL(rdma_rw_ctx_post);
579
580/**
581 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
582 * @ctx: context to release
583 * @qp: queue pair to operate on
584 * @port_num: port num to which the connection is bound
585 * @sg: scatterlist that was used for the READ/WRITE
586 * @sg_cnt: number of entries in @sg
587 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
588 */
589void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
590 struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
591{
592 int i;
593
594 switch (ctx->type) {
595 case RDMA_RW_MR:
596 for (i = 0; i < ctx->nr_ops; i++)
597 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
598 kfree(ctx->reg);
599 break;
600 case RDMA_RW_MULTI_WR:
601 kfree(ctx->map.wrs);
602 kfree(ctx->map.sges);
603 break;
604 case RDMA_RW_SINGLE_WR:
605 break;
606 default:
607 BUG();
608 break;
609 }
610
611 ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
612}
613EXPORT_SYMBOL(rdma_rw_ctx_destroy);
614
615/**
616 * rdma_rw_ctx_destroy_signature - release all resources allocated by
617 * rdma_rw_ctx_init_signature
618 * @ctx: context to release
619 * @qp: queue pair to operate on
620 * @port_num: port num to which the connection is bound
621 * @sg: scatterlist that was used for the READ/WRITE
622 * @sg_cnt: number of entries in @sg
623 * @prot_sg: scatterlist that was used for the READ/WRITE of the PI
624 * @prot_sg_cnt: number of entries in @prot_sg
625 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
626 */
627void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
628 u8 port_num, struct scatterlist *sg, u32 sg_cnt,
629 struct scatterlist *prot_sg, u32 prot_sg_cnt,
630 enum dma_data_direction dir)
631{
632 if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
633 return;
634
635 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
636 ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
637
638 if (ctx->sig->prot.mr) {
639 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
640 ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
641 }
642
643 ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr);
644 kfree(ctx->sig);
645}
646EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
647
648void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
649{
650 u32 factor;
651
652 WARN_ON_ONCE(attr->port_num == 0);
653
654 /*
655 * Each context needs at least one RDMA READ or WRITE WR.
656 *
657 * For some hardware we might need more, eventually we should ask the
658 * HCA driver for a multiplier here.
659 */
660 factor = 1;
661
662 /*
663 * If the devices needs MRs to perform RDMA READ or WRITE operations,
664 * we'll need two additional MRs for the registrations and the
665 * invalidation.
666 */
667 if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
668 factor += 6; /* (inv + reg) * (data + prot + sig) */
669 else if (rdma_rw_can_use_mr(dev, attr->port_num))
670 factor += 2; /* inv + reg */
671
672 attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
673
674 /*
675 * But maybe we were just too high in the sky and the device doesn't
676 * even support all we need, and we'll have to live with what we get..
677 */
678 attr->cap.max_send_wr =
679 min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
680}
681
682int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
683{
684 struct ib_device *dev = qp->pd->device;
685 u32 nr_mrs = 0, nr_sig_mrs = 0;
686 int ret = 0;
687
688 if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) {
689 nr_sig_mrs = attr->cap.max_rdma_ctxs;
690 nr_mrs = attr->cap.max_rdma_ctxs * 2;
691 } else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
692 nr_mrs = attr->cap.max_rdma_ctxs;
693 }
694
695 if (nr_mrs) {
696 ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
697 IB_MR_TYPE_MEM_REG,
698 rdma_rw_fr_page_list_len(dev));
699 if (ret) {
700 pr_err("%s: failed to allocated %d MRs\n",
701 __func__, nr_mrs);
702 return ret;
703 }
704 }
705
706 if (nr_sig_mrs) {
707 ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
708 IB_MR_TYPE_SIGNATURE, 2);
709 if (ret) {
710 pr_err("%s: failed to allocated %d SIG MRs\n",
711 __func__, nr_mrs);
712 goto out_free_rdma_mrs;
713 }
714 }
715
716 return 0;
717
718out_free_rdma_mrs:
719 ib_mr_pool_destroy(qp, &qp->rdma_mrs);
720 return ret;
721}
722
723void rdma_rw_cleanup_mrs(struct ib_qp *qp)
724{
725 ib_mr_pool_destroy(qp, &qp->sig_mrs);
726 ib_mr_pool_destroy(qp, &qp->rdma_mrs);
727}
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8a09c0fb268d..3ebd108bcc5f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -536,7 +536,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
536 data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS, 536 data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
537 RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST); 537 RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
538 if (!data) { 538 if (!data) {
539 kfree_skb(skb); 539 nlmsg_free(skb);
540 return -EMSGSIZE; 540 return -EMSGSIZE;
541 } 541 }
542 542
@@ -1820,7 +1820,7 @@ static int __init ib_sa_init(void)
1820 goto err3; 1820 goto err3;
1821 } 1821 }
1822 1822
1823 if (ibnl_add_client(RDMA_NL_LS, RDMA_NL_LS_NUM_OPS, 1823 if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table),
1824 ib_sa_cb_table)) { 1824 ib_sa_cb_table)) {
1825 pr_err("Failed to add netlink callback\n"); 1825 pr_err("Failed to add netlink callback\n");
1826 ret = -EINVAL; 1826 ret = -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 6fdc7ecdaca0..1a8babb8ee3c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1833,7 +1833,8 @@ static int create_qp(struct ib_uverbs_file *file,
1833 if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | 1833 if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
1834 IB_QP_CREATE_CROSS_CHANNEL | 1834 IB_QP_CREATE_CROSS_CHANNEL |
1835 IB_QP_CREATE_MANAGED_SEND | 1835 IB_QP_CREATE_MANAGED_SEND |
1836 IB_QP_CREATE_MANAGED_RECV)) { 1836 IB_QP_CREATE_MANAGED_RECV |
1837 IB_QP_CREATE_SCATTER_FCS)) {
1837 ret = -EINVAL; 1838 ret = -EINVAL;
1838 goto err_put; 1839 goto err_put;
1839 } 1840 }
@@ -3088,8 +3089,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3088 if (cmd.comp_mask) 3089 if (cmd.comp_mask)
3089 return -EINVAL; 3090 return -EINVAL;
3090 3091
3091 if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && 3092 if (!capable(CAP_NET_RAW))
3092 !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
3093 return -EPERM; 3093 return -EPERM;
3094 3094
3095 if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED) 3095 if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
@@ -3655,6 +3655,11 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
3655 resp.hca_core_clock = attr.hca_core_clock; 3655 resp.hca_core_clock = attr.hca_core_clock;
3656 resp.response_length += sizeof(resp.hca_core_clock); 3656 resp.response_length += sizeof(resp.hca_core_clock);
3657 3657
3658 if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
3659 goto end;
3660
3661 resp.device_cap_flags_ex = attr.device_cap_flags;
3662 resp.response_length += sizeof(resp.device_cap_flags_ex);
3658end: 3663end:
3659 err = ib_copy_to_udata(ucore, &resp, resp.response_length); 3664 err = ib_copy_to_udata(ucore, &resp, resp.response_length);
3660 return err; 3665 return err;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b65b3541e732..1d7d4cf442e3 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -48,6 +48,7 @@
48#include <rdma/ib_verbs.h> 48#include <rdma/ib_verbs.h>
49#include <rdma/ib_cache.h> 49#include <rdma/ib_cache.h>
50#include <rdma/ib_addr.h> 50#include <rdma/ib_addr.h>
51#include <rdma/rw.h>
51 52
52#include "core_priv.h" 53#include "core_priv.h"
53 54
@@ -723,59 +724,89 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
723} 724}
724EXPORT_SYMBOL(ib_open_qp); 725EXPORT_SYMBOL(ib_open_qp);
725 726
727static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
728 struct ib_qp_init_attr *qp_init_attr)
729{
730 struct ib_qp *real_qp = qp;
731
732 qp->event_handler = __ib_shared_qp_event_handler;
733 qp->qp_context = qp;
734 qp->pd = NULL;
735 qp->send_cq = qp->recv_cq = NULL;
736 qp->srq = NULL;
737 qp->xrcd = qp_init_attr->xrcd;
738 atomic_inc(&qp_init_attr->xrcd->usecnt);
739 INIT_LIST_HEAD(&qp->open_list);
740
741 qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
742 qp_init_attr->qp_context);
743 if (!IS_ERR(qp))
744 __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
745 else
746 real_qp->device->destroy_qp(real_qp);
747 return qp;
748}
749
726struct ib_qp *ib_create_qp(struct ib_pd *pd, 750struct ib_qp *ib_create_qp(struct ib_pd *pd,
727 struct ib_qp_init_attr *qp_init_attr) 751 struct ib_qp_init_attr *qp_init_attr)
728{ 752{
729 struct ib_qp *qp, *real_qp; 753 struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
730 struct ib_device *device; 754 struct ib_qp *qp;
755 int ret;
756
757 /*
758 * If the callers is using the RDMA API calculate the resources
759 * needed for the RDMA READ/WRITE operations.
760 *
761 * Note that these callers need to pass in a port number.
762 */
763 if (qp_init_attr->cap.max_rdma_ctxs)
764 rdma_rw_init_qp(device, qp_init_attr);
731 765
732 device = pd ? pd->device : qp_init_attr->xrcd->device;
733 qp = device->create_qp(pd, qp_init_attr, NULL); 766 qp = device->create_qp(pd, qp_init_attr, NULL);
767 if (IS_ERR(qp))
768 return qp;
769
770 qp->device = device;
771 qp->real_qp = qp;
772 qp->uobject = NULL;
773 qp->qp_type = qp_init_attr->qp_type;
774
775 atomic_set(&qp->usecnt, 0);
776 qp->mrs_used = 0;
777 spin_lock_init(&qp->mr_lock);
778 INIT_LIST_HEAD(&qp->rdma_mrs);
779 INIT_LIST_HEAD(&qp->sig_mrs);
780
781 if (qp_init_attr->qp_type == IB_QPT_XRC_TGT)
782 return ib_create_xrc_qp(qp, qp_init_attr);
783
784 qp->event_handler = qp_init_attr->event_handler;
785 qp->qp_context = qp_init_attr->qp_context;
786 if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
787 qp->recv_cq = NULL;
788 qp->srq = NULL;
789 } else {
790 qp->recv_cq = qp_init_attr->recv_cq;
791 atomic_inc(&qp_init_attr->recv_cq->usecnt);
792 qp->srq = qp_init_attr->srq;
793 if (qp->srq)
794 atomic_inc(&qp_init_attr->srq->usecnt);
795 }
734 796
735 if (!IS_ERR(qp)) { 797 qp->pd = pd;
736 qp->device = device; 798 qp->send_cq = qp_init_attr->send_cq;
737 qp->real_qp = qp; 799 qp->xrcd = NULL;
738 qp->uobject = NULL;
739 qp->qp_type = qp_init_attr->qp_type;
740
741 atomic_set(&qp->usecnt, 0);
742 if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
743 qp->event_handler = __ib_shared_qp_event_handler;
744 qp->qp_context = qp;
745 qp->pd = NULL;
746 qp->send_cq = qp->recv_cq = NULL;
747 qp->srq = NULL;
748 qp->xrcd = qp_init_attr->xrcd;
749 atomic_inc(&qp_init_attr->xrcd->usecnt);
750 INIT_LIST_HEAD(&qp->open_list);
751
752 real_qp = qp;
753 qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
754 qp_init_attr->qp_context);
755 if (!IS_ERR(qp))
756 __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
757 else
758 real_qp->device->destroy_qp(real_qp);
759 } else {
760 qp->event_handler = qp_init_attr->event_handler;
761 qp->qp_context = qp_init_attr->qp_context;
762 if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
763 qp->recv_cq = NULL;
764 qp->srq = NULL;
765 } else {
766 qp->recv_cq = qp_init_attr->recv_cq;
767 atomic_inc(&qp_init_attr->recv_cq->usecnt);
768 qp->srq = qp_init_attr->srq;
769 if (qp->srq)
770 atomic_inc(&qp_init_attr->srq->usecnt);
771 }
772 800
773 qp->pd = pd; 801 atomic_inc(&pd->usecnt);
774 qp->send_cq = qp_init_attr->send_cq; 802 atomic_inc(&qp_init_attr->send_cq->usecnt);
775 qp->xrcd = NULL;
776 803
777 atomic_inc(&pd->usecnt); 804 if (qp_init_attr->cap.max_rdma_ctxs) {
778 atomic_inc(&qp_init_attr->send_cq->usecnt); 805 ret = rdma_rw_init_mrs(qp, qp_init_attr);
806 if (ret) {
807 pr_err("failed to init MR pool ret= %d\n", ret);
808 ib_destroy_qp(qp);
809 qp = ERR_PTR(ret);
779 } 810 }
780 } 811 }
781 812
@@ -1250,6 +1281,8 @@ int ib_destroy_qp(struct ib_qp *qp)
1250 struct ib_srq *srq; 1281 struct ib_srq *srq;
1251 int ret; 1282 int ret;
1252 1283
1284 WARN_ON_ONCE(qp->mrs_used > 0);
1285
1253 if (atomic_read(&qp->usecnt)) 1286 if (atomic_read(&qp->usecnt))
1254 return -EBUSY; 1287 return -EBUSY;
1255 1288
@@ -1261,6 +1294,9 @@ int ib_destroy_qp(struct ib_qp *qp)
1261 rcq = qp->recv_cq; 1294 rcq = qp->recv_cq;
1262 srq = qp->srq; 1295 srq = qp->srq;
1263 1296
1297 if (!qp->uobject)
1298 rdma_rw_cleanup_mrs(qp);
1299
1264 ret = qp->device->destroy_qp(qp); 1300 ret = qp->device->destroy_qp(qp);
1265 if (!ret) { 1301 if (!ret) {
1266 if (pd) 1302 if (pd)
@@ -1343,6 +1379,7 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
1343 mr->pd = pd; 1379 mr->pd = pd;
1344 mr->uobject = NULL; 1380 mr->uobject = NULL;
1345 atomic_inc(&pd->usecnt); 1381 atomic_inc(&pd->usecnt);
1382 mr->need_inval = false;
1346 } 1383 }
1347 1384
1348 return mr; 1385 return mr;
@@ -1389,6 +1426,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
1389 mr->pd = pd; 1426 mr->pd = pd;
1390 mr->uobject = NULL; 1427 mr->uobject = NULL;
1391 atomic_inc(&pd->usecnt); 1428 atomic_inc(&pd->usecnt);
1429 mr->need_inval = false;
1392 } 1430 }
1393 1431
1394 return mr; 1432 return mr;
@@ -1597,6 +1635,7 @@ EXPORT_SYMBOL(ib_set_vf_guid);
1597 * @mr: memory region 1635 * @mr: memory region
1598 * @sg: dma mapped scatterlist 1636 * @sg: dma mapped scatterlist
1599 * @sg_nents: number of entries in sg 1637 * @sg_nents: number of entries in sg
1638 * @sg_offset: offset in bytes into sg
1600 * @page_size: page vector desired page size 1639 * @page_size: page vector desired page size
1601 * 1640 *
1602 * Constraints: 1641 * Constraints:
@@ -1615,17 +1654,15 @@ EXPORT_SYMBOL(ib_set_vf_guid);
1615 * After this completes successfully, the memory region 1654 * After this completes successfully, the memory region
1616 * is ready for registration. 1655 * is ready for registration.
1617 */ 1656 */
1618int ib_map_mr_sg(struct ib_mr *mr, 1657int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
1619 struct scatterlist *sg, 1658 unsigned int *sg_offset, unsigned int page_size)
1620 int sg_nents,
1621 unsigned int page_size)
1622{ 1659{
1623 if (unlikely(!mr->device->map_mr_sg)) 1660 if (unlikely(!mr->device->map_mr_sg))
1624 return -ENOSYS; 1661 return -ENOSYS;
1625 1662
1626 mr->page_size = page_size; 1663 mr->page_size = page_size;
1627 1664
1628 return mr->device->map_mr_sg(mr, sg, sg_nents); 1665 return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset);
1629} 1666}
1630EXPORT_SYMBOL(ib_map_mr_sg); 1667EXPORT_SYMBOL(ib_map_mr_sg);
1631 1668
@@ -1635,6 +1672,10 @@ EXPORT_SYMBOL(ib_map_mr_sg);
1635 * @mr: memory region 1672 * @mr: memory region
1636 * @sgl: dma mapped scatterlist 1673 * @sgl: dma mapped scatterlist
1637 * @sg_nents: number of entries in sg 1674 * @sg_nents: number of entries in sg
1675 * @sg_offset_p: IN: start offset in bytes into sg
1676 * OUT: offset in bytes for element n of the sg of the first
1677 * byte that has not been processed where n is the return
1678 * value of this function.
1638 * @set_page: driver page assignment function pointer 1679 * @set_page: driver page assignment function pointer
1639 * 1680 *
1640 * Core service helper for drivers to convert the largest 1681 * Core service helper for drivers to convert the largest
@@ -1645,23 +1686,26 @@ EXPORT_SYMBOL(ib_map_mr_sg);
1645 * Returns the number of sg elements that were assigned to 1686 * Returns the number of sg elements that were assigned to
1646 * a page vector. 1687 * a page vector.
1647 */ 1688 */
1648int ib_sg_to_pages(struct ib_mr *mr, 1689int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
1649 struct scatterlist *sgl, 1690 unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
1650 int sg_nents,
1651 int (*set_page)(struct ib_mr *, u64))
1652{ 1691{
1653 struct scatterlist *sg; 1692 struct scatterlist *sg;
1654 u64 last_end_dma_addr = 0; 1693 u64 last_end_dma_addr = 0;
1694 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1655 unsigned int last_page_off = 0; 1695 unsigned int last_page_off = 0;
1656 u64 page_mask = ~((u64)mr->page_size - 1); 1696 u64 page_mask = ~((u64)mr->page_size - 1);
1657 int i, ret; 1697 int i, ret;
1658 1698
1659 mr->iova = sg_dma_address(&sgl[0]); 1699 if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
1700 return -EINVAL;
1701
1702 mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
1660 mr->length = 0; 1703 mr->length = 0;
1661 1704
1662 for_each_sg(sgl, sg, sg_nents, i) { 1705 for_each_sg(sgl, sg, sg_nents, i) {
1663 u64 dma_addr = sg_dma_address(sg); 1706 u64 dma_addr = sg_dma_address(sg) + sg_offset;
1664 unsigned int dma_len = sg_dma_len(sg); 1707 u64 prev_addr = dma_addr;
1708 unsigned int dma_len = sg_dma_len(sg) - sg_offset;
1665 u64 end_dma_addr = dma_addr + dma_len; 1709 u64 end_dma_addr = dma_addr + dma_len;
1666 u64 page_addr = dma_addr & page_mask; 1710 u64 page_addr = dma_addr & page_mask;
1667 1711
@@ -1685,8 +1729,14 @@ int ib_sg_to_pages(struct ib_mr *mr,
1685 1729
1686 do { 1730 do {
1687 ret = set_page(mr, page_addr); 1731 ret = set_page(mr, page_addr);
1688 if (unlikely(ret < 0)) 1732 if (unlikely(ret < 0)) {
1689 return i ? : ret; 1733 sg_offset = prev_addr - sg_dma_address(sg);
1734 mr->length += prev_addr - dma_addr;
1735 if (sg_offset_p)
1736 *sg_offset_p = sg_offset;
1737 return i || sg_offset ? i : ret;
1738 }
1739 prev_addr = page_addr;
1690next_page: 1740next_page:
1691 page_addr += mr->page_size; 1741 page_addr += mr->page_size;
1692 } while (page_addr < end_dma_addr); 1742 } while (page_addr < end_dma_addr);
@@ -1694,8 +1744,12 @@ next_page:
1694 mr->length += dma_len; 1744 mr->length += dma_len;
1695 last_end_dma_addr = end_dma_addr; 1745 last_end_dma_addr = end_dma_addr;
1696 last_page_off = end_dma_addr & ~page_mask; 1746 last_page_off = end_dma_addr & ~page_mask;
1747
1748 sg_offset = 0;
1697 } 1749 }
1698 1750
1751 if (sg_offset_p)
1752 *sg_offset_p = 0;
1699 return i; 1753 return i;
1700} 1754}
1701EXPORT_SYMBOL(ib_sg_to_pages); 1755EXPORT_SYMBOL(ib_sg_to_pages);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 3234a8be16f6..47cb927a0dd6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -783,15 +783,14 @@ static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
783 return 0; 783 return 0;
784} 784}
785 785
786static int iwch_map_mr_sg(struct ib_mr *ibmr, 786static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
787 struct scatterlist *sg, 787 int sg_nents, unsigned int *sg_offset)
788 int sg_nents)
789{ 788{
790 struct iwch_mr *mhp = to_iwch_mr(ibmr); 789 struct iwch_mr *mhp = to_iwch_mr(ibmr);
791 790
792 mhp->npages = 0; 791 mhp->npages = 0;
793 792
794 return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page); 793 return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page);
795} 794}
796 795
797static int iwch_destroy_qp(struct ib_qp *ib_qp) 796static int iwch_destroy_qp(struct ib_qp *ib_qp)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 651711370d55..a3a67216bce6 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -119,7 +119,7 @@ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
119static int mpa_rev = 2; 119static int mpa_rev = 2;
120module_param(mpa_rev, int, 0644); 120module_param(mpa_rev, int, 0644);
121MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " 121MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
122 "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft" 122 "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
123 " compliant (default=2)"); 123 " compliant (default=2)");
124 124
125static int markers_enabled; 125static int markers_enabled;
@@ -145,19 +145,35 @@ static struct sk_buff_head rxq;
145static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); 145static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
146static void ep_timeout(unsigned long arg); 146static void ep_timeout(unsigned long arg);
147static void connect_reply_upcall(struct c4iw_ep *ep, int status); 147static void connect_reply_upcall(struct c4iw_ep *ep, int status);
148static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
148 149
149static LIST_HEAD(timeout_list); 150static LIST_HEAD(timeout_list);
150static spinlock_t timeout_lock; 151static spinlock_t timeout_lock;
151 152
153static void deref_cm_id(struct c4iw_ep_common *epc)
154{
155 epc->cm_id->rem_ref(epc->cm_id);
156 epc->cm_id = NULL;
157 set_bit(CM_ID_DEREFED, &epc->history);
158}
159
160static void ref_cm_id(struct c4iw_ep_common *epc)
161{
162 set_bit(CM_ID_REFED, &epc->history);
163 epc->cm_id->add_ref(epc->cm_id);
164}
165
152static void deref_qp(struct c4iw_ep *ep) 166static void deref_qp(struct c4iw_ep *ep)
153{ 167{
154 c4iw_qp_rem_ref(&ep->com.qp->ibqp); 168 c4iw_qp_rem_ref(&ep->com.qp->ibqp);
155 clear_bit(QP_REFERENCED, &ep->com.flags); 169 clear_bit(QP_REFERENCED, &ep->com.flags);
170 set_bit(QP_DEREFED, &ep->com.history);
156} 171}
157 172
158static void ref_qp(struct c4iw_ep *ep) 173static void ref_qp(struct c4iw_ep *ep)
159{ 174{
160 set_bit(QP_REFERENCED, &ep->com.flags); 175 set_bit(QP_REFERENCED, &ep->com.flags);
176 set_bit(QP_REFED, &ep->com.history);
161 c4iw_qp_add_ref(&ep->com.qp->ibqp); 177 c4iw_qp_add_ref(&ep->com.qp->ibqp);
162} 178}
163 179
@@ -201,6 +217,8 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
201 error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); 217 error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
202 if (error < 0) 218 if (error < 0)
203 kfree_skb(skb); 219 kfree_skb(skb);
220 else if (error == NET_XMIT_DROP)
221 return -ENOMEM;
204 return error < 0 ? error : 0; 222 return error < 0 ? error : 0;
205} 223}
206 224
@@ -290,12 +308,63 @@ static void *alloc_ep(int size, gfp_t gfp)
290 return epc; 308 return epc;
291} 309}
292 310
311static void remove_ep_tid(struct c4iw_ep *ep)
312{
313 unsigned long flags;
314
315 spin_lock_irqsave(&ep->com.dev->lock, flags);
316 _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
317 spin_unlock_irqrestore(&ep->com.dev->lock, flags);
318}
319
320static void insert_ep_tid(struct c4iw_ep *ep)
321{
322 unsigned long flags;
323
324 spin_lock_irqsave(&ep->com.dev->lock, flags);
325 _insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0);
326 spin_unlock_irqrestore(&ep->com.dev->lock, flags);
327}
328
329/*
330 * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
331 */
332static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
333{
334 struct c4iw_ep *ep;
335 unsigned long flags;
336
337 spin_lock_irqsave(&dev->lock, flags);
338 ep = idr_find(&dev->hwtid_idr, tid);
339 if (ep)
340 c4iw_get_ep(&ep->com);
341 spin_unlock_irqrestore(&dev->lock, flags);
342 return ep;
343}
344
345/*
346 * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
347 */
348static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
349 unsigned int stid)
350{
351 struct c4iw_listen_ep *ep;
352 unsigned long flags;
353
354 spin_lock_irqsave(&dev->lock, flags);
355 ep = idr_find(&dev->stid_idr, stid);
356 if (ep)
357 c4iw_get_ep(&ep->com);
358 spin_unlock_irqrestore(&dev->lock, flags);
359 return ep;
360}
361
293void _c4iw_free_ep(struct kref *kref) 362void _c4iw_free_ep(struct kref *kref)
294{ 363{
295 struct c4iw_ep *ep; 364 struct c4iw_ep *ep;
296 365
297 ep = container_of(kref, struct c4iw_ep, com.kref); 366 ep = container_of(kref, struct c4iw_ep, com.kref);
298 PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); 367 PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
299 if (test_bit(QP_REFERENCED, &ep->com.flags)) 368 if (test_bit(QP_REFERENCED, &ep->com.flags))
300 deref_qp(ep); 369 deref_qp(ep);
301 if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { 370 if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
@@ -309,10 +378,11 @@ void _c4iw_free_ep(struct kref *kref)
309 (const u32 *)&sin6->sin6_addr.s6_addr, 378 (const u32 *)&sin6->sin6_addr.s6_addr,
310 1); 379 1);
311 } 380 }
312 remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
313 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); 381 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
314 dst_release(ep->dst); 382 dst_release(ep->dst);
315 cxgb4_l2t_release(ep->l2t); 383 cxgb4_l2t_release(ep->l2t);
384 if (ep->mpa_skb)
385 kfree_skb(ep->mpa_skb);
316 } 386 }
317 kfree(ep); 387 kfree(ep);
318} 388}
@@ -320,6 +390,15 @@ void _c4iw_free_ep(struct kref *kref)
320static void release_ep_resources(struct c4iw_ep *ep) 390static void release_ep_resources(struct c4iw_ep *ep)
321{ 391{
322 set_bit(RELEASE_RESOURCES, &ep->com.flags); 392 set_bit(RELEASE_RESOURCES, &ep->com.flags);
393
394 /*
395 * If we have a hwtid, then remove it from the idr table
396 * so lookups will no longer find this endpoint. Otherwise
397 * we have a race where one thread finds the ep ptr just
398 * before the other thread is freeing the ep memory.
399 */
400 if (ep->hwtid != -1)
401 remove_ep_tid(ep);
323 c4iw_put_ep(&ep->com); 402 c4iw_put_ep(&ep->com);
324} 403}
325 404
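The comment above is about ordering: the hardware TID must disappear from the idr before the final reference can be dropped. A small sketch of the racing reader that ordering protects (illustrative only; it uses the helpers added earlier in this patch):

/* Sketch: because remove_ep_tid() runs before the final c4iw_put_ep(),
 * a concurrent lookup either misses entirely or returns an ep on which
 * it already holds its own reference; it can never see freed memory.
 */
static void example_racing_lookup(struct c4iw_dev *dev, unsigned int tid)
{
	struct c4iw_ep *ep = get_ep_from_tid(dev, tid);

	if (!ep)
		return;			/* teardown won the race */
	/* safe: our reference keeps the ep alive while we use it */
	c4iw_put_ep(&ep->com);
}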
@@ -432,10 +511,74 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
432 511
433static void arp_failure_discard(void *handle, struct sk_buff *skb) 512static void arp_failure_discard(void *handle, struct sk_buff *skb)
434{ 513{
435 PDBG("%s c4iw_dev %p\n", __func__, handle); 514 pr_err(MOD "ARP failure\n");
436 kfree_skb(skb); 515 kfree_skb(skb);
437} 516}
438 517
518static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
519{
520 pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
521}
522
523enum {
524 NUM_FAKE_CPLS = 2,
525 FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
526 FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
527};
528
529static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
530{
531 struct c4iw_ep *ep;
532
533 ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
534 release_ep_resources(ep);
535 return 0;
536}
537
538static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
539{
540 struct c4iw_ep *ep;
541
542 ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
543 c4iw_put_ep(&ep->parent_ep->com);
544 release_ep_resources(ep);
545 return 0;
546}
547
548/*
549 * Fake up a special CPL opcode and call sched() so process_work() will call
550 * _put_ep_safe() in a safe context to free the ep resources. This is needed
551 * because ARP error handlers are called in an ATOMIC context, and
552 * _c4iw_free_ep() needs to block.
553 */
554static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
555 int cpl)
556{
557 struct cpl_act_establish *rpl = cplhdr(skb);
558
559 /* Set our special ARP_FAILURE opcode */
560 rpl->ot.opcode = cpl;
561
562 /*
563 * Save ep in the skb->cb area, after where sched() will save the dev
564 * ptr.
565 */
566 *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
567 sched(ep->com.dev, skb);
568}
569
570/* Handle an ARP failure for an accept */
571static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
572{
573 struct c4iw_ep *ep = handle;
574
575 pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n",
576 ep->hwtid);
577
578 __state_set(&ep->com, DEAD);
579 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
580}
581
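The block above defers the blocking cleanup by re-tagging the skb with an opcode only this driver understands and feeding it back through the normal work queue. The exact skb->cb slot used by sched() is not shown in this hunk, so the layout below is partly an assumption; only the ep slot at 2 * sizeof(void *) is taken from the code above:

/* Sketch of the cb usage behind queue_arp_failure_cpl():
 *
 *   skb->cb:  [ slots used by sched()/process_work() ][ ep pointer ]
 *             0                                        2 * sizeof(void *)
 *
 * and the matching read performed by the fake-CPL handlers:
 */
static struct c4iw_ep *example_ep_from_cb(struct sk_buff *skb)
{
	return *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
}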
439/* 582/*
440 * Handle an ARP failure for an active open. 583 * Handle an ARP failure for an active open.
441 */ 584 */
@@ -444,9 +587,8 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
444 struct c4iw_ep *ep = handle; 587 struct c4iw_ep *ep = handle;
445 588
446 printk(KERN_ERR MOD "ARP failure during connect\n"); 589 printk(KERN_ERR MOD "ARP failure during connect\n");
447 kfree_skb(skb);
448 connect_reply_upcall(ep, -EHOSTUNREACH); 590 connect_reply_upcall(ep, -EHOSTUNREACH);
449 state_set(&ep->com, DEAD); 591 __state_set(&ep->com, DEAD);
450 if (ep->com.remote_addr.ss_family == AF_INET6) { 592 if (ep->com.remote_addr.ss_family == AF_INET6) {
451 struct sockaddr_in6 *sin6 = 593 struct sockaddr_in6 *sin6 =
452 (struct sockaddr_in6 *)&ep->com.local_addr; 594 (struct sockaddr_in6 *)&ep->com.local_addr;
@@ -455,9 +597,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
455 } 597 }
456 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); 598 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
457 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 599 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
458 dst_release(ep->dst); 600 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
459 cxgb4_l2t_release(ep->l2t);
460 c4iw_put_ep(&ep->com);
461} 601}
462 602
463/* 603/*
@@ -466,15 +606,21 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
466 */ 606 */
467static void abort_arp_failure(void *handle, struct sk_buff *skb) 607static void abort_arp_failure(void *handle, struct sk_buff *skb)
468{ 608{
469 struct c4iw_rdev *rdev = handle; 609 int ret;
610 struct c4iw_ep *ep = handle;
611 struct c4iw_rdev *rdev = &ep->com.dev->rdev;
470 struct cpl_abort_req *req = cplhdr(skb); 612 struct cpl_abort_req *req = cplhdr(skb);
471 613
472 PDBG("%s rdev %p\n", __func__, rdev); 614 PDBG("%s rdev %p\n", __func__, rdev);
473 req->cmd = CPL_ABORT_NO_RST; 615 req->cmd = CPL_ABORT_NO_RST;
474 c4iw_ofld_send(rdev, skb); 616 ret = c4iw_ofld_send(rdev, skb);
617 if (ret) {
618 __state_set(&ep->com, DEAD);
619 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
620 }
475} 621}
476 622
477static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) 623static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
478{ 624{
479 unsigned int flowclen = 80; 625 unsigned int flowclen = 80;
480 struct fw_flowc_wr *flowc; 626 struct fw_flowc_wr *flowc;
@@ -530,7 +676,7 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
530 } 676 }
531 677
532 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 678 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
533 c4iw_ofld_send(&ep->com.dev->rdev, skb); 679 return c4iw_ofld_send(&ep->com.dev->rdev, skb);
534} 680}
535 681
536static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) 682static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
@@ -568,7 +714,7 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
568 return -ENOMEM; 714 return -ENOMEM;
569 } 715 }
570 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 716 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
571 t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure); 717 t4_set_arp_err_handler(skb, ep, abort_arp_failure);
572 req = (struct cpl_abort_req *) skb_put(skb, wrlen); 718 req = (struct cpl_abort_req *) skb_put(skb, wrlen);
573 memset(req, 0, wrlen); 719 memset(req, 0, wrlen);
574 INIT_TP_WR(req, ep->hwtid); 720 INIT_TP_WR(req, ep->hwtid);
@@ -807,10 +953,10 @@ clip_release:
807 return ret; 953 return ret;
808} 954}
809 955
810static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, 956static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
811 u8 mpa_rev_to_use) 957 u8 mpa_rev_to_use)
812{ 958{
813 int mpalen, wrlen; 959 int mpalen, wrlen, ret;
814 struct fw_ofld_tx_data_wr *req; 960 struct fw_ofld_tx_data_wr *req;
815 struct mpa_message *mpa; 961 struct mpa_message *mpa;
816 struct mpa_v2_conn_params mpa_v2_params; 962 struct mpa_v2_conn_params mpa_v2_params;
@@ -826,7 +972,7 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
826 skb = get_skb(skb, wrlen, GFP_KERNEL); 972 skb = get_skb(skb, wrlen, GFP_KERNEL);
827 if (!skb) { 973 if (!skb) {
828 connect_reply_upcall(ep, -ENOMEM); 974 connect_reply_upcall(ep, -ENOMEM);
829 return; 975 return -ENOMEM;
830 } 976 }
831 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 977 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
832 978
@@ -894,12 +1040,14 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
894 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 1040 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
895 BUG_ON(ep->mpa_skb); 1041 BUG_ON(ep->mpa_skb);
896 ep->mpa_skb = skb; 1042 ep->mpa_skb = skb;
897 c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 1043 ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1044 if (ret)
1045 return ret;
898 start_ep_timer(ep); 1046 start_ep_timer(ep);
899 __state_set(&ep->com, MPA_REQ_SENT); 1047 __state_set(&ep->com, MPA_REQ_SENT);
900 ep->mpa_attr.initiator = 1; 1048 ep->mpa_attr.initiator = 1;
901 ep->snd_seq += mpalen; 1049 ep->snd_seq += mpalen;
902 return; 1050 return ret;
903} 1051}
904 1052
905static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) 1053static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
@@ -975,7 +1123,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
975 */ 1123 */
976 skb_get(skb); 1124 skb_get(skb);
977 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1125 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
978 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 1126 t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
979 BUG_ON(ep->mpa_skb); 1127 BUG_ON(ep->mpa_skb);
980 ep->mpa_skb = skb; 1128 ep->mpa_skb = skb;
981 ep->snd_seq += mpalen; 1129 ep->snd_seq += mpalen;
@@ -1060,7 +1208,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1060 * Function fw4_ack() will deref it. 1208 * Function fw4_ack() will deref it.
1061 */ 1209 */
1062 skb_get(skb); 1210 skb_get(skb);
1063 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 1211 t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1064 ep->mpa_skb = skb; 1212 ep->mpa_skb = skb;
1065 __state_set(&ep->com, MPA_REP_SENT); 1213 __state_set(&ep->com, MPA_REP_SENT);
1066 ep->snd_seq += mpalen; 1214 ep->snd_seq += mpalen;
@@ -1074,6 +1222,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1074 unsigned int tid = GET_TID(req); 1222 unsigned int tid = GET_TID(req);
1075 unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); 1223 unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1076 struct tid_info *t = dev->rdev.lldi.tids; 1224 struct tid_info *t = dev->rdev.lldi.tids;
1225 int ret;
1077 1226
1078 ep = lookup_atid(t, atid); 1227 ep = lookup_atid(t, atid);
1079 1228
@@ -1086,7 +1235,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1086 /* setup the hwtid for this connection */ 1235 /* setup the hwtid for this connection */
1087 ep->hwtid = tid; 1236 ep->hwtid = tid;
1088 cxgb4_insert_tid(t, ep, tid); 1237 cxgb4_insert_tid(t, ep, tid);
1089 insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); 1238 insert_ep_tid(ep);
1090 1239
1091 ep->snd_seq = be32_to_cpu(req->snd_isn); 1240 ep->snd_seq = be32_to_cpu(req->snd_isn);
1092 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 1241 ep->rcv_seq = be32_to_cpu(req->rcv_isn);
@@ -1099,13 +1248,22 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1099 set_bit(ACT_ESTAB, &ep->com.history); 1248 set_bit(ACT_ESTAB, &ep->com.history);
1100 1249
1101 /* start MPA negotiation */ 1250 /* start MPA negotiation */
1102 send_flowc(ep, NULL); 1251 ret = send_flowc(ep, NULL);
1252 if (ret)
1253 goto err;
1103 if (ep->retry_with_mpa_v1) 1254 if (ep->retry_with_mpa_v1)
1104 send_mpa_req(ep, skb, 1); 1255 ret = send_mpa_req(ep, skb, 1);
1105 else 1256 else
1106 send_mpa_req(ep, skb, mpa_rev); 1257 ret = send_mpa_req(ep, skb, mpa_rev);
1258 if (ret)
1259 goto err;
1107 mutex_unlock(&ep->com.mutex); 1260 mutex_unlock(&ep->com.mutex);
1108 return 0; 1261 return 0;
1262err:
1263 mutex_unlock(&ep->com.mutex);
1264 connect_reply_upcall(ep, -ENOMEM);
1265 c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1266 return 0;
1109} 1267}
1110 1268
1111static void close_complete_upcall(struct c4iw_ep *ep, int status) 1269static void close_complete_upcall(struct c4iw_ep *ep, int status)
@@ -1120,20 +1278,11 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
1120 PDBG("close complete delivered ep %p cm_id %p tid %u\n", 1278 PDBG("close complete delivered ep %p cm_id %p tid %u\n",
1121 ep, ep->com.cm_id, ep->hwtid); 1279 ep, ep->com.cm_id, ep->hwtid);
1122 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1280 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1123 ep->com.cm_id->rem_ref(ep->com.cm_id); 1281 deref_cm_id(&ep->com);
1124 ep->com.cm_id = NULL;
1125 set_bit(CLOSE_UPCALL, &ep->com.history); 1282 set_bit(CLOSE_UPCALL, &ep->com.history);
1126 } 1283 }
1127} 1284}
1128 1285
1129static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
1130{
1131 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1132 __state_set(&ep->com, ABORTING);
1133 set_bit(ABORT_CONN, &ep->com.history);
1134 return send_abort(ep, skb, gfp);
1135}
1136
1137static void peer_close_upcall(struct c4iw_ep *ep) 1286static void peer_close_upcall(struct c4iw_ep *ep)
1138{ 1287{
1139 struct iw_cm_event event; 1288 struct iw_cm_event event;
@@ -1161,8 +1310,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep)
1161 PDBG("abort delivered ep %p cm_id %p tid %u\n", ep, 1310 PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
1162 ep->com.cm_id, ep->hwtid); 1311 ep->com.cm_id, ep->hwtid);
1163 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1312 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1164 ep->com.cm_id->rem_ref(ep->com.cm_id); 1313 deref_cm_id(&ep->com);
1165 ep->com.cm_id = NULL;
1166 set_bit(ABORT_UPCALL, &ep->com.history); 1314 set_bit(ABORT_UPCALL, &ep->com.history);
1167 } 1315 }
1168} 1316}
@@ -1205,10 +1353,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1205 set_bit(CONN_RPL_UPCALL, &ep->com.history); 1353 set_bit(CONN_RPL_UPCALL, &ep->com.history);
1206 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1354 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1207 1355
1208 if (status < 0) { 1356 if (status < 0)
1209 ep->com.cm_id->rem_ref(ep->com.cm_id); 1357 deref_cm_id(&ep->com);
1210 ep->com.cm_id = NULL;
1211 }
1212} 1358}
1213 1359
1214static int connect_request_upcall(struct c4iw_ep *ep) 1360static int connect_request_upcall(struct c4iw_ep *ep)
@@ -1301,6 +1447,18 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1301 1447
1302#define RELAXED_IRD_NEGOTIATION 1 1448#define RELAXED_IRD_NEGOTIATION 1
1303 1449
1450/*
1451 * process_mpa_reply - process streaming mode MPA reply
1452 *
1453 * Returns:
1454 *
1455 * 0 upon success indicating a connect request was delivered to the ULP
1456 * or the mpa request is incomplete but valid so far.
1457 *
1458 * 1 if a failure requires the caller to close the connection.
1459 *
1460 * 2 if a failure requires the caller to abort the connection.
1461 */
1304static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) 1462static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1305{ 1463{
1306 struct mpa_message *mpa; 1464 struct mpa_message *mpa;
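The 0/1/2 convention documented above is what lets the rx_data() hunk further down translate an MPA parsing failure into either an orderly close or an abort with one call. A sketch of that mapping (hypothetical wrapper; the real dispatch lives in rx_data()):

/* Sketch: consuming the documented return values of
 * process_mpa_reply()/process_mpa_request().
 */
static void example_handle_mpa_result(struct c4iw_ep *ep, int disconnect)
{
	if (!disconnect)	/* 0: keep going */
		return;
	/* 1 means close, 2 means abort; abrupt is exactly (disconnect == 2) */
	c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
}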
@@ -1316,20 +1474,12 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1316 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 1474 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1317 1475
1318 /* 1476 /*
1319 * Stop mpa timer. If it expired, then
1320 * we ignore the MPA reply. process_timeout()
1321 * will abort the connection.
1322 */
1323 if (stop_ep_timer(ep))
1324 return 0;
1325
1326 /*
1327 * If we get more than the supported amount of private data 1477 * If we get more than the supported amount of private data
1328 * then we must fail this connection. 1478 * then we must fail this connection.
1329 */ 1479 */
1330 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { 1480 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1331 err = -EINVAL; 1481 err = -EINVAL;
1332 goto err; 1482 goto err_stop_timer;
1333 } 1483 }
1334 1484
1335 /* 1485 /*
@@ -1351,11 +1501,11 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1351 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," 1501 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1352 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1502 " Received = %d\n", __func__, mpa_rev, mpa->revision);
1353 err = -EPROTO; 1503 err = -EPROTO;
1354 goto err; 1504 goto err_stop_timer;
1355 } 1505 }
1356 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { 1506 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1357 err = -EPROTO; 1507 err = -EPROTO;
1358 goto err; 1508 goto err_stop_timer;
1359 } 1509 }
1360 1510
1361 plen = ntohs(mpa->private_data_size); 1511 plen = ntohs(mpa->private_data_size);
@@ -1365,7 +1515,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1365 */ 1515 */
1366 if (plen > MPA_MAX_PRIVATE_DATA) { 1516 if (plen > MPA_MAX_PRIVATE_DATA) {
1367 err = -EPROTO; 1517 err = -EPROTO;
1368 goto err; 1518 goto err_stop_timer;
1369 } 1519 }
1370 1520
1371 /* 1521 /*
@@ -1373,7 +1523,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1373 */ 1523 */
1374 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 1524 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1375 err = -EPROTO; 1525 err = -EPROTO;
1376 goto err; 1526 goto err_stop_timer;
1377 } 1527 }
1378 1528
1379 ep->plen = (u8) plen; 1529 ep->plen = (u8) plen;
@@ -1387,10 +1537,18 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1387 1537
1388 if (mpa->flags & MPA_REJECT) { 1538 if (mpa->flags & MPA_REJECT) {
1389 err = -ECONNREFUSED; 1539 err = -ECONNREFUSED;
1390 goto err; 1540 goto err_stop_timer;
1391 } 1541 }
1392 1542
1393 /* 1543 /*
1544 * Stop mpa timer. If it expired, then
1545 * we ignore the MPA reply. process_timeout()
1546 * will abort the connection.
1547 */
1548 if (stop_ep_timer(ep))
1549 return 0;
1550
1551 /*
1394 * If we get here we have accumulated the entire mpa 1552 * If we get here we have accumulated the entire mpa
1395 * start reply message including private data. And 1553 * start reply message including private data. And
1396 * the MPA header is valid. 1554 * the MPA header is valid.
@@ -1529,15 +1687,28 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1529 goto out; 1687 goto out;
1530 } 1688 }
1531 goto out; 1689 goto out;
1690err_stop_timer:
1691 stop_ep_timer(ep);
1532err: 1692err:
1533 __state_set(&ep->com, ABORTING); 1693 disconnect = 2;
1534 send_abort(ep, skb, GFP_KERNEL);
1535out: 1694out:
1536 connect_reply_upcall(ep, err); 1695 connect_reply_upcall(ep, err);
1537 return disconnect; 1696 return disconnect;
1538} 1697}
1539 1698
1540static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) 1699/*
1700 * process_mpa_request - process streaming mode MPA request
1701 *
1702 * Returns:
1703 *
1704 * 0 upon success indicating a connect request was delivered to the ULP
1705 * or the mpa request is incomplete but valid so far.
1706 *
1707 * 1 if a failure requires the caller to close the connection.
1708 *
1709 * 2 if a failure requires the caller to abort the connection.
1710 */
1711static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1541{ 1712{
1542 struct mpa_message *mpa; 1713 struct mpa_message *mpa;
1543 struct mpa_v2_conn_params *mpa_v2_params; 1714 struct mpa_v2_conn_params *mpa_v2_params;
@@ -1549,11 +1720,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1549 * If we get more than the supported amount of private data 1720 * If we get more than the supported amount of private data
1550 * then we must fail this connection. 1721 * then we must fail this connection.
1551 */ 1722 */
1552 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { 1723 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
1553 (void)stop_ep_timer(ep); 1724 goto err_stop_timer;
1554 abort_connection(ep, skb, GFP_KERNEL);
1555 return;
1556 }
1557 1725
1558 PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); 1726 PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1559 1727
@@ -1569,7 +1737,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1569 * We'll continue process when more data arrives. 1737 * We'll continue process when more data arrives.
1570 */ 1738 */
1571 if (ep->mpa_pkt_len < sizeof(*mpa)) 1739 if (ep->mpa_pkt_len < sizeof(*mpa))
1572 return; 1740 return 0;
1573 1741
1574 PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); 1742 PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1575 mpa = (struct mpa_message *) ep->mpa_pkt; 1743 mpa = (struct mpa_message *) ep->mpa_pkt;
@@ -1580,43 +1748,32 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1580 if (mpa->revision > mpa_rev) { 1748 if (mpa->revision > mpa_rev) {
1581 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," 1749 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1582 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1750 " Received = %d\n", __func__, mpa_rev, mpa->revision);
1583 (void)stop_ep_timer(ep); 1751 goto err_stop_timer;
1584 abort_connection(ep, skb, GFP_KERNEL);
1585 return;
1586 } 1752 }
1587 1753
1588 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { 1754 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1589 (void)stop_ep_timer(ep); 1755 goto err_stop_timer;
1590 abort_connection(ep, skb, GFP_KERNEL);
1591 return;
1592 }
1593 1756
1594 plen = ntohs(mpa->private_data_size); 1757 plen = ntohs(mpa->private_data_size);
1595 1758
1596 /* 1759 /*
1597 * Fail if there's too much private data. 1760 * Fail if there's too much private data.
1598 */ 1761 */
1599 if (plen > MPA_MAX_PRIVATE_DATA) { 1762 if (plen > MPA_MAX_PRIVATE_DATA)
1600 (void)stop_ep_timer(ep); 1763 goto err_stop_timer;
1601 abort_connection(ep, skb, GFP_KERNEL);
1602 return;
1603 }
1604 1764
1605 /* 1765 /*
1606 * If plen does not account for pkt size 1766 * If plen does not account for pkt size
1607 */ 1767 */
1608 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 1768 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1609 (void)stop_ep_timer(ep); 1769 goto err_stop_timer;
1610 abort_connection(ep, skb, GFP_KERNEL);
1611 return;
1612 }
1613 ep->plen = (u8) plen; 1770 ep->plen = (u8) plen;
1614 1771
1615 /* 1772 /*
1616 * If we don't have all the pdata yet, then bail. 1773 * If we don't have all the pdata yet, then bail.
1617 */ 1774 */
1618 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 1775 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1619 return; 1776 return 0;
1620 1777
1621 /* 1778 /*
1622 * If we get here we have accumulated the entire mpa 1779 * If we get here we have accumulated the entire mpa
@@ -1665,26 +1822,26 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1665 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, 1822 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1666 ep->mpa_attr.p2p_type); 1823 ep->mpa_attr.p2p_type);
1667 1824
1668 /* 1825 __state_set(&ep->com, MPA_REQ_RCVD);
1669 * If the endpoint timer already expired, then we ignore 1826
1670 * the start request. process_timeout() will abort 1827 /* drive upcall */
1671 * the connection. 1828 mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
1672 */ 1829 if (ep->parent_ep->com.state != DEAD) {
1673 if (!stop_ep_timer(ep)) { 1830 if (connect_request_upcall(ep))
1674 __state_set(&ep->com, MPA_REQ_RCVD); 1831 goto err_unlock_parent;
1675 1832 } else {
1676 /* drive upcall */ 1833 goto err_unlock_parent;
1677 mutex_lock_nested(&ep->parent_ep->com.mutex,
1678 SINGLE_DEPTH_NESTING);
1679 if (ep->parent_ep->com.state != DEAD) {
1680 if (connect_request_upcall(ep))
1681 abort_connection(ep, skb, GFP_KERNEL);
1682 } else {
1683 abort_connection(ep, skb, GFP_KERNEL);
1684 }
1685 mutex_unlock(&ep->parent_ep->com.mutex);
1686 } 1834 }
1687 return; 1835 mutex_unlock(&ep->parent_ep->com.mutex);
1836 return 0;
1837
1838err_unlock_parent:
1839 mutex_unlock(&ep->parent_ep->com.mutex);
1840 goto err_out;
1841err_stop_timer:
1842 (void)stop_ep_timer(ep);
1843err_out:
1844 return 2;
1688} 1845}
1689 1846
1690static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) 1847static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
@@ -1693,11 +1850,10 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1693 struct cpl_rx_data *hdr = cplhdr(skb); 1850 struct cpl_rx_data *hdr = cplhdr(skb);
1694 unsigned int dlen = ntohs(hdr->len); 1851 unsigned int dlen = ntohs(hdr->len);
1695 unsigned int tid = GET_TID(hdr); 1852 unsigned int tid = GET_TID(hdr);
1696 struct tid_info *t = dev->rdev.lldi.tids;
1697 __u8 status = hdr->status; 1853 __u8 status = hdr->status;
1698 int disconnect = 0; 1854 int disconnect = 0;
1699 1855
1700 ep = lookup_tid(t, tid); 1856 ep = get_ep_from_tid(dev, tid);
1701 if (!ep) 1857 if (!ep)
1702 return 0; 1858 return 0;
1703 PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); 1859 PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
@@ -1715,7 +1871,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1715 break; 1871 break;
1716 case MPA_REQ_WAIT: 1872 case MPA_REQ_WAIT:
1717 ep->rcv_seq += dlen; 1873 ep->rcv_seq += dlen;
1718 process_mpa_request(ep, skb); 1874 disconnect = process_mpa_request(ep, skb);
1719 break; 1875 break;
1720 case FPDU_MODE: { 1876 case FPDU_MODE: {
1721 struct c4iw_qp_attributes attrs; 1877 struct c4iw_qp_attributes attrs;
@@ -1736,7 +1892,8 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1736 } 1892 }
1737 mutex_unlock(&ep->com.mutex); 1893 mutex_unlock(&ep->com.mutex);
1738 if (disconnect) 1894 if (disconnect)
1739 c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 1895 c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
1896 c4iw_put_ep(&ep->com);
1740 return 0; 1897 return 0;
1741} 1898}
1742 1899
@@ -1746,9 +1903,8 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1746 struct cpl_abort_rpl_rss *rpl = cplhdr(skb); 1903 struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
1747 int release = 0; 1904 int release = 0;
1748 unsigned int tid = GET_TID(rpl); 1905 unsigned int tid = GET_TID(rpl);
1749 struct tid_info *t = dev->rdev.lldi.tids;
1750 1906
1751 ep = lookup_tid(t, tid); 1907 ep = get_ep_from_tid(dev, tid);
1752 if (!ep) { 1908 if (!ep) {
1753 printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n"); 1909 printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
1754 return 0; 1910 return 0;
@@ -1770,10 +1926,11 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1770 1926
1771 if (release) 1927 if (release)
1772 release_ep_resources(ep); 1928 release_ep_resources(ep);
1929 c4iw_put_ep(&ep->com);
1773 return 0; 1930 return 0;
1774} 1931}
1775 1932
1776static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) 1933static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1777{ 1934{
1778 struct sk_buff *skb; 1935 struct sk_buff *skb;
1779 struct fw_ofld_connection_wr *req; 1936 struct fw_ofld_connection_wr *req;
@@ -1843,7 +2000,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1843 req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2); 2000 req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
1844 set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); 2001 set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
1845 set_bit(ACT_OFLD_CONN, &ep->com.history); 2002 set_bit(ACT_OFLD_CONN, &ep->com.history);
1846 c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 2003 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1847} 2004}
1848 2005
1849/* 2006/*
@@ -1986,6 +2143,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
1986 2143
1987 PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); 2144 PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
1988 init_timer(&ep->timer); 2145 init_timer(&ep->timer);
2146 c4iw_init_wr_wait(&ep->com.wr_wait);
1989 2147
1990 /* 2148 /*
1991 * Allocate an active TID to initiate a TCP connection. 2149 * Allocate an active TID to initiate a TCP connection.
@@ -2069,6 +2227,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2069 struct sockaddr_in *ra; 2227 struct sockaddr_in *ra;
2070 struct sockaddr_in6 *la6; 2228 struct sockaddr_in6 *la6;
2071 struct sockaddr_in6 *ra6; 2229 struct sockaddr_in6 *ra6;
2230 int ret = 0;
2072 2231
2073 ep = lookup_atid(t, atid); 2232 ep = lookup_atid(t, atid);
2074 la = (struct sockaddr_in *)&ep->com.local_addr; 2233 la = (struct sockaddr_in *)&ep->com.local_addr;
@@ -2104,9 +2263,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2104 mutex_unlock(&dev->rdev.stats.lock); 2263 mutex_unlock(&dev->rdev.stats.lock);
2105 if (ep->com.local_addr.ss_family == AF_INET && 2264 if (ep->com.local_addr.ss_family == AF_INET &&
2106 dev->rdev.lldi.enable_fw_ofld_conn) { 2265 dev->rdev.lldi.enable_fw_ofld_conn) {
2107 send_fw_act_open_req(ep, 2266 ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
2108 TID_TID_G(AOPEN_ATID_G( 2267 ntohl(rpl->atid_status))));
2109 ntohl(rpl->atid_status)))); 2268 if (ret)
2269 goto fail;
2110 return 0; 2270 return 0;
2111 } 2271 }
2112 break; 2272 break;
@@ -2146,6 +2306,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2146 break; 2306 break;
2147 } 2307 }
2148 2308
2309fail:
2149 connect_reply_upcall(ep, status2errno(status)); 2310 connect_reply_upcall(ep, status2errno(status));
2150 state_set(&ep->com, DEAD); 2311 state_set(&ep->com, DEAD);
2151 2312
@@ -2170,9 +2331,8 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2170static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2331static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2171{ 2332{
2172 struct cpl_pass_open_rpl *rpl = cplhdr(skb); 2333 struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2173 struct tid_info *t = dev->rdev.lldi.tids;
2174 unsigned int stid = GET_TID(rpl); 2334 unsigned int stid = GET_TID(rpl);
2175 struct c4iw_listen_ep *ep = lookup_stid(t, stid); 2335 struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2176 2336
2177 if (!ep) { 2337 if (!ep) {
2178 PDBG("%s stid %d lookup failure!\n", __func__, stid); 2338 PDBG("%s stid %d lookup failure!\n", __func__, stid);
@@ -2181,7 +2341,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2181 PDBG("%s ep %p status %d error %d\n", __func__, ep, 2341 PDBG("%s ep %p status %d error %d\n", __func__, ep,
2182 rpl->status, status2errno(rpl->status)); 2342 rpl->status, status2errno(rpl->status));
2183 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); 2343 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
2184 2344 c4iw_put_ep(&ep->com);
2185out: 2345out:
2186 return 0; 2346 return 0;
2187} 2347}
@@ -2189,17 +2349,17 @@ out:
2189static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2349static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2190{ 2350{
2191 struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); 2351 struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2192 struct tid_info *t = dev->rdev.lldi.tids;
2193 unsigned int stid = GET_TID(rpl); 2352 unsigned int stid = GET_TID(rpl);
2194 struct c4iw_listen_ep *ep = lookup_stid(t, stid); 2353 struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2195 2354
2196 PDBG("%s ep %p\n", __func__, ep); 2355 PDBG("%s ep %p\n", __func__, ep);
2197 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); 2356 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
2357 c4iw_put_ep(&ep->com);
2198 return 0; 2358 return 0;
2199} 2359}
2200 2360
2201static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, 2361static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2202 struct cpl_pass_accept_req *req) 2362 struct cpl_pass_accept_req *req)
2203{ 2363{
2204 struct cpl_pass_accept_rpl *rpl; 2364 struct cpl_pass_accept_rpl *rpl;
2205 unsigned int mtu_idx; 2365 unsigned int mtu_idx;
@@ -2287,10 +2447,9 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2287 rpl->opt0 = cpu_to_be64(opt0); 2447 rpl->opt0 = cpu_to_be64(opt0);
2288 rpl->opt2 = cpu_to_be32(opt2); 2448 rpl->opt2 = cpu_to_be32(opt2);
2289 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); 2449 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2290 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 2450 t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
2291 c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2292 2451
2293 return; 2452 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2294} 2453}
2295 2454
2296static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) 2455static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
@@ -2355,7 +2514,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2355 unsigned short hdrs; 2514 unsigned short hdrs;
2356 u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); 2515 u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2357 2516
2358 parent_ep = lookup_stid(t, stid); 2517 parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
2359 if (!parent_ep) { 2518 if (!parent_ep) {
2360 PDBG("%s connect request on invalid stid %d\n", __func__, stid); 2519 PDBG("%s connect request on invalid stid %d\n", __func__, stid);
2361 goto reject; 2520 goto reject;
@@ -2468,9 +2627,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2468 2627
2469 init_timer(&child_ep->timer); 2628 init_timer(&child_ep->timer);
2470 cxgb4_insert_tid(t, child_ep, hwtid); 2629 cxgb4_insert_tid(t, child_ep, hwtid);
2471 insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); 2630 insert_ep_tid(child_ep);
2472 accept_cr(child_ep, skb, req); 2631 if (accept_cr(child_ep, skb, req)) {
2473 set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); 2632 c4iw_put_ep(&parent_ep->com);
2633 release_ep_resources(child_ep);
2634 } else {
2635 set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2636 }
2474 if (iptype == 6) { 2637 if (iptype == 6) {
2475 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; 2638 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2476 cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], 2639 cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
@@ -2479,6 +2642,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2479 goto out; 2642 goto out;
2480reject: 2643reject:
2481 reject_cr(dev, hwtid, skb); 2644 reject_cr(dev, hwtid, skb);
2645 if (parent_ep)
2646 c4iw_put_ep(&parent_ep->com);
2482out: 2647out:
2483 return 0; 2648 return 0;
2484} 2649}
@@ -2487,10 +2652,10 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2487{ 2652{
2488 struct c4iw_ep *ep; 2653 struct c4iw_ep *ep;
2489 struct cpl_pass_establish *req = cplhdr(skb); 2654 struct cpl_pass_establish *req = cplhdr(skb);
2490 struct tid_info *t = dev->rdev.lldi.tids;
2491 unsigned int tid = GET_TID(req); 2655 unsigned int tid = GET_TID(req);
2656 int ret;
2492 2657
2493 ep = lookup_tid(t, tid); 2658 ep = get_ep_from_tid(dev, tid);
2494 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 2659 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2495 ep->snd_seq = be32_to_cpu(req->snd_isn); 2660 ep->snd_seq = be32_to_cpu(req->snd_isn);
2496 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 2661 ep->rcv_seq = be32_to_cpu(req->rcv_isn);
@@ -2501,10 +2666,15 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2501 set_emss(ep, ntohs(req->tcp_opt)); 2666 set_emss(ep, ntohs(req->tcp_opt));
2502 2667
2503 dst_confirm(ep->dst); 2668 dst_confirm(ep->dst);
2504 state_set(&ep->com, MPA_REQ_WAIT); 2669 mutex_lock(&ep->com.mutex);
2670 ep->com.state = MPA_REQ_WAIT;
2505 start_ep_timer(ep); 2671 start_ep_timer(ep);
2506 send_flowc(ep, skb);
2507 set_bit(PASS_ESTAB, &ep->com.history); 2672 set_bit(PASS_ESTAB, &ep->com.history);
2673 ret = send_flowc(ep, skb);
2674 mutex_unlock(&ep->com.mutex);
2675 if (ret)
2676 c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2677 c4iw_put_ep(&ep->com);
2508 2678
2509 return 0; 2679 return 0;
2510} 2680}
@@ -2516,11 +2686,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2516 struct c4iw_qp_attributes attrs; 2686 struct c4iw_qp_attributes attrs;
2517 int disconnect = 1; 2687 int disconnect = 1;
2518 int release = 0; 2688 int release = 0;
2519 struct tid_info *t = dev->rdev.lldi.tids;
2520 unsigned int tid = GET_TID(hdr); 2689 unsigned int tid = GET_TID(hdr);
2521 int ret; 2690 int ret;
2522 2691
2523 ep = lookup_tid(t, tid); 2692 ep = get_ep_from_tid(dev, tid);
2693 if (!ep)
2694 return 0;
2695
2524 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 2696 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2525 dst_confirm(ep->dst); 2697 dst_confirm(ep->dst);
2526 2698
@@ -2592,6 +2764,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2592 c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 2764 c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2593 if (release) 2765 if (release)
2594 release_ep_resources(ep); 2766 release_ep_resources(ep);
2767 c4iw_put_ep(&ep->com);
2595 return 0; 2768 return 0;
2596} 2769}
2597 2770
@@ -2604,10 +2777,12 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2604 struct c4iw_qp_attributes attrs; 2777 struct c4iw_qp_attributes attrs;
2605 int ret; 2778 int ret;
2606 int release = 0; 2779 int release = 0;
2607 struct tid_info *t = dev->rdev.lldi.tids;
2608 unsigned int tid = GET_TID(req); 2780 unsigned int tid = GET_TID(req);
2609 2781
2610 ep = lookup_tid(t, tid); 2782 ep = get_ep_from_tid(dev, tid);
2783 if (!ep)
2784 return 0;
2785
2611 if (is_neg_adv(req->status)) { 2786 if (is_neg_adv(req->status)) {
2612 PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", 2787 PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
2613 __func__, ep->hwtid, req->status, 2788 __func__, ep->hwtid, req->status,
@@ -2616,7 +2791,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2616 mutex_lock(&dev->rdev.stats.lock); 2791 mutex_lock(&dev->rdev.stats.lock);
2617 dev->rdev.stats.neg_adv++; 2792 dev->rdev.stats.neg_adv++;
2618 mutex_unlock(&dev->rdev.stats.lock); 2793 mutex_unlock(&dev->rdev.stats.lock);
2619 return 0; 2794 goto deref_ep;
2620 } 2795 }
2621 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, 2796 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
2622 ep->com.state); 2797 ep->com.state);
@@ -2633,6 +2808,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2633 mutex_lock(&ep->com.mutex); 2808 mutex_lock(&ep->com.mutex);
2634 switch (ep->com.state) { 2809 switch (ep->com.state) {
2635 case CONNECTING: 2810 case CONNECTING:
2811 c4iw_put_ep(&ep->parent_ep->com);
2636 break; 2812 break;
2637 case MPA_REQ_WAIT: 2813 case MPA_REQ_WAIT:
2638 (void)stop_ep_timer(ep); 2814 (void)stop_ep_timer(ep);
@@ -2681,7 +2857,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2681 case DEAD: 2857 case DEAD:
2682 PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); 2858 PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2683 mutex_unlock(&ep->com.mutex); 2859 mutex_unlock(&ep->com.mutex);
2684 return 0; 2860 goto deref_ep;
2685 default: 2861 default:
2686 BUG_ON(1); 2862 BUG_ON(1);
2687 break; 2863 break;
@@ -2728,6 +2904,10 @@ out:
2728 c4iw_reconnect(ep); 2904 c4iw_reconnect(ep);
2729 } 2905 }
2730 2906
2907deref_ep:
2908 c4iw_put_ep(&ep->com);
2909 /* Dereferencing ep, referenced in peer_abort_intr() */
2910 c4iw_put_ep(&ep->com);
2731 return 0; 2911 return 0;
2732} 2912}
2733 2913
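The two back-to-back puts above balance two independent references: one taken by peer_abort() itself through get_ep_from_tid(), and one taken earlier by peer_abort_intr() before it rescheduled the CPL to the work queue (see its comment near the end of this file). A compressed sketch of that hand-off (the real interrupt-path handler also filters negative advice):

/* Sketch: reference hand-off between the interrupt-path and
 * work-queue abort handlers.
 */
static int example_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_abort_req_rss *req = cplhdr(skb);
	struct c4iw_ep *ep = get_ep_from_tid(dev, GET_TID(req));

	if (!ep)
		return 0;
	/* reference deliberately kept; peer_abort() drops it later */
	sched(dev, skb);
	return 0;
}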
@@ -2737,16 +2917,18 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2737 struct c4iw_qp_attributes attrs; 2917 struct c4iw_qp_attributes attrs;
2738 struct cpl_close_con_rpl *rpl = cplhdr(skb); 2918 struct cpl_close_con_rpl *rpl = cplhdr(skb);
2739 int release = 0; 2919 int release = 0;
2740 struct tid_info *t = dev->rdev.lldi.tids;
2741 unsigned int tid = GET_TID(rpl); 2920 unsigned int tid = GET_TID(rpl);
2742 2921
2743 ep = lookup_tid(t, tid); 2922 ep = get_ep_from_tid(dev, tid);
2923 if (!ep)
2924 return 0;
2744 2925
2745 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 2926 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2746 BUG_ON(!ep); 2927 BUG_ON(!ep);
2747 2928
2748 /* The cm_id may be null if we failed to connect */ 2929 /* The cm_id may be null if we failed to connect */
2749 mutex_lock(&ep->com.mutex); 2930 mutex_lock(&ep->com.mutex);
2931 set_bit(CLOSE_CON_RPL, &ep->com.history);
2750 switch (ep->com.state) { 2932 switch (ep->com.state) {
2751 case CLOSING: 2933 case CLOSING:
2752 __state_set(&ep->com, MORIBUND); 2934 __state_set(&ep->com, MORIBUND);
@@ -2774,18 +2956,18 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2774 mutex_unlock(&ep->com.mutex); 2956 mutex_unlock(&ep->com.mutex);
2775 if (release) 2957 if (release)
2776 release_ep_resources(ep); 2958 release_ep_resources(ep);
2959 c4iw_put_ep(&ep->com);
2777 return 0; 2960 return 0;
2778} 2961}
2779 2962
2780static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) 2963static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
2781{ 2964{
2782 struct cpl_rdma_terminate *rpl = cplhdr(skb); 2965 struct cpl_rdma_terminate *rpl = cplhdr(skb);
2783 struct tid_info *t = dev->rdev.lldi.tids;
2784 unsigned int tid = GET_TID(rpl); 2966 unsigned int tid = GET_TID(rpl);
2785 struct c4iw_ep *ep; 2967 struct c4iw_ep *ep;
2786 struct c4iw_qp_attributes attrs; 2968 struct c4iw_qp_attributes attrs;
2787 2969
2788 ep = lookup_tid(t, tid); 2970 ep = get_ep_from_tid(dev, tid);
2789 BUG_ON(!ep); 2971 BUG_ON(!ep);
2790 2972
2791 if (ep && ep->com.qp) { 2973 if (ep && ep->com.qp) {
@@ -2796,6 +2978,7 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
2796 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 2978 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2797 } else 2979 } else
2798 printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid); 2980 printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
2981 c4iw_put_ep(&ep->com);
2799 2982
2800 return 0; 2983 return 0;
2801} 2984}
@@ -2811,15 +2994,16 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
2811 struct cpl_fw4_ack *hdr = cplhdr(skb); 2994 struct cpl_fw4_ack *hdr = cplhdr(skb);
2812 u8 credits = hdr->credits; 2995 u8 credits = hdr->credits;
2813 unsigned int tid = GET_TID(hdr); 2996 unsigned int tid = GET_TID(hdr);
2814 struct tid_info *t = dev->rdev.lldi.tids;
2815 2997
2816 2998
2817 ep = lookup_tid(t, tid); 2999 ep = get_ep_from_tid(dev, tid);
3000 if (!ep)
3001 return 0;
2818 PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); 3002 PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
2819 if (credits == 0) { 3003 if (credits == 0) {
2820 PDBG("%s 0 credit ack ep %p tid %u state %u\n", 3004 PDBG("%s 0 credit ack ep %p tid %u state %u\n",
2821 __func__, ep, ep->hwtid, state_read(&ep->com)); 3005 __func__, ep, ep->hwtid, state_read(&ep->com));
2822 return 0; 3006 goto out;
2823 } 3007 }
2824 3008
2825 dst_confirm(ep->dst); 3009 dst_confirm(ep->dst);
@@ -2829,7 +3013,13 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
2829 state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0); 3013 state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
2830 kfree_skb(ep->mpa_skb); 3014 kfree_skb(ep->mpa_skb);
2831 ep->mpa_skb = NULL; 3015 ep->mpa_skb = NULL;
3016 mutex_lock(&ep->com.mutex);
3017 if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
3018 stop_ep_timer(ep);
3019 mutex_unlock(&ep->com.mutex);
2832 } 3020 }
3021out:
3022 c4iw_put_ep(&ep->com);
2833 return 0; 3023 return 0;
2834} 3024}
2835 3025
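STOP_MPA_TIMER turns the MPA timer shutdown into a handshake: c4iw_accept_cr() (below) sets the flag immediately before sending the MPA reply, and fw4_ack() only stops the timer once the hardware has acknowledged that reply. A compressed sketch of the two sides (both real functions are abbreviated):

/* Sketch: producer side, on the accept path ... */
static void example_accept_side(struct c4iw_ep *ep)
{
	set_bit(STOP_MPA_TIMER, &ep->com.flags);	/* arm the handshake */
	/* send_mpa_reply(ep, pdata, plen); */
}

/* ... and consumer side, run once ep->mpa_skb has been acked. */
static void example_ack_side(struct c4iw_ep *ep)
{
	mutex_lock(&ep->com.mutex);
	if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
		stop_ep_timer(ep);
	mutex_unlock(&ep->com.mutex);
}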
@@ -2841,22 +3031,23 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2841 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 3031 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2842 3032
2843 mutex_lock(&ep->com.mutex); 3033 mutex_lock(&ep->com.mutex);
2844 if (ep->com.state == DEAD) { 3034 if (ep->com.state != MPA_REQ_RCVD) {
2845 mutex_unlock(&ep->com.mutex); 3035 mutex_unlock(&ep->com.mutex);
2846 c4iw_put_ep(&ep->com); 3036 c4iw_put_ep(&ep->com);
2847 return -ECONNRESET; 3037 return -ECONNRESET;
2848 } 3038 }
2849 set_bit(ULP_REJECT, &ep->com.history); 3039 set_bit(ULP_REJECT, &ep->com.history);
2850 BUG_ON(ep->com.state != MPA_REQ_RCVD);
2851 if (mpa_rev == 0) 3040 if (mpa_rev == 0)
2852 abort_connection(ep, NULL, GFP_KERNEL); 3041 disconnect = 2;
2853 else { 3042 else {
2854 err = send_mpa_reject(ep, pdata, pdata_len); 3043 err = send_mpa_reject(ep, pdata, pdata_len);
2855 disconnect = 1; 3044 disconnect = 1;
2856 } 3045 }
2857 mutex_unlock(&ep->com.mutex); 3046 mutex_unlock(&ep->com.mutex);
2858 if (disconnect) 3047 if (disconnect) {
2859 err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 3048 stop_ep_timer(ep);
3049 err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
3050 }
2860 c4iw_put_ep(&ep->com); 3051 c4iw_put_ep(&ep->com);
2861 return 0; 3052 return 0;
2862} 3053}
@@ -2869,24 +3060,23 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2869 struct c4iw_ep *ep = to_ep(cm_id); 3060 struct c4iw_ep *ep = to_ep(cm_id);
2870 struct c4iw_dev *h = to_c4iw_dev(cm_id->device); 3061 struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2871 struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); 3062 struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
3063 int abort = 0;
2872 3064
2873 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 3065 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2874 3066
2875 mutex_lock(&ep->com.mutex); 3067 mutex_lock(&ep->com.mutex);
2876 if (ep->com.state == DEAD) { 3068 if (ep->com.state != MPA_REQ_RCVD) {
2877 err = -ECONNRESET; 3069 err = -ECONNRESET;
2878 goto err; 3070 goto err_out;
2879 } 3071 }
2880 3072
2881 BUG_ON(ep->com.state != MPA_REQ_RCVD);
2882 BUG_ON(!qp); 3073 BUG_ON(!qp);
2883 3074
2884 set_bit(ULP_ACCEPT, &ep->com.history); 3075 set_bit(ULP_ACCEPT, &ep->com.history);
2885 if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) || 3076 if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
2886 (conn_param->ird > cur_max_read_depth(ep->com.dev))) { 3077 (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
2887 abort_connection(ep, NULL, GFP_KERNEL);
2888 err = -EINVAL; 3078 err = -EINVAL;
2889 goto err; 3079 goto err_abort;
2890 } 3080 }
2891 3081
2892 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 3082 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
@@ -2898,9 +3088,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2898 ep->ord = conn_param->ord; 3088 ep->ord = conn_param->ord;
2899 send_mpa_reject(ep, conn_param->private_data, 3089 send_mpa_reject(ep, conn_param->private_data,
2900 conn_param->private_data_len); 3090 conn_param->private_data_len);
2901 abort_connection(ep, NULL, GFP_KERNEL);
2902 err = -ENOMEM; 3091 err = -ENOMEM;
2903 goto err; 3092 goto err_abort;
2904 } 3093 }
2905 } 3094 }
2906 if (conn_param->ird < ep->ord) { 3095 if (conn_param->ird < ep->ord) {
@@ -2908,9 +3097,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2908 ep->ord <= h->rdev.lldi.max_ordird_qp) { 3097 ep->ord <= h->rdev.lldi.max_ordird_qp) {
2909 conn_param->ird = ep->ord; 3098 conn_param->ird = ep->ord;
2910 } else { 3099 } else {
2911 abort_connection(ep, NULL, GFP_KERNEL);
2912 err = -ENOMEM; 3100 err = -ENOMEM;
2913 goto err; 3101 goto err_abort;
2914 } 3102 }
2915 } 3103 }
2916 } 3104 }
@@ -2929,8 +3117,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2929 3117
2930 PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); 3118 PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
2931 3119
2932 cm_id->add_ref(cm_id);
2933 ep->com.cm_id = cm_id; 3120 ep->com.cm_id = cm_id;
3121 ref_cm_id(&ep->com);
2934 ep->com.qp = qp; 3122 ep->com.qp = qp;
2935 ref_qp(ep); 3123 ref_qp(ep);
2936 3124
@@ -2951,23 +3139,27 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2951 err = c4iw_modify_qp(ep->com.qp->rhp, 3139 err = c4iw_modify_qp(ep->com.qp->rhp,
2952 ep->com.qp, mask, &attrs, 1); 3140 ep->com.qp, mask, &attrs, 1);
2953 if (err) 3141 if (err)
2954 goto err1; 3142 goto err_deref_cm_id;
3143
3144 set_bit(STOP_MPA_TIMER, &ep->com.flags);
2955 err = send_mpa_reply(ep, conn_param->private_data, 3145 err = send_mpa_reply(ep, conn_param->private_data,
2956 conn_param->private_data_len); 3146 conn_param->private_data_len);
2957 if (err) 3147 if (err)
2958 goto err1; 3148 goto err_deref_cm_id;
2959 3149
2960 __state_set(&ep->com, FPDU_MODE); 3150 __state_set(&ep->com, FPDU_MODE);
2961 established_upcall(ep); 3151 established_upcall(ep);
2962 mutex_unlock(&ep->com.mutex); 3152 mutex_unlock(&ep->com.mutex);
2963 c4iw_put_ep(&ep->com); 3153 c4iw_put_ep(&ep->com);
2964 return 0; 3154 return 0;
2965err1: 3155err_deref_cm_id:
2966 ep->com.cm_id = NULL; 3156 deref_cm_id(&ep->com);
2967 abort_connection(ep, NULL, GFP_KERNEL); 3157err_abort:
2968 cm_id->rem_ref(cm_id); 3158 abort = 1;
2969err: 3159err_out:
2970 mutex_unlock(&ep->com.mutex); 3160 mutex_unlock(&ep->com.mutex);
3161 if (abort)
3162 c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2971 c4iw_put_ep(&ep->com); 3163 c4iw_put_ep(&ep->com);
2972 return err; 3164 return err;
2973} 3165}
@@ -3067,9 +3259,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3067 if (peer2peer && ep->ord == 0) 3259 if (peer2peer && ep->ord == 0)
3068 ep->ord = 1; 3260 ep->ord = 1;
3069 3261
3070 cm_id->add_ref(cm_id);
3071 ep->com.dev = dev;
3072 ep->com.cm_id = cm_id; 3262 ep->com.cm_id = cm_id;
3263 ref_cm_id(&ep->com);
3264 ep->com.dev = dev;
3073 ep->com.qp = get_qhp(dev, conn_param->qpn); 3265 ep->com.qp = get_qhp(dev, conn_param->qpn);
3074 if (!ep->com.qp) { 3266 if (!ep->com.qp) {
3075 PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); 3267 PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
@@ -3108,7 +3300,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3108 /* 3300 /*
3109 * Handle loopback requests to INADDR_ANY. 3301 * Handle loopback requests to INADDR_ANY.
3110 */ 3302 */
3111 if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) { 3303 if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
3112 err = pick_local_ipaddrs(dev, cm_id); 3304 err = pick_local_ipaddrs(dev, cm_id);
3113 if (err) 3305 if (err)
3114 goto fail1; 3306 goto fail1;
@@ -3176,7 +3368,7 @@ fail2:
3176 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); 3368 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
3177 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 3369 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3178fail1: 3370fail1:
3179 cm_id->rem_ref(cm_id); 3371 deref_cm_id(&ep->com);
3180 c4iw_put_ep(&ep->com); 3372 c4iw_put_ep(&ep->com);
3181out: 3373out:
3182 return err; 3374 return err;
@@ -3270,8 +3462,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3270 goto fail1; 3462 goto fail1;
3271 } 3463 }
3272 PDBG("%s ep %p\n", __func__, ep); 3464 PDBG("%s ep %p\n", __func__, ep);
3273 cm_id->add_ref(cm_id);
3274 ep->com.cm_id = cm_id; 3465 ep->com.cm_id = cm_id;
3466 ref_cm_id(&ep->com);
3275 ep->com.dev = dev; 3467 ep->com.dev = dev;
3276 ep->backlog = backlog; 3468 ep->backlog = backlog;
3277 memcpy(&ep->com.local_addr, &cm_id->m_local_addr, 3469 memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
@@ -3311,7 +3503,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3311 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3503 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3312 ep->com.local_addr.ss_family); 3504 ep->com.local_addr.ss_family);
3313fail2: 3505fail2:
3314 cm_id->rem_ref(cm_id); 3506 deref_cm_id(&ep->com);
3315 c4iw_put_ep(&ep->com); 3507 c4iw_put_ep(&ep->com);
3316fail1: 3508fail1:
3317out: 3509out:
@@ -3350,7 +3542,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3350 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3542 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3351 ep->com.local_addr.ss_family); 3543 ep->com.local_addr.ss_family);
3352done: 3544done:
3353 cm_id->rem_ref(cm_id); 3545 deref_cm_id(&ep->com);
3354 c4iw_put_ep(&ep->com); 3546 c4iw_put_ep(&ep->com);
3355 return err; 3547 return err;
3356} 3548}
@@ -3367,6 +3559,12 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3367 PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, 3559 PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
3368 states[ep->com.state], abrupt); 3560 states[ep->com.state], abrupt);
3369 3561
3562 /*
3563 * Ref the ep here in case we have fatal errors causing the
3564 * ep to be released and freed.
3565 */
3566 c4iw_get_ep(&ep->com);
3567
3370 rdev = &ep->com.dev->rdev; 3568 rdev = &ep->com.dev->rdev;
3371 if (c4iw_fatal_error(rdev)) { 3569 if (c4iw_fatal_error(rdev)) {
3372 fatal = 1; 3570 fatal = 1;
@@ -3418,10 +3616,30 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3418 set_bit(EP_DISC_CLOSE, &ep->com.history); 3616 set_bit(EP_DISC_CLOSE, &ep->com.history);
3419 ret = send_halfclose(ep, gfp); 3617 ret = send_halfclose(ep, gfp);
3420 } 3618 }
3421 if (ret) 3619 if (ret) {
3620 set_bit(EP_DISC_FAIL, &ep->com.history);
3621 if (!abrupt) {
3622 stop_ep_timer(ep);
3623 close_complete_upcall(ep, -EIO);
3624 }
3625 if (ep->com.qp) {
3626 struct c4iw_qp_attributes attrs;
3627
3628 attrs.next_state = C4IW_QP_STATE_ERROR;
3629 ret = c4iw_modify_qp(ep->com.qp->rhp,
3630 ep->com.qp,
3631 C4IW_QP_ATTR_NEXT_STATE,
3632 &attrs, 1);
3633 if (ret)
3634 pr_err(MOD
3635 "%s - qp <- error failed!\n",
3636 __func__);
3637 }
3422 fatal = 1; 3638 fatal = 1;
3639 }
3423 } 3640 }
3424 mutex_unlock(&ep->com.mutex); 3641 mutex_unlock(&ep->com.mutex);
3642 c4iw_put_ep(&ep->com);
3425 if (fatal) 3643 if (fatal)
3426 release_ep_resources(ep); 3644 release_ep_resources(ep);
3427 return ret; 3645 return ret;
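c4iw_ep_disconnect() now takes its own reference on entry, and on a failed close or abort transmit it flags EP_DISC_FAIL, finishes the close upcall for the non-abrupt case and moves any attached QP to ERROR. That is what makes the removed abort_connection() helper unnecessary; a sketch of the caller pattern that replaces it throughout this patch (mutex handling abbreviated):

/* Sketch: the replacement for the old abort_connection() call sites.
 * ep->com.mutex must not be held here, since c4iw_ep_disconnect()
 * takes it itself.
 */
static void example_abort_path(struct c4iw_ep *ep)
{
	c4iw_ep_disconnect(ep, 1 /* abrupt */, GFP_KERNEL);
	c4iw_put_ep(&ep->com);	/* caller's own reference */
}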
@@ -3676,7 +3894,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
3676 struct cpl_pass_accept_req *req = (void *)(rss + 1); 3894 struct cpl_pass_accept_req *req = (void *)(rss + 1);
3677 struct l2t_entry *e; 3895 struct l2t_entry *e;
3678 struct dst_entry *dst; 3896 struct dst_entry *dst;
3679 struct c4iw_ep *lep; 3897 struct c4iw_ep *lep = NULL;
3680 u16 window; 3898 u16 window;
3681 struct port_info *pi; 3899 struct port_info *pi;
3682 struct net_device *pdev; 3900 struct net_device *pdev;
@@ -3701,7 +3919,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
3701 */ 3919 */
3702 stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); 3920 stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
3703 3921
3704 lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); 3922 lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
3705 if (!lep) { 3923 if (!lep) {
3706 PDBG("%s connect request on invalid stid %d\n", __func__, stid); 3924 PDBG("%s connect request on invalid stid %d\n", __func__, stid);
3707 goto reject; 3925 goto reject;
@@ -3802,6 +4020,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
3802free_dst: 4020free_dst:
3803 dst_release(dst); 4021 dst_release(dst);
3804reject: 4022reject:
4023 if (lep)
4024 c4iw_put_ep(&lep->com);
3805 return 0; 4025 return 0;
3806} 4026}
3807 4027
@@ -3809,7 +4029,7 @@ reject:
3809 * These are the real handlers that are called from a 4029 * These are the real handlers that are called from a
3810 * work queue. 4030 * work queue.
3811 */ 4031 */
3812static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { 4032static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
3813 [CPL_ACT_ESTABLISH] = act_establish, 4033 [CPL_ACT_ESTABLISH] = act_establish,
3814 [CPL_ACT_OPEN_RPL] = act_open_rpl, 4034 [CPL_ACT_OPEN_RPL] = act_open_rpl,
3815 [CPL_RX_DATA] = rx_data, 4035 [CPL_RX_DATA] = rx_data,
@@ -3825,7 +4045,9 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
3825 [CPL_RDMA_TERMINATE] = terminate, 4045 [CPL_RDMA_TERMINATE] = terminate,
3826 [CPL_FW4_ACK] = fw4_ack, 4046 [CPL_FW4_ACK] = fw4_ack,
3827 [CPL_FW6_MSG] = deferred_fw6_msg, 4047 [CPL_FW6_MSG] = deferred_fw6_msg,
3828 [CPL_RX_PKT] = rx_pkt 4048 [CPL_RX_PKT] = rx_pkt,
4049 [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
4050 [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
3829}; 4051};
3830 4052
3831static void process_timeout(struct c4iw_ep *ep) 4053static void process_timeout(struct c4iw_ep *ep)
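Growing work_handlers[] past NUM_CPL_CMDS is what makes the fake opcodes defined earlier dispatchable at all. The sketch below assumes process_work() indexes this table directly by the opcode stored in the CPL header, as the fake-CPL comment earlier implies; the real function is also expected to recover the dev pointer that sched() saved in skb->cb:

/* Sketch: the dispatch step that routes FAKE_CPL_PUT_EP_SAFE and
 * FAKE_CPL_PASS_PUT_EP_SAFE to their handlers.  Only opcodes below
 * NUM_CPL_CMDS + NUM_FAKE_CPLS may ever be queued.
 */
static int example_dispatch(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_act_establish *rpl = cplhdr(skb);

	return work_handlers[rpl->ot.opcode](dev, skb);
}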
@@ -3839,11 +4061,12 @@ static void process_timeout(struct c4iw_ep *ep)
3839 set_bit(TIMEDOUT, &ep->com.history); 4061 set_bit(TIMEDOUT, &ep->com.history);
3840 switch (ep->com.state) { 4062 switch (ep->com.state) {
3841 case MPA_REQ_SENT: 4063 case MPA_REQ_SENT:
3842 __state_set(&ep->com, ABORTING);
3843 connect_reply_upcall(ep, -ETIMEDOUT); 4064 connect_reply_upcall(ep, -ETIMEDOUT);
3844 break; 4065 break;
3845 case MPA_REQ_WAIT: 4066 case MPA_REQ_WAIT:
3846 __state_set(&ep->com, ABORTING); 4067 case MPA_REQ_RCVD:
4068 case MPA_REP_SENT:
4069 case FPDU_MODE:
3847 break; 4070 break;
3848 case CLOSING: 4071 case CLOSING:
3849 case MORIBUND: 4072 case MORIBUND:
@@ -3853,7 +4076,6 @@ static void process_timeout(struct c4iw_ep *ep)
3853 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 4076 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
3854 &attrs, 1); 4077 &attrs, 1);
3855 } 4078 }
3856 __state_set(&ep->com, ABORTING);
3857 close_complete_upcall(ep, -ETIMEDOUT); 4079 close_complete_upcall(ep, -ETIMEDOUT);
3858 break; 4080 break;
3859 case ABORTING: 4081 case ABORTING:
@@ -3871,9 +4093,9 @@ static void process_timeout(struct c4iw_ep *ep)
3871 __func__, ep, ep->hwtid, ep->com.state); 4093 __func__, ep, ep->hwtid, ep->com.state);
3872 abort = 0; 4094 abort = 0;
3873 } 4095 }
3874 if (abort)
3875 abort_connection(ep, NULL, GFP_KERNEL);
3876 mutex_unlock(&ep->com.mutex); 4096 mutex_unlock(&ep->com.mutex);
4097 if (abort)
4098 c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3877 c4iw_put_ep(&ep->com); 4099 c4iw_put_ep(&ep->com);
3878} 4100}
3879 4101
@@ -4006,10 +4228,10 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4006{ 4228{
4007 struct cpl_abort_req_rss *req = cplhdr(skb); 4229 struct cpl_abort_req_rss *req = cplhdr(skb);
4008 struct c4iw_ep *ep; 4230 struct c4iw_ep *ep;
4009 struct tid_info *t = dev->rdev.lldi.tids;
4010 unsigned int tid = GET_TID(req); 4231 unsigned int tid = GET_TID(req);
4011 4232
4012 ep = lookup_tid(t, tid); 4233 ep = get_ep_from_tid(dev, tid);
4234 /* This EP will be dereferenced in peer_abort() */
4013 if (!ep) { 4235 if (!ep) {
4014 printk(KERN_WARNING MOD 4236 printk(KERN_WARNING MOD
4015 "Abort on non-existent endpoint, tid %d\n", tid); 4237 "Abort on non-existent endpoint, tid %d\n", tid);
@@ -4020,24 +4242,13 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4020 PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", 4242 PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
4021 __func__, ep->hwtid, req->status, 4243 __func__, ep->hwtid, req->status,
4022 neg_adv_str(req->status)); 4244 neg_adv_str(req->status));
4023 ep->stats.abort_neg_adv++; 4245 goto out;
4024 dev->rdev.stats.neg_adv++;
4025 kfree_skb(skb);
4026 return 0;
4027 } 4246 }
4028 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, 4247 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
4029 ep->com.state); 4248 ep->com.state);
4030 4249
4031 /* 4250 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
4032 * Wake up any threads in rdma_init() or rdma_fini(). 4251out:
4033 * However, if we are on MPAv2 and want to retry with MPAv1
4034 * then, don't wake up yet.
4035 */
4036 if (mpa_rev == 2 && !ep->tried_with_mpa_v1) {
4037 if (ep->com.state != MPA_REQ_SENT)
4038 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
4039 } else
4040 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
4041 sched(dev, skb); 4252 sched(dev, skb);
4042 return 0; 4253 return 0;
4043} 4254}
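The iw_cxgb4 changes above replace bare lookup_stid()/lookup_tid() calls with get_ep_from_stid()/get_ep_from_tid(), which take a reference on the endpoint that the handler later drops with c4iw_put_ep(), so the endpoint cannot be freed while a CPL handler is still using it. A minimal sketch of that lookup-and-hold pattern (illustration only, not part of the patch; the dev/ep types and the ep_lock, ep_idr and kref field names are hypothetical, and the usual <linux/idr.h>, <linux/kref.h> and <linux/spinlock.h> helpers are assumed):

struct ep *ep_lookup_and_get(struct dev *dev, unsigned int tid)
{
	struct ep *ep;

	spin_lock_irq(&dev->ep_lock);
	ep = idr_find(&dev->ep_idr, tid);	/* find the endpoint */
	if (ep)
		kref_get(&ep->kref);		/* hold it across use */
	spin_unlock_irq(&dev->ep_lock);
	return ep;	/* caller drops the reference when done */
}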
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index df43f871ab61..f6f34a75af27 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -755,6 +755,7 @@ enum c4iw_ep_flags {
755 CLOSE_SENT = 3, 755 CLOSE_SENT = 3,
756 TIMEOUT = 4, 756 TIMEOUT = 4,
757 QP_REFERENCED = 5, 757 QP_REFERENCED = 5,
758 STOP_MPA_TIMER = 7,
758}; 759};
759 760
760enum c4iw_ep_history { 761enum c4iw_ep_history {
@@ -779,7 +780,13 @@ enum c4iw_ep_history {
779 EP_DISC_ABORT = 18, 780 EP_DISC_ABORT = 18,
780 CONN_RPL_UPCALL = 19, 781 CONN_RPL_UPCALL = 19,
781 ACT_RETRY_NOMEM = 20, 782 ACT_RETRY_NOMEM = 20,
782 ACT_RETRY_INUSE = 21 783 ACT_RETRY_INUSE = 21,
784 CLOSE_CON_RPL = 22,
785 EP_DISC_FAIL = 24,
786 QP_REFED = 25,
787 QP_DEREFED = 26,
788 CM_ID_REFED = 27,
789 CM_ID_DEREFED = 28,
783}; 790};
784 791
785struct c4iw_ep_common { 792struct c4iw_ep_common {
@@ -917,9 +924,8 @@ void c4iw_qp_rem_ref(struct ib_qp *qp);
917struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, 924struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
918 enum ib_mr_type mr_type, 925 enum ib_mr_type mr_type,
919 u32 max_num_sg); 926 u32 max_num_sg);
920int c4iw_map_mr_sg(struct ib_mr *ibmr, 927int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
921 struct scatterlist *sg, 928 unsigned int *sg_offset);
922 int sg_nents);
923int c4iw_dealloc_mw(struct ib_mw *mw); 929int c4iw_dealloc_mw(struct ib_mw *mw);
924struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 930struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
925 struct ib_udata *udata); 931 struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 008be07d5604..55d0651ee4de 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -86,8 +86,9 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
86 (wait ? FW_WR_COMPL_F : 0)); 86 (wait ? FW_WR_COMPL_F : 0));
87 req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; 87 req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
88 req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16))); 88 req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16)));
89 req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE)); 89 req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) |
90 req->cmd |= cpu_to_be32(T5_ULP_MEMIO_ORDER_V(1)); 90 T5_ULP_MEMIO_ORDER_V(1) |
91 T5_ULP_MEMIO_FID_V(rdev->lldi.rxq_ids[0]));
91 req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5)); 92 req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5));
92 req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); 93 req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16));
93 req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr)); 94 req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr));
@@ -690,15 +691,14 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
690 return 0; 691 return 0;
691} 692}
692 693
693int c4iw_map_mr_sg(struct ib_mr *ibmr, 694int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
694 struct scatterlist *sg, 695 unsigned int *sg_offset)
695 int sg_nents)
696{ 696{
697 struct c4iw_mr *mhp = to_c4iw_mr(ibmr); 697 struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
698 698
699 mhp->mpl_len = 0; 699 mhp->mpl_len = 0;
700 700
701 return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page); 701 return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page);
702} 702}
703 703
704int c4iw_dereg_mr(struct ib_mr *ib_mr) 704int c4iw_dereg_mr(struct ib_mr *ib_mr)
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 819767681445..8b9532034558 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -50,8 +50,6 @@
50#include <rdma/ib_pack.h> 50#include <rdma/ib_pack.h>
51#include <rdma/rdma_cm.h> 51#include <rdma/rdma_cm.h>
52#include <rdma/iw_cm.h> 52#include <rdma/iw_cm.h>
53#include <rdma/iw_portmap.h>
54#include <rdma/rdma_netlink.h>
55#include <crypto/hash.h> 53#include <crypto/hash.h>
56 54
57#include "i40iw_status.h" 55#include "i40iw_status.h"
@@ -254,6 +252,7 @@ struct i40iw_device {
254 u32 arp_table_size; 252 u32 arp_table_size;
255 u32 next_arp_index; 253 u32 next_arp_index;
256 spinlock_t resource_lock; /* hw resource access */ 254 spinlock_t resource_lock; /* hw resource access */
255 spinlock_t qptable_lock;
257 u32 vendor_id; 256 u32 vendor_id;
258 u32 vendor_part_id; 257 u32 vendor_part_id;
259 u32 of_device_registered; 258 u32 of_device_registered;
@@ -392,7 +391,7 @@ void i40iw_flush_wqes(struct i40iw_device *iwdev,
392 391
393void i40iw_manage_arp_cache(struct i40iw_device *iwdev, 392void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
394 unsigned char *mac_addr, 393 unsigned char *mac_addr,
395 __be32 *ip_addr, 394 u32 *ip_addr,
396 bool ipv4, 395 bool ipv4,
397 u32 action); 396 u32 action);
398 397
@@ -550,7 +549,7 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
550 struct i40iw_qp_flush_info *info, 549 struct i40iw_qp_flush_info *info,
551 bool wait); 550 bool wait);
552 551
553void i40iw_copy_ip_ntohl(u32 *dst, u32 *src); 552void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src);
554struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd, 553struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
555 u64 addr, 554 u64 addr,
556 u64 size, 555 u64 size,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 38f917a6c778..d2fa72516960 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -771,6 +771,7 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
771{ 771{
772 struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr; 772 struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
773 struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; 773 struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
774 u16 ctrl_ird, ctrl_ord;
774 775
775 /* initialize the upper 5 bytes of the frame */ 776 /* initialize the upper 5 bytes of the frame */
776 i40iw_build_mpa_v1(cm_node, start_addr, mpa_key); 777 i40iw_build_mpa_v1(cm_node, start_addr, mpa_key);
@@ -779,38 +780,38 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
779 780
780 /* initialize RTR msg */ 781 /* initialize RTR msg */
781 if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { 782 if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
782 rtr_msg->ctrl_ird = IETF_NO_IRD_ORD; 783 ctrl_ird = IETF_NO_IRD_ORD;
783 rtr_msg->ctrl_ord = IETF_NO_IRD_ORD; 784 ctrl_ord = IETF_NO_IRD_ORD;
784 } else { 785 } else {
785 rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? 786 ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
786 IETF_NO_IRD_ORD : cm_node->ird_size; 787 IETF_NO_IRD_ORD : cm_node->ird_size;
787 rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? 788 ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
788 IETF_NO_IRD_ORD : cm_node->ord_size; 789 IETF_NO_IRD_ORD : cm_node->ord_size;
789 } 790 }
790 791
791 rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER; 792 ctrl_ird |= IETF_PEER_TO_PEER;
792 rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN; 793 ctrl_ird |= IETF_FLPDU_ZERO_LEN;
793 794
794 switch (mpa_key) { 795 switch (mpa_key) {
795 case MPA_KEY_REQUEST: 796 case MPA_KEY_REQUEST:
796 rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; 797 ctrl_ord |= IETF_RDMA0_WRITE;
797 rtr_msg->ctrl_ord |= IETF_RDMA0_READ; 798 ctrl_ord |= IETF_RDMA0_READ;
798 break; 799 break;
799 case MPA_KEY_REPLY: 800 case MPA_KEY_REPLY:
800 switch (cm_node->send_rdma0_op) { 801 switch (cm_node->send_rdma0_op) {
801 case SEND_RDMA_WRITE_ZERO: 802 case SEND_RDMA_WRITE_ZERO:
802 rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; 803 ctrl_ord |= IETF_RDMA0_WRITE;
803 break; 804 break;
804 case SEND_RDMA_READ_ZERO: 805 case SEND_RDMA_READ_ZERO:
805 rtr_msg->ctrl_ord |= IETF_RDMA0_READ; 806 ctrl_ord |= IETF_RDMA0_READ;
806 break; 807 break;
807 } 808 }
808 break; 809 break;
809 default: 810 default:
810 break; 811 break;
811 } 812 }
812 rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird); 813 rtr_msg->ctrl_ird = htons(ctrl_ird);
813 rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord); 814 rtr_msg->ctrl_ord = htons(ctrl_ord);
814} 815}
815 816
816/** 817/**
@@ -2107,7 +2108,7 @@ static bool i40iw_ipv6_is_loopback(u32 *loc_addr, u32 *rem_addr)
2107 struct in6_addr raddr6; 2108 struct in6_addr raddr6;
2108 2109
2109 i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr); 2110 i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
2110 return (!memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6)); 2111 return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6);
2111} 2112}
2112 2113
2113/** 2114/**
@@ -2160,7 +2161,7 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
2160 cm_node->tcp_cntxt.rcv_wnd = 2161 cm_node->tcp_cntxt.rcv_wnd =
2161 I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; 2162 I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
2162 ts = current_kernel_time(); 2163 ts = current_kernel_time();
2163 cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); 2164 cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
2164 cm_node->tcp_cntxt.mss = iwdev->mss; 2165 cm_node->tcp_cntxt.mss = iwdev->mss;
2165 2166
2166 cm_node->iwdev = iwdev; 2167 cm_node->iwdev = iwdev;
@@ -2234,7 +2235,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
2234 if (cm_node->listener) { 2235 if (cm_node->listener) {
2235 i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); 2236 i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
2236 } else { 2237 } else {
2237 if (!i40iw_listen_port_in_use(cm_core, htons(cm_node->loc_port)) && 2238 if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
2238 cm_node->apbvt_set && cm_node->iwdev) { 2239 cm_node->apbvt_set && cm_node->iwdev) {
2239 i40iw_manage_apbvt(cm_node->iwdev, 2240 i40iw_manage_apbvt(cm_node->iwdev,
2240 cm_node->loc_port, 2241 cm_node->loc_port,
@@ -2852,7 +2853,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
2852 void *private_data, 2853 void *private_data,
2853 struct i40iw_cm_info *cm_info) 2854 struct i40iw_cm_info *cm_info)
2854{ 2855{
2855 int ret;
2856 struct i40iw_cm_node *cm_node; 2856 struct i40iw_cm_node *cm_node;
2857 struct i40iw_cm_listener *loopback_remotelistener; 2857 struct i40iw_cm_listener *loopback_remotelistener;
2858 struct i40iw_cm_node *loopback_remotenode; 2858 struct i40iw_cm_node *loopback_remotenode;
@@ -2922,30 +2922,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
2922 memcpy(cm_node->pdata_buf, private_data, private_data_len); 2922 memcpy(cm_node->pdata_buf, private_data, private_data_len);
2923 2923
2924 cm_node->state = I40IW_CM_STATE_SYN_SENT; 2924 cm_node->state = I40IW_CM_STATE_SYN_SENT;
2925 ret = i40iw_send_syn(cm_node, 0);
2926
2927 if (ret) {
2928 if (cm_node->ipv4)
2929 i40iw_debug(cm_node->dev,
2930 I40IW_DEBUG_CM,
2931 "Api - connect() FAILED: dest addr=%pI4",
2932 cm_node->rem_addr);
2933 else
2934 i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
2935 "Api - connect() FAILED: dest addr=%pI6",
2936 cm_node->rem_addr);
2937 i40iw_rem_ref_cm_node(cm_node);
2938 cm_node = NULL;
2939 }
2940
2941 if (cm_node)
2942 i40iw_debug(cm_node->dev,
2943 I40IW_DEBUG_CM,
2944 "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
2945 cm_node->rem_port,
2946 cm_node,
2947 cm_node->cm_id);
2948
2949 return cm_node; 2925 return cm_node;
2950} 2926}
2951 2927
@@ -3266,11 +3242,13 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
3266 3242
3267 tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]); 3243 tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]);
3268 tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]); 3244 tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]);
3269 tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(iwqp->iwdev, 3245 tcp_info->arp_idx =
3270 &tcp_info->dest_ip_addr3, 3246 cpu_to_le16((u16)i40iw_arp_table(
3271 true, 3247 iwqp->iwdev,
3272 NULL, 3248 &tcp_info->dest_ip_addr3,
3273 I40IW_ARP_RESOLVE)); 3249 true,
3250 NULL,
3251 I40IW_ARP_RESOLVE));
3274 } else { 3252 } else {
3275 tcp_info->src_port = cpu_to_le16(cm_node->loc_port); 3253 tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
3276 tcp_info->dst_port = cpu_to_le16(cm_node->rem_port); 3254 tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
@@ -3282,12 +3260,13 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
3282 tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]); 3260 tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]);
3283 tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]); 3261 tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]);
3284 tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]); 3262 tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]);
3285 tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table( 3263 tcp_info->arp_idx =
3286 iwqp->iwdev, 3264 cpu_to_le16((u16)i40iw_arp_table(
3287 &tcp_info->dest_ip_addr0, 3265 iwqp->iwdev,
3288 false, 3266 &tcp_info->dest_ip_addr0,
3289 NULL, 3267 false,
3290 I40IW_ARP_RESOLVE)); 3268 NULL,
3269 I40IW_ARP_RESOLVE));
3291 } 3270 }
3292} 3271}
3293 3272
@@ -3564,7 +3543,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3564 struct i40iw_cm_node *cm_node; 3543 struct i40iw_cm_node *cm_node;
3565 struct ib_qp_attr attr; 3544 struct ib_qp_attr attr;
3566 int passive_state; 3545 int passive_state;
3567 struct i40iw_ib_device *iwibdev;
3568 struct ib_mr *ibmr; 3546 struct ib_mr *ibmr;
3569 struct i40iw_pd *iwpd; 3547 struct i40iw_pd *iwpd;
3570 u16 buf_len = 0; 3548 u16 buf_len = 0;
@@ -3627,7 +3605,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3627 !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) || 3605 !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
3628 (!cm_node->ipv4 && 3606 (!cm_node->ipv4 &&
3629 !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) { 3607 !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) {
3630 iwibdev = iwdev->iwibdev;
3631 iwpd = iwqp->iwpd; 3608 iwpd = iwqp->iwpd;
3632 tagged_offset = (uintptr_t)iwqp->ietf_mem.va; 3609 tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
3633 ibmr = i40iw_reg_phys_mr(&iwpd->ibpd, 3610 ibmr = i40iw_reg_phys_mr(&iwpd->ibpd,
@@ -3752,6 +3729,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3752 struct sockaddr_in *raddr; 3729 struct sockaddr_in *raddr;
3753 struct sockaddr_in6 *laddr6; 3730 struct sockaddr_in6 *laddr6;
3754 struct sockaddr_in6 *raddr6; 3731 struct sockaddr_in6 *raddr6;
3732 bool qhash_set = false;
3755 int apbvt_set = 0; 3733 int apbvt_set = 0;
3756 enum i40iw_status_code status; 3734 enum i40iw_status_code status;
3757 3735
@@ -3810,6 +3788,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3810 true); 3788 true);
3811 if (status) 3789 if (status)
3812 return -EINVAL; 3790 return -EINVAL;
3791 qhash_set = true;
3813 } 3792 }
3814 status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD); 3793 status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
3815 if (status) { 3794 if (status) {
@@ -3828,23 +3807,8 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3828 conn_param->private_data_len, 3807 conn_param->private_data_len,
3829 (void *)conn_param->private_data, 3808 (void *)conn_param->private_data,
3830 &cm_info); 3809 &cm_info);
3831 if (!cm_node) { 3810 if (!cm_node)
3832 i40iw_manage_qhash(iwdev, 3811 goto err;
3833 &cm_info,
3834 I40IW_QHASH_TYPE_TCP_ESTABLISHED,
3835 I40IW_QHASH_MANAGE_TYPE_DELETE,
3836 NULL,
3837 false);
3838
3839 if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
3840 cm_info.loc_port))
3841 i40iw_manage_apbvt(iwdev,
3842 cm_info.loc_port,
3843 I40IW_MANAGE_APBVT_DEL);
3844 cm_id->rem_ref(cm_id);
3845 iwdev->cm_core.stats_connect_errs++;
3846 return -ENOMEM;
3847 }
3848 3812
3849 i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); 3813 i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
3850 if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && 3814 if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
@@ -3852,12 +3816,54 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3852 cm_node->ord_size = 1; 3816 cm_node->ord_size = 1;
3853 3817
3854 cm_node->apbvt_set = apbvt_set; 3818 cm_node->apbvt_set = apbvt_set;
3855 cm_node->qhash_set = true; 3819 cm_node->qhash_set = qhash_set;
3856 iwqp->cm_node = cm_node; 3820 iwqp->cm_node = cm_node;
3857 cm_node->iwqp = iwqp; 3821 cm_node->iwqp = iwqp;
3858 iwqp->cm_id = cm_id; 3822 iwqp->cm_id = cm_id;
3859 i40iw_add_ref(&iwqp->ibqp); 3823 i40iw_add_ref(&iwqp->ibqp);
3824
3825 if (cm_node->state == I40IW_CM_STATE_SYN_SENT) {
3826 if (i40iw_send_syn(cm_node, 0)) {
3827 i40iw_rem_ref_cm_node(cm_node);
3828 goto err;
3829 }
3830 }
3831
3832 i40iw_debug(cm_node->dev,
3833 I40IW_DEBUG_CM,
3834 "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
3835 cm_node->rem_port,
3836 cm_node,
3837 cm_node->cm_id);
3860 return 0; 3838 return 0;
3839
3840err:
3841 if (cm_node) {
3842 if (cm_node->ipv4)
3843 i40iw_debug(cm_node->dev,
3844 I40IW_DEBUG_CM,
3845 "Api - connect() FAILED: dest addr=%pI4",
3846 cm_node->rem_addr);
3847 else
3848 i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
3849 "Api - connect() FAILED: dest addr=%pI6",
3850 cm_node->rem_addr);
3851 }
3852 i40iw_manage_qhash(iwdev,
3853 &cm_info,
3854 I40IW_QHASH_TYPE_TCP_ESTABLISHED,
3855 I40IW_QHASH_MANAGE_TYPE_DELETE,
3856 NULL,
3857 false);
3858
3859 if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
3860 cm_info.loc_port))
3861 i40iw_manage_apbvt(iwdev,
3862 cm_info.loc_port,
3863 I40IW_MANAGE_APBVT_DEL);
3864 cm_id->rem_ref(cm_id);
3865 iwdev->cm_core.stats_connect_errs++;
3866 return -ENOMEM;
3861} 3867}
3862 3868
3863/** 3869/**
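In the i40iw_build_mpa_v2() hunk above, the IRD/ORD control words are now assembled in host-order locals (ctrl_ird/ctrl_ord) and converted with a single htons() when written into the wire-format rtr_msg, instead of OR-ing flag bits into fields that are already in network byte order. A small userspace sketch of the same pattern (illustration only; the 0x8000 flag value is made up):

#include <arpa/inet.h>	/* htons() */
#include <stdint.h>

static void build_ctrl(uint16_t *wire_field, uint16_t ird, int peer_to_peer)
{
	uint16_t ctrl = ird;		/* stay in host order while composing */

	if (peer_to_peer)
		ctrl |= 0x8000;		/* hypothetical flag bit */

	*wire_field = htons(ctrl);	/* one conversion at the very end */
}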
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 5f8ceb4a8e84..e9046d9f9645 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -1,6 +1,6 @@
1/******************************************************************************* 1/*******************************************************************************
2* 2*
3* Copyright (c) 2015 Intel Corporation. All rights reserved. 3* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
4* 4*
5* This software is available to you under a choice of one of two 5* This software is available to you under a choice of one of two
6* licenses. You may choose to be licensed under the terms of the GNU 6* licenses. You may choose to be licensed under the terms of the GNU
@@ -291,8 +291,6 @@ struct i40iw_cm_listener {
291 u8 loc_mac[ETH_ALEN]; 291 u8 loc_mac[ETH_ALEN];
292 u32 loc_addr[4]; 292 u32 loc_addr[4];
293 u16 loc_port; 293 u16 loc_port;
294 u32 map_loc_addr[4];
295 u16 map_loc_port;
296 struct iw_cm_id *cm_id; 294 struct iw_cm_id *cm_id;
297 atomic_t ref_count; 295 atomic_t ref_count;
298 struct i40iw_device *iwdev; 296 struct i40iw_device *iwdev;
@@ -317,8 +315,6 @@ struct i40iw_kmem_info {
317struct i40iw_cm_node { 315struct i40iw_cm_node {
318 u32 loc_addr[4], rem_addr[4]; 316 u32 loc_addr[4], rem_addr[4];
319 u16 loc_port, rem_port; 317 u16 loc_port, rem_port;
320 u32 map_loc_addr[4], map_rem_addr[4];
321 u16 map_loc_port, map_rem_port;
322 u16 vlan_id; 318 u16 vlan_id;
323 enum i40iw_cm_node_state state; 319 enum i40iw_cm_node_state state;
324 u8 loc_mac[ETH_ALEN]; 320 u8 loc_mac[ETH_ALEN];
@@ -370,10 +366,6 @@ struct i40iw_cm_info {
370 u16 rem_port; 366 u16 rem_port;
371 u32 loc_addr[4]; 367 u32 loc_addr[4];
372 u32 rem_addr[4]; 368 u32 rem_addr[4];
373 u16 map_loc_port;
374 u16 map_rem_port;
375 u32 map_loc_addr[4];
376 u32 map_rem_addr[4];
377 u16 vlan_id; 369 u16 vlan_id;
378 int backlog; 370 int backlog;
379 u16 user_pri; 371 u16 user_pri;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index f05802bf6ca0..2c4b4d072d6a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -114,16 +114,21 @@ static enum i40iw_status_code i40iw_cqp_poll_registers(
114 * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer 114 * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer
115 * @buf: ptr to fpm commit buffer 115 * @buf: ptr to fpm commit buffer
116 * @info: ptr to i40iw_hmc_obj_info struct 116 * @info: ptr to i40iw_hmc_obj_info struct
117 * @sd: number of SDs for HMC objects
117 * 118 *
118 * parses fpm commit info and copy base value 119 * parses fpm commit info and copy base value
119 * of hmc objects in hmc_info 120 * of hmc objects in hmc_info
120 */ 121 */
121static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( 122static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
122 u64 *buf, 123 u64 *buf,
123 struct i40iw_hmc_obj_info *info) 124 struct i40iw_hmc_obj_info *info,
125 u32 *sd)
124{ 126{
125 u64 temp; 127 u64 temp;
128 u64 size;
129 u64 base = 0;
126 u32 i, j; 130 u32 i, j;
131 u32 k = 0;
127 u32 low; 132 u32 low;
128 133
129 /* copy base values in obj_info */ 134 /* copy base values in obj_info */
@@ -131,10 +136,20 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
131 i <= I40IW_HMC_IW_PBLE; i++, j += 8) { 136 i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
132 get_64bit_val(buf, j, &temp); 137 get_64bit_val(buf, j, &temp);
133 info[i].base = RS_64_1(temp, 32) * 512; 138 info[i].base = RS_64_1(temp, 32) * 512;
139 if (info[i].base > base) {
140 base = info[i].base;
141 k = i;
142 }
134 low = (u32)(temp); 143 low = (u32)(temp);
135 if (low) 144 if (low)
136 info[i].cnt = low; 145 info[i].cnt = low;
137 } 146 }
147 size = info[k].cnt * info[k].size + info[k].base;
148 if (size & 0x1FFFFF)
149 *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */
150 else
151 *sd = (u32)(size >> 21);
152
138 return 0; 153 return 0;
139} 154}
140 155
@@ -2909,6 +2924,65 @@ static enum i40iw_status_code i40iw_sc_mw_alloc(
2909} 2924}
2910 2925
2911/** 2926/**
2927 * i40iw_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp
2928 * @qp: sc qp struct
2929 * @info: fast mr info
2930 * @post_sq: flag for cqp db to ring
2931 */
2932enum i40iw_status_code i40iw_sc_mr_fast_register(
2933 struct i40iw_sc_qp *qp,
2934 struct i40iw_fast_reg_stag_info *info,
2935 bool post_sq)
2936{
2937 u64 temp, header;
2938 u64 *wqe;
2939 u32 wqe_idx;
2940
2941 wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE,
2942 0, info->wr_id);
2943 if (!wqe)
2944 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
2945
2946 i40iw_debug(qp->dev, I40IW_DEBUG_MR, "%s: wr_id[%llxh] wqe_idx[%04d] location[%p]\n",
2947 __func__, info->wr_id, wqe_idx,
2948 &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid);
2949 temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
2950 set_64bit_val(wqe, 0, temp);
2951
2952 temp = RS_64(info->first_pm_pbl_index >> 16, I40IWQPSQ_FIRSTPMPBLIDXHI);
2953 set_64bit_val(wqe,
2954 8,
2955 LS_64(temp, I40IWQPSQ_FIRSTPMPBLIDXHI) |
2956 LS_64(info->reg_addr_pa >> I40IWQPSQ_PBLADDR_SHIFT, I40IWQPSQ_PBLADDR));
2957
2958 set_64bit_val(wqe,
2959 16,
2960 info->total_len |
2961 LS_64(info->first_pm_pbl_index, I40IWQPSQ_FIRSTPMPBLIDXLO));
2962
2963 header = LS_64(info->stag_key, I40IWQPSQ_STAGKEY) |
2964 LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) |
2965 LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) |
2966 LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) |
2967 LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) |
2968 LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) |
2969 LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) |
2970 LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
2971 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
2972 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
2973 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
2974
2975 i40iw_insert_wqe_hdr(wqe, header);
2976
2977 i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "FAST_REG WQE",
2978 wqe, I40IW_QP_WQE_MIN_SIZE);
2979
2980 if (post_sq)
2981 i40iw_qp_post_wr(&qp->qp_uk);
2982 return 0;
2983}
2984
2985/**
2912 * i40iw_sc_send_lsmm - send last streaming mode message 2986 * i40iw_sc_send_lsmm - send last streaming mode message
2913 * @qp: sc qp struct 2987 * @qp: sc qp struct
2914 * @lsmm_buf: buffer with lsmm message 2988 * @lsmm_buf: buffer with lsmm message
@@ -3147,7 +3221,7 @@ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_
3147 i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id); 3221 i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
3148 3222
3149 /* parse the fpm_commit_buf and fill hmc obj info */ 3223 /* parse the fpm_commit_buf and fill hmc obj info */
3150 i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj); 3224 i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj, &hmc_info->sd_table.sd_cnt);
3151 mem_size = sizeof(struct i40iw_hmc_sd_entry) * 3225 mem_size = sizeof(struct i40iw_hmc_sd_entry) *
3152 (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index); 3226 (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index);
3153 ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size); 3227 ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
@@ -3221,7 +3295,9 @@ static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev
3221 3295
3222 /* parse the fpm_commit_buf and fill hmc obj info */ 3296 /* parse the fpm_commit_buf and fill hmc obj info */
3223 if (!ret_code) 3297 if (!ret_code)
3224 ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, hmc_info->hmc_obj); 3298 ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf,
3299 hmc_info->hmc_obj,
3300 &hmc_info->sd_table.sd_cnt);
3225 3301
3226 i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER", 3302 i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER",
3227 commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE); 3303 commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE);
@@ -3469,6 +3545,40 @@ static bool i40iw_ring_full(struct i40iw_sc_cqp *cqp)
3469} 3545}
3470 3546
3471/** 3547/**
3548 * i40iw_est_sd - returns approximate number of SDs for HMC
3549 * @dev: sc device struct
3550 * @hmc_info: hmc structure, size and count for HMC objects
3551 */
3552static u64 i40iw_est_sd(struct i40iw_sc_dev *dev, struct i40iw_hmc_info *hmc_info)
3553{
3554 int i;
3555 u64 size = 0;
3556 u64 sd;
3557
3558 for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_PBLE; i++)
3559 size += hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size;
3560
3561 if (dev->is_pf)
3562 size += hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
3563
3564 if (size & 0x1FFFFF)
3565 sd = (size >> 21) + 1; /* add 1 for remainder */
3566 else
3567 sd = size >> 21;
3568
3569 if (!dev->is_pf) {
3570 /* 2MB alignment for VF PBLE HMC */
3571 size = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
3572 if (size & 0x1FFFFF)
3573 sd += (size >> 21) + 1; /* add 1 for remainder */
3574 else
3575 sd += size >> 21;
3576 }
3577
3578 return sd;
3579}
3580
3581/**
3472 * i40iw_config_fpm_values - configure HMC objects 3582 * i40iw_config_fpm_values - configure HMC objects
3473 * @dev: sc device struct 3583 * @dev: sc device struct
3474 * @qp_count: desired qp count 3584 * @qp_count: desired qp count
@@ -3479,7 +3589,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3479 u32 i, mem_size; 3589 u32 i, mem_size;
3480 u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted; 3590 u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
3481 u32 powerof2; 3591 u32 powerof2;
3482 u64 sd_needed, bytes_needed; 3592 u64 sd_needed;
3483 u32 loop_count = 0; 3593 u32 loop_count = 0;
3484 3594
3485 struct i40iw_hmc_info *hmc_info; 3595 struct i40iw_hmc_info *hmc_info;
@@ -3497,23 +3607,15 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3497 return ret_code; 3607 return ret_code;
3498 } 3608 }
3499 3609
3500 bytes_needed = 0; 3610 for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
3501 for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
3502 hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt; 3611 hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
3503 bytes_needed += 3612 sd_needed = i40iw_est_sd(dev, hmc_info);
3504 (hmc_info->hmc_obj[i].max_cnt) * (hmc_info->hmc_obj[i].size);
3505 i40iw_debug(dev, I40IW_DEBUG_HMC,
3506 "%s i[%04d] max_cnt[0x%04X] size[0x%04llx]\n",
3507 __func__, i, hmc_info->hmc_obj[i].max_cnt,
3508 hmc_info->hmc_obj[i].size);
3509 }
3510 sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up */
3511 i40iw_debug(dev, I40IW_DEBUG_HMC, 3613 i40iw_debug(dev, I40IW_DEBUG_HMC,
3512 "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n", 3614 "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n",
3513 __func__, sd_needed, hmc_info->first_sd_index); 3615 __func__, sd_needed, hmc_info->first_sd_index);
3514 i40iw_debug(dev, I40IW_DEBUG_HMC, 3616 i40iw_debug(dev, I40IW_DEBUG_HMC,
3515 "%s: bytes_needed=0x%llx sd count %d where max sd is %d\n", 3617 "%s: sd count %d where max sd is %d\n",
3516 __func__, bytes_needed, hmc_info->sd_table.sd_cnt, 3618 __func__, hmc_info->sd_table.sd_cnt,
3517 hmc_fpm_misc->max_sds); 3619 hmc_fpm_misc->max_sds);
3518 3620
3519 qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt); 3621 qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt);
@@ -3555,11 +3657,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3555 hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted; 3657 hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted;
3556 3658
3557 /* How much memory is needed for all the objects. */ 3659 /* How much memory is needed for all the objects. */
3558 bytes_needed = 0; 3660 sd_needed = i40iw_est_sd(dev, hmc_info);
3559 for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
3560 bytes_needed +=
3561 (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
3562 sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1;
3563 if ((loop_count > 1000) || 3661 if ((loop_count > 1000) ||
3564 ((!(loop_count % 10)) && 3662 ((!(loop_count % 10)) &&
3565 (qpwanted > qpwantedoriginal * 2 / 3))) { 3663 (qpwanted > qpwantedoriginal * 2 / 3))) {
@@ -3580,15 +3678,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3580 pblewanted -= FPM_MULTIPLIER * 1000; 3678 pblewanted -= FPM_MULTIPLIER * 1000;
3581 } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000); 3679 } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
3582 3680
3583 bytes_needed = 0; 3681 sd_needed = i40iw_est_sd(dev, hmc_info);
3584 for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
3585 bytes_needed += (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
3586 i40iw_debug(dev, I40IW_DEBUG_HMC,
3587 "%s i[%04d] cnt[0x%04x] size[0x%04llx]\n",
3588 __func__, i, hmc_info->hmc_obj[i].cnt,
3589 hmc_info->hmc_obj[i].size);
3590 }
3591 sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up not truncate. */
3592 3682
3593 i40iw_debug(dev, I40IW_DEBUG_HMC, 3683 i40iw_debug(dev, I40IW_DEBUG_HMC,
3594 "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n", 3684 "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
@@ -3606,8 +3696,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3606 return ret_code; 3696 return ret_code;
3607 } 3697 }
3608 3698
3609 hmc_info->sd_table.sd_cnt = (u32)sd_needed;
3610
3611 mem_size = sizeof(struct i40iw_hmc_sd_entry) * 3699 mem_size = sizeof(struct i40iw_hmc_sd_entry) *
3612 (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1); 3700 (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
3613 ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size); 3701 ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
@@ -3911,11 +3999,11 @@ enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev)
3911 */ 3999 */
3912static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt) 4000static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt)
3913{ 4001{
3914 u16 *mpa; 4002 __be16 *mpa;
3915 u32 opcode = 0xffffffff; 4003 u32 opcode = 0xffffffff;
3916 4004
3917 if (info->q2_data_written) { 4005 if (info->q2_data_written) {
3918 mpa = (u16 *)pkt; 4006 mpa = (__be16 *)pkt;
3919 opcode = ntohs(mpa[1]) & 0xf; 4007 opcode = ntohs(mpa[1]) & 0xf;
3920 } 4008 }
3921 return opcode; 4009 return opcode;
@@ -3977,7 +4065,7 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
3977 if (info->q2_data_written) { 4065 if (info->q2_data_written) {
3978 /* Use data from offending packet to fill in ddp & rdma hdrs */ 4066 /* Use data from offending packet to fill in ddp & rdma hdrs */
3979 pkt = i40iw_locate_mpa(pkt); 4067 pkt = i40iw_locate_mpa(pkt);
3980 ddp_seg_len = ntohs(*(u16 *)pkt); 4068 ddp_seg_len = ntohs(*(__be16 *)pkt);
3981 if (ddp_seg_len) { 4069 if (ddp_seg_len) {
3982 copy_len = 2; 4070 copy_len = 2;
3983 termhdr->hdrct = DDP_LEN_FLAG; 4071 termhdr->hdrct = DDP_LEN_FLAG;
@@ -4188,13 +4276,13 @@ void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *
4188void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info) 4276void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
4189{ 4277{
4190 u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET; 4278 u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
4191 u32 *mpa; 4279 __be32 *mpa;
4192 u8 ddp_ctl; 4280 u8 ddp_ctl;
4193 u8 rdma_ctl; 4281 u8 rdma_ctl;
4194 u16 aeq_id = 0; 4282 u16 aeq_id = 0;
4195 struct i40iw_terminate_hdr *termhdr; 4283 struct i40iw_terminate_hdr *termhdr;
4196 4284
4197 mpa = (u32 *)i40iw_locate_mpa(pkt); 4285 mpa = (__be32 *)i40iw_locate_mpa(pkt);
4198 if (info->q2_data_written) { 4286 if (info->q2_data_written) {
4199 /* did not validate the frame - do it now */ 4287 /* did not validate the frame - do it now */
4200 ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff; 4288 ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
@@ -4559,17 +4647,18 @@ static struct i40iw_pd_ops iw_pd_ops = {
4559}; 4647};
4560 4648
4561static struct i40iw_priv_qp_ops iw_priv_qp_ops = { 4649static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
4562 i40iw_sc_qp_init, 4650 .qp_init = i40iw_sc_qp_init,
4563 i40iw_sc_qp_create, 4651 .qp_create = i40iw_sc_qp_create,
4564 i40iw_sc_qp_modify, 4652 .qp_modify = i40iw_sc_qp_modify,
4565 i40iw_sc_qp_destroy, 4653 .qp_destroy = i40iw_sc_qp_destroy,
4566 i40iw_sc_qp_flush_wqes, 4654 .qp_flush_wqes = i40iw_sc_qp_flush_wqes,
4567 i40iw_sc_qp_upload_context, 4655 .qp_upload_context = i40iw_sc_qp_upload_context,
4568 i40iw_sc_qp_setctx, 4656 .qp_setctx = i40iw_sc_qp_setctx,
4569 i40iw_sc_send_lsmm, 4657 .qp_send_lsmm = i40iw_sc_send_lsmm,
4570 i40iw_sc_send_lsmm_nostag, 4658 .qp_send_lsmm_nostag = i40iw_sc_send_lsmm_nostag,
4571 i40iw_sc_send_rtt, 4659 .qp_send_rtt = i40iw_sc_send_rtt,
4572 i40iw_sc_post_wqe0, 4660 .qp_post_wqe0 = i40iw_sc_post_wqe0,
4661 .iw_mr_fast_register = i40iw_sc_mr_fast_register
4573}; 4662};
4574 4663
4575static struct i40iw_priv_cq_ops iw_priv_cq_ops = { 4664static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
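Both i40iw_sc_parse_fpm_commit_buf() and the new i40iw_est_sd() above size the HMC backing store in segment descriptors, where one SD covers 2MB (1 << 21 bytes): the byte total is shifted right by 21 and rounded up whenever the low 21 bits (mask 0x1FFFFF) are non-zero. The arithmetic, as a standalone illustration (not part of the patch):

#include <stdint.h>

static inline uint32_t bytes_to_sds(uint64_t size)
{
	/* round up to whole 2MB segment descriptors */
	return (uint32_t)((size >> 21) + ((size & 0x1FFFFF) ? 1 : 0));
}

/* e.g. a 5MB footprint (5 * 1024 * 1024 bytes) needs 3 SDs */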
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index aab88d65f805..bd942da91a27 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -1290,7 +1290,7 @@
1290 1290
1291/* wqe size considering 32 bytes per wqe*/ 1291/* wqe size considering 32 bytes per wqe*/
1292#define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */ 1292#define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */
1293#define I40IWQP_SW_MAX_WQSIZE 16384 /* 524288 bytes */ 1293#define I40IWQP_SW_MAX_WQSIZE 2048 /* 2048 bytes */
1294 1294
1295#define I40IWQP_OP_RDMA_WRITE 0 1295#define I40IWQP_OP_RDMA_WRITE 0
1296#define I40IWQP_OP_RDMA_READ 1 1296#define I40IWQP_OP_RDMA_READ 1
@@ -1512,6 +1512,8 @@ enum i40iw_alignment {
1512 I40IW_SD_BUF_ALIGNMENT = 0x100 1512 I40IW_SD_BUF_ALIGNMENT = 0x100
1513}; 1513};
1514 1514
1515#define I40IW_WQE_SIZE_64 64
1516
1515#define I40IW_QP_WQE_MIN_SIZE 32 1517#define I40IW_QP_WQE_MIN_SIZE 32
1516#define I40IW_QP_WQE_MAX_SIZE 128 1518#define I40IW_QP_WQE_MAX_SIZE 128
1517 1519
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 9fd302425563..3ee0cad96bc6 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -106,7 +106,9 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev)
106 set_bit(2, iwdev->allocated_pds); 106 set_bit(2, iwdev->allocated_pds);
107 107
108 spin_lock_init(&iwdev->resource_lock); 108 spin_lock_init(&iwdev->resource_lock);
109 mrdrvbits = 24 - get_count_order(iwdev->max_mr); 109 spin_lock_init(&iwdev->qptable_lock);
110 /* stag index mask has a minimum of 14 bits */
111 mrdrvbits = 24 - max(get_count_order(iwdev->max_mr), 14);
110 iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits)); 112 iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
111 return 0; 113 return 0;
112} 114}
@@ -301,11 +303,15 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
301 "%s ae_id = 0x%x bool qp=%d qp_id = %d\n", 303 "%s ae_id = 0x%x bool qp=%d qp_id = %d\n",
302 __func__, info->ae_id, info->qp, info->qp_cq_id); 304 __func__, info->ae_id, info->qp, info->qp_cq_id);
303 if (info->qp) { 305 if (info->qp) {
306 spin_lock_irqsave(&iwdev->qptable_lock, flags);
304 iwqp = iwdev->qp_table[info->qp_cq_id]; 307 iwqp = iwdev->qp_table[info->qp_cq_id];
305 if (!iwqp) { 308 if (!iwqp) {
309 spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
306 i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id); 310 i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
307 continue; 311 continue;
308 } 312 }
313 i40iw_add_ref(&iwqp->ibqp);
314 spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
309 qp = &iwqp->sc_qp; 315 qp = &iwqp->sc_qp;
310 spin_lock_irqsave(&iwqp->lock, flags); 316 spin_lock_irqsave(&iwqp->lock, flags);
311 iwqp->hw_tcp_state = info->tcp_state; 317 iwqp->hw_tcp_state = info->tcp_state;
@@ -411,6 +417,8 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
411 i40iw_terminate_connection(qp, info); 417 i40iw_terminate_connection(qp, info);
412 break; 418 break;
413 } 419 }
420 if (info->qp)
421 i40iw_rem_ref(&iwqp->ibqp);
414 } while (1); 422 } while (1);
415 423
416 if (aeqcnt) 424 if (aeqcnt)
@@ -460,7 +468,7 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad
460 */ 468 */
461void i40iw_manage_arp_cache(struct i40iw_device *iwdev, 469void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
462 unsigned char *mac_addr, 470 unsigned char *mac_addr,
463 __be32 *ip_addr, 471 u32 *ip_addr,
464 bool ipv4, 472 bool ipv4,
465 u32 action) 473 u32 action)
466{ 474{
@@ -481,7 +489,7 @@ void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
481 cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY; 489 cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY;
482 info = &cqp_info->in.u.add_arp_cache_entry.info; 490 info = &cqp_info->in.u.add_arp_cache_entry.info;
483 memset(info, 0, sizeof(*info)); 491 memset(info, 0, sizeof(*info));
484 info->arp_index = cpu_to_le32(arp_index); 492 info->arp_index = cpu_to_le16((u16)arp_index);
485 info->permanent = true; 493 info->permanent = true;
486 ether_addr_copy(info->mac_addr, mac_addr); 494 ether_addr_copy(info->mac_addr, mac_addr);
487 cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request; 495 cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
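The i40iw_initialize_hw_resources() hunk above clamps the driver-owned STag bits as mrdrvbits = 24 - max(get_count_order(max_mr), 14), so that, per the new comment, the stag index mask keeps a minimum of 14 bits even for small MR counts, and mr_stagmask then clears the top mrdrvbits bits of the 32-bit STag. Worked numbers for that expression (illustration only; get_count_order(n) is the kernel helper returning the order of the power of two that covers n):

#include <stdint.h>

static uint32_t stagmask(int count_order)
{
	int mrdrvbits = 24 - (count_order > 14 ? count_order : 14);

	return ~(((1u << mrdrvbits) - 1) << (32 - mrdrvbits));
}

/* count_order <= 14  ->  mrdrvbits = 10  ->  mask 0x003FFFFF */
/* count_order == 16  ->  mrdrvbits =  8  ->  mask 0x00FFFFFF */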
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index e41fae2422ab..c963cad92f5a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -270,7 +270,6 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
270 i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0); 270 i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
271 else 271 else
272 i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0); 272 i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
273 synchronize_irq(msix_vec->irq);
274 free_irq(msix_vec->irq, dev_id); 273 free_irq(msix_vec->irq, dev_id);
275} 274}
276 275
@@ -1147,10 +1146,7 @@ static enum i40iw_status_code i40iw_alloc_set_mac_ipaddr(struct i40iw_device *iw
1147 if (!status) { 1146 if (!status) {
1148 status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr, 1147 status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
1149 (u8)iwdev->mac_ip_table_idx); 1148 (u8)iwdev->mac_ip_table_idx);
1150 if (!status) 1149 if (status)
1151 status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
1152 (u8)iwdev->mac_ip_table_idx);
1153 else
1154 i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); 1150 i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
1155 } 1151 }
1156 return status; 1152 return status;
@@ -1165,7 +1161,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
1165 struct net_device *ip_dev; 1161 struct net_device *ip_dev;
1166 struct inet6_dev *idev; 1162 struct inet6_dev *idev;
1167 struct inet6_ifaddr *ifp; 1163 struct inet6_ifaddr *ifp;
1168 __be32 local_ipaddr6[4]; 1164 u32 local_ipaddr6[4];
1169 1165
1170 rcu_read_lock(); 1166 rcu_read_lock();
1171 for_each_netdev_rcu(&init_net, ip_dev) { 1167 for_each_netdev_rcu(&init_net, ip_dev) {
@@ -1512,6 +1508,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
1512 I40IW_HMC_PROFILE_DEFAULT; 1508 I40IW_HMC_PROFILE_DEFAULT;
1513 iwdev->max_rdma_vfs = 1509 iwdev->max_rdma_vfs =
1514 (iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0; 1510 (iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0;
1511 iwdev->max_enabled_vfs = iwdev->max_rdma_vfs;
1515 iwdev->netdev = ldev->netdev; 1512 iwdev->netdev = ldev->netdev;
1516 hdl->client = client; 1513 hdl->client = client;
1517 iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS; 1514 iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
@@ -1531,7 +1528,10 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
1531 goto exit; 1528 goto exit;
1532 iwdev->obj_next = iwdev->obj_mem; 1529 iwdev->obj_next = iwdev->obj_mem;
1533 iwdev->push_mode = push_mode; 1530 iwdev->push_mode = push_mode;
1531
1534 init_waitqueue_head(&iwdev->vchnl_waitq); 1532 init_waitqueue_head(&iwdev->vchnl_waitq);
1533 init_waitqueue_head(&dev->vf_reqs);
1534
1535 status = i40iw_initialize_dev(iwdev, ldev); 1535 status = i40iw_initialize_dev(iwdev, ldev);
1536exit: 1536exit:
1537 if (status) { 1537 if (status) {
@@ -1710,7 +1710,6 @@ static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u
1710 for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) { 1710 for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
1711 if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id)) 1711 if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
1712 continue; 1712 continue;
1713
1714 /* free all resources allocated on behalf of vf */ 1713 /* free all resources allocated on behalf of vf */
1715 tmp_vfdev = dev->vf_dev[i]; 1714 tmp_vfdev = dev->vf_dev[i];
1716 spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags); 1715 spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
@@ -1819,8 +1818,6 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev,
1819 dev = &hdl->device.sc_dev; 1818 dev = &hdl->device.sc_dev;
1820 iwdev = dev->back_dev; 1819 iwdev = dev->back_dev;
1821 1820
1822 i40iw_debug(dev, I40IW_DEBUG_VIRT, "msg %p, message length %u\n", msg, len);
1823
1824 if (dev->vchnl_if.vchnl_recv) { 1821 if (dev->vchnl_if.vchnl_recv) {
1825 ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len); 1822 ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len);
1826 if (!dev->is_pf) { 1823 if (!dev->is_pf) {
@@ -1832,6 +1829,39 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev,
1832} 1829}
1833 1830
1834/** 1831/**
1832 * i40iw_vf_clear_to_send - wait to send virtual channel message
1833 * @dev: iwarp device *
1834 * Wait for until virtual channel is clear
1835 * before sending the next message
1836 *
1837 * Returns false if error
1838 * Returns true if clear to send
1839 */
1840bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
1841{
1842 struct i40iw_device *iwdev;
1843 wait_queue_t wait;
1844
1845 iwdev = dev->back_dev;
1846
1847 if (!wq_has_sleeper(&dev->vf_reqs) &&
1848 (atomic_read(&iwdev->vchnl_msgs) == 0))
1849 return true; /* virtual channel is clear */
1850
1851 init_wait(&wait);
1852 add_wait_queue_exclusive(&dev->vf_reqs, &wait);
1853
1854 if (!wait_event_timeout(dev->vf_reqs,
1855 (atomic_read(&iwdev->vchnl_msgs) == 0),
1856 I40IW_VCHNL_EVENT_TIMEOUT))
1857 dev->vchnl_up = false;
1858
1859 remove_wait_queue(&dev->vf_reqs, &wait);
1860
1861 return dev->vchnl_up;
1862}
1863
1864/**
1835 * i40iw_virtchnl_send - send a message through the virtual channel 1865 * i40iw_virtchnl_send - send a message through the virtual channel
1836 * @dev: iwarp device 1866 * @dev: iwarp device
1837 * @vf_id: virtual function id associated with the message 1867 * @vf_id: virtual function id associated with the message
@@ -1848,18 +1878,16 @@ static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
1848{ 1878{
1849 struct i40iw_device *iwdev; 1879 struct i40iw_device *iwdev;
1850 struct i40e_info *ldev; 1880 struct i40e_info *ldev;
1851 enum i40iw_status_code ret_code = I40IW_ERR_BAD_PTR;
1852 1881
1853 if (!dev || !dev->back_dev) 1882 if (!dev || !dev->back_dev)
1854 return ret_code; 1883 return I40IW_ERR_BAD_PTR;
1855 1884
1856 iwdev = dev->back_dev; 1885 iwdev = dev->back_dev;
1857 ldev = iwdev->ldev; 1886 ldev = iwdev->ldev;
1858 1887
1859 if (ldev && ldev->ops && ldev->ops->virtchnl_send) 1888 if (ldev && ldev->ops && ldev->ops->virtchnl_send)
1860 ret_code = ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len); 1889 return ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
1861 1890 return I40IW_ERR_BAD_PTR;
1862 return ret_code;
1863} 1891}
1864 1892
1865/* client interface functions */ 1893/* client interface functions */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
index 7e20493510e8..80f422bf3967 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -172,6 +172,7 @@ struct i40iw_hw;
172u8 __iomem *i40iw_get_hw_addr(void *dev); 172u8 __iomem *i40iw_get_hw_addr(void *dev);
173void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); 173void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
174enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev); 174enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev);
175bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev);
175enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr, 176enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr,
176 u32 length, u32 value); 177 u32 length, u32 value);
177struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf); 178struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
index ded853d2fad8..85993dc44f6e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -404,13 +404,14 @@ static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
404 sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa; 404 sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa;
405 if (sd_entry->valid) 405 if (sd_entry->valid)
406 return 0; 406 return 0;
407 if (dev->is_pf) 407 if (dev->is_pf) {
408 ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id, 408 ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id,
409 sd_reg_val, idx->sd_idx, 409 sd_reg_val, idx->sd_idx,
410 sd_entry->entry_type, true); 410 sd_entry->entry_type, true);
411 if (ret_code) { 411 if (ret_code) {
412 i40iw_pr_err("cqp cmd failed for sd (pbles)\n"); 412 i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
413 goto error; 413 goto error;
414 }
414 } 415 }
415 416
416 sd_entry->valid = true; 417 sd_entry->valid = true;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 8eb400d8a7a0..e9c6e82af9c7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -1194,7 +1194,7 @@ static enum i40iw_status_code i40iw_ieq_process_buf(struct i40iw_puda_rsrc *ieq,
1194 1194
1195 ioffset = (u16)(buf->data - (u8 *)buf->mem.va); 1195 ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
1196 while (datalen) { 1196 while (datalen) {
1197 fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(u16 *)datap)); 1197 fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(__be16 *)datap));
1198 if (fpdu_len > pfpdu->max_fpdu_data) { 1198 if (fpdu_len > pfpdu->max_fpdu_data) {
1199 i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ, 1199 i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
1200 "%s: error bad fpdu_len\n", __func__); 1200 "%s: error bad fpdu_len\n", __func__);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h
index b0110c15e044..91c421762f06 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_status.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_status.h
@@ -95,6 +95,7 @@ enum i40iw_status_code {
95 I40IW_ERR_INVALID_MAC_ADDR = -65, 95 I40IW_ERR_INVALID_MAC_ADDR = -65,
96 I40IW_ERR_BAD_STAG = -66, 96 I40IW_ERR_BAD_STAG = -66,
97 I40IW_ERR_CQ_COMPL_ERROR = -67, 97 I40IW_ERR_CQ_COMPL_ERROR = -67,
98 I40IW_ERR_QUEUE_DESTROYED = -68
98 99
99}; 100};
100#endif 101#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index edb3a8c8267a..16cc61720b53 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -479,16 +479,17 @@ struct i40iw_sc_dev {
479 struct i40iw_virt_mem ieq_mem; 479 struct i40iw_virt_mem ieq_mem;
480 struct i40iw_puda_rsrc *ieq; 480 struct i40iw_puda_rsrc *ieq;
481 481
482 struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; 482 const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
483 483
484 struct i40iw_hmc_fpm_misc hmc_fpm_misc; 484 struct i40iw_hmc_fpm_misc hmc_fpm_misc;
485 u16 qs_handle; 485 u16 qs_handle;
486 u32 debug_mask; 486 u32 debug_mask;
487 u16 exception_lan_queue; 487 u16 exception_lan_queue;
488 u8 hmc_fn_id; 488 u8 hmc_fn_id;
489 bool is_pf; 489 bool is_pf;
490 bool vchnl_up; 490 bool vchnl_up;
491 u8 vf_id; 491 u8 vf_id;
492 wait_queue_head_t vf_reqs;
492 u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY]; 493 u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
493 struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf; 494 struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf;
494 u8 hw_rev; 495 u8 hw_rev;
@@ -889,8 +890,8 @@ struct i40iw_qhash_table_info {
889 u32 qp_num; 890 u32 qp_num;
890 u32 dest_ip[4]; 891 u32 dest_ip[4];
891 u32 src_ip[4]; 892 u32 src_ip[4];
892 u32 dest_port; 893 u16 dest_port;
893 u32 src_port; 894 u16 src_port;
894}; 895};
895 896
896struct i40iw_local_mac_ipaddr_entry_info { 897struct i40iw_local_mac_ipaddr_entry_info {
@@ -1040,6 +1041,9 @@ struct i40iw_priv_qp_ops {
1040 void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32); 1041 void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32);
1041 void (*qp_send_rtt)(struct i40iw_sc_qp *, bool); 1042 void (*qp_send_rtt)(struct i40iw_sc_qp *, bool);
1042 enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8); 1043 enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8);
1044 enum i40iw_status_code (*iw_mr_fast_register)(struct i40iw_sc_qp *,
1045 struct i40iw_fast_reg_stag_info *,
1046 bool);
1043}; 1047};
1044 1048
1045struct i40iw_priv_cq_ops { 1049struct i40iw_priv_cq_ops {
@@ -1108,7 +1112,7 @@ struct i40iw_hmc_ops {
1108 enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *, 1112 enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *,
1109 struct i40iw_hmc_fpm_misc *); 1113 struct i40iw_hmc_fpm_misc *);
1110 enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8); 1114 enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8);
1111 enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *); 1115 enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *, u32 *sd);
1112 enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev, 1116 enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev,
1113 struct i40iw_hmc_create_obj_info *); 1117 struct i40iw_hmc_create_obj_info *);
1114 enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev, 1118 enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index f78c3dc8bdb2..e35faea88c13 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -56,6 +56,9 @@ static enum i40iw_status_code i40iw_nop_1(struct i40iw_qp_uk *qp)
56 56
57 wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); 57 wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
58 wqe = qp->sq_base[wqe_idx].elem; 58 wqe = qp->sq_base[wqe_idx].elem;
59
60 qp->sq_wrtrk_array[wqe_idx].wqe_size = I40IW_QP_WQE_MIN_SIZE;
61
59 peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size; 62 peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size;
60 wqe_0 = qp->sq_base[peek_head].elem; 63 wqe_0 = qp->sq_base[peek_head].elem;
61 if (peek_head) 64 if (peek_head)
@@ -130,7 +133,10 @@ static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx)
130 */ 133 */
131u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, 134u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
132 u32 *wqe_idx, 135 u32 *wqe_idx,
133 u8 wqe_size) 136 u8 wqe_size,
137 u32 total_size,
138 u64 wr_id
139 )
134{ 140{
135 u64 *wqe = NULL; 141 u64 *wqe = NULL;
136 u64 wqe_ptr; 142 u64 wqe_ptr;
@@ -159,6 +165,17 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
159 if (!*wqe_idx) 165 if (!*wqe_idx)
160 qp->swqe_polarity = !qp->swqe_polarity; 166 qp->swqe_polarity = !qp->swqe_polarity;
161 } 167 }
168
169 if (((*wqe_idx & 3) == 1) && (wqe_size == I40IW_WQE_SIZE_64)) {
170 i40iw_nop_1(qp);
171 I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
172 if (ret_code)
173 return NULL;
174 *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
175 if (!*wqe_idx)
176 qp->swqe_polarity = !qp->swqe_polarity;
177 }
178
162 for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) { 179 for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
163 I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code); 180 I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
164 if (ret_code) 181 if (ret_code)
@@ -169,8 +186,15 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
169 186
170 peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); 187 peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
171 wqe_0 = qp->sq_base[peek_head].elem; 188 wqe_0 = qp->sq_base[peek_head].elem;
172 if (peek_head & 0x3) 189
173 wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID); 190 if (((peek_head & 3) == 1) || ((peek_head & 3) == 3)) {
191 if (RS_64(wqe_0[3], I40IWQPSQ_VALID) != !qp->swqe_polarity)
192 wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
193 }
194
195 qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id;
196 qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
197 qp->sq_wrtrk_array[*wqe_idx].wqe_size = wqe_size;
174 return wqe; 198 return wqe;
175} 199}
176 200
@@ -249,12 +273,9 @@ static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
249 if (ret_code) 273 if (ret_code)
250 return ret_code; 274 return ret_code;
251 275
252 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 276 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
253 if (!wqe) 277 if (!wqe)
254 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 278 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
255
256 qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
257 qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
258 set_64bit_val(wqe, 16, 279 set_64bit_val(wqe, 16,
259 LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); 280 LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
260 if (!op_info->rem_addr.stag) 281 if (!op_info->rem_addr.stag)
@@ -309,12 +330,9 @@ static enum i40iw_status_code i40iw_rdma_read(struct i40iw_qp_uk *qp,
309 ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size); 330 ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size);
310 if (ret_code) 331 if (ret_code)
311 return ret_code; 332 return ret_code;
312 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 333 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->lo_addr.len, info->wr_id);
313 if (!wqe) 334 if (!wqe)
314 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 335 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
315
316 qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
317 qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->lo_addr.len;
318 local_fence |= info->local_fence; 336 local_fence |= info->local_fence;
319 337
320 set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); 338 set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
@@ -366,13 +384,11 @@ static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
366 if (ret_code) 384 if (ret_code)
367 return ret_code; 385 return ret_code;
368 386
369 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 387 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
370 if (!wqe) 388 if (!wqe)
371 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 389 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
372 390
373 read_fence |= info->read_fence; 391 read_fence |= info->read_fence;
374 qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
375 qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
376 set_64bit_val(wqe, 16, 0); 392 set_64bit_val(wqe, 16, 0);
377 header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) | 393 header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
378 LS_64(info->op_type, I40IWQPSQ_OPCODE) | 394 LS_64(info->op_type, I40IWQPSQ_OPCODE) |
@@ -427,13 +443,11 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
427 if (ret_code) 443 if (ret_code)
428 return ret_code; 444 return ret_code;
429 445
430 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 446 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
431 if (!wqe) 447 if (!wqe)
432 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 448 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
433 449
434 read_fence |= info->read_fence; 450 read_fence |= info->read_fence;
435 qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
436 qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
437 set_64bit_val(wqe, 16, 451 set_64bit_val(wqe, 16,
438 LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); 452 LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
439 453
@@ -507,14 +521,11 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
507 if (ret_code) 521 if (ret_code)
508 return ret_code; 522 return ret_code;
509 523
510 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 524 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
511 if (!wqe) 525 if (!wqe)
512 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 526 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
513 527
514 read_fence |= info->read_fence; 528 read_fence |= info->read_fence;
515
516 qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
517 qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
518 header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) | 529 header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
519 LS_64(info->op_type, I40IWQPSQ_OPCODE) | 530 LS_64(info->op_type, I40IWQPSQ_OPCODE) |
520 LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | 531 LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
@@ -574,12 +585,9 @@ static enum i40iw_status_code i40iw_stag_local_invalidate(struct i40iw_qp_uk *qp
574 op_info = &info->op.inv_local_stag; 585 op_info = &info->op.inv_local_stag;
575 local_fence = info->local_fence; 586 local_fence = info->local_fence;
576 587
577 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); 588 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
578 if (!wqe) 589 if (!wqe)
579 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 590 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
580
581 qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
582 qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
583 set_64bit_val(wqe, 0, 0); 591 set_64bit_val(wqe, 0, 0);
584 set_64bit_val(wqe, 8, 592 set_64bit_val(wqe, 8,
585 LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG)); 593 LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG));
@@ -619,12 +627,9 @@ static enum i40iw_status_code i40iw_mw_bind(struct i40iw_qp_uk *qp,
619 op_info = &info->op.bind_window; 627 op_info = &info->op.bind_window;
620 628
621 local_fence |= info->local_fence; 629 local_fence |= info->local_fence;
622 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); 630 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
623 if (!wqe) 631 if (!wqe)
624 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 632 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
625
626 qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
627 qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
628 set_64bit_val(wqe, 0, (uintptr_t)op_info->va); 633 set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
629 set_64bit_val(wqe, 8, 634 set_64bit_val(wqe, 8,
630 LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) | 635 LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) |
@@ -760,7 +765,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
760 enum i40iw_status_code ret_code2 = 0; 765 enum i40iw_status_code ret_code2 = 0;
761 bool move_cq_head = true; 766 bool move_cq_head = true;
762 u8 polarity; 767 u8 polarity;
763 u8 addl_frag_cnt, addl_wqes = 0; 768 u8 addl_wqes = 0;
764 769
765 if (cq->avoid_mem_cflct) 770 if (cq->avoid_mem_cflct)
766 cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq); 771 cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq);
@@ -797,6 +802,10 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
797 info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ); 802 info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ);
798 803
799 qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx; 804 qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
805 if (!qp) {
806 ret_code = I40IW_ERR_QUEUE_DESTROYED;
807 goto exit;
808 }
800 wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX); 809 wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
801 info->qp_handle = (i40iw_qp_handle)(unsigned long)qp; 810 info->qp_handle = (i40iw_qp_handle)(unsigned long)qp;
802 811
@@ -827,11 +836,8 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
827 info->op_type = (u8)RS_64(qword3, I40IWCQ_OP); 836 info->op_type = (u8)RS_64(qword3, I40IWCQ_OP);
828 sw_wqe = qp->sq_base[wqe_idx].elem; 837 sw_wqe = qp->sq_base[wqe_idx].elem;
829 get_64bit_val(sw_wqe, 24, &wqe_qword); 838 get_64bit_val(sw_wqe, 24, &wqe_qword);
830 addl_frag_cnt =
831 (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
832 i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
833 839
834 addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE); 840 addl_wqes = qp->sq_wrtrk_array[wqe_idx].wqe_size / I40IW_QP_WQE_MIN_SIZE;
835 I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes)); 841 I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes));
836 } else { 842 } else {
837 do { 843 do {
@@ -843,9 +849,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
843 get_64bit_val(sw_wqe, 24, &wqe_qword); 849 get_64bit_val(sw_wqe, 24, &wqe_qword);
844 op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE); 850 op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE);
845 info->op_type = op_type; 851 info->op_type = op_type;
846 addl_frag_cnt = (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT); 852 addl_wqes = qp->sq_wrtrk_array[tail].wqe_size / I40IW_QP_WQE_MIN_SIZE;
847 i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
848 addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
849 I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes)); 853 I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes));
850 if (op_type != I40IWQP_OP_NOP) { 854 if (op_type != I40IWQP_OP_NOP) {
851 info->wr_id = qp->sq_wrtrk_array[tail].wrid; 855 info->wr_id = qp->sq_wrtrk_array[tail].wrid;
@@ -859,6 +863,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
859 863
860 ret_code = 0; 864 ret_code = 0;
861 865
866exit:
862 if (!ret_code && 867 if (!ret_code &&
863 (info->comp_status == I40IW_COMPL_STATUS_FLUSHED)) 868 (info->comp_status == I40IW_COMPL_STATUS_FLUSHED))
864 if (pring && (I40IW_RING_MORE_WORK(*pring))) 869 if (pring && (I40IW_RING_MORE_WORK(*pring)))
@@ -893,19 +898,21 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
893 * i40iw_get_wqe_shift - get shift count for maximum wqe size 898 * i40iw_get_wqe_shift - get shift count for maximum wqe size
894 * @wqdepth: depth of wq required. 899 * @wqdepth: depth of wq required.
895 * @sge: Maximum Scatter Gather Elements wqe 900 * @sge: Maximum Scatter Gather Elements wqe
901 * @inline_data: Maximum inline data size
896 * @shift: Returns the shift needed based on sge 902 * @shift: Returns the shift needed based on sge
897 * 903 *
898 * Shift can be used to left shift the wqe size based on sge. 904 * Shift can be used to left shift the wqe size based on number of SGEs and inline data size.
899 * If sge, == 1, shift =0 (wqe_size of 32 bytes), for sge=2 and 3, shift =1 905 * For 1 SGE or inline data <= 16, shift = 0 (wqe size of 32 bytes).
900 * (64 bytes wqes) and 2 otherwise (128 bytes wqe). 906 * For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes).
907 * Shift of 2 otherwise (wqe size of 128 bytes).
901 */ 908 */
902enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift) 909enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift)
903{ 910{
904 u32 size; 911 u32 size;
905 912
906 *shift = 0; 913 *shift = 0;
907 if (sge > 1) 914 if (sge > 1 || inline_data > 16)
908 *shift = (sge < 4) ? 1 : 2; 915 *shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
909 916
910 /* check if wqdepth is multiple of 2 or not */ 917 /* check if wqdepth is multiple of 2 or not */
911 918
@@ -968,11 +975,11 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
968 975
969 if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT) 976 if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
970 return I40IW_ERR_INVALID_FRAG_COUNT; 977 return I40IW_ERR_INVALID_FRAG_COUNT;
971 ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, &sqshift); 978 ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, info->max_inline_data, &sqshift);
972 if (ret_code) 979 if (ret_code)
973 return ret_code; 980 return ret_code;
974 981
975 ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, &rqshift); 982 ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift);
976 if (ret_code) 983 if (ret_code)
977 return ret_code; 984 return ret_code;
978 985
@@ -1097,12 +1104,9 @@ enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
1097 u64 header, *wqe; 1104 u64 header, *wqe;
1098 u32 wqe_idx; 1105 u32 wqe_idx;
1099 1106
1100 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); 1107 wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, wr_id);
1101 if (!wqe) 1108 if (!wqe)
1102 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 1109 return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
1103
1104 qp->sq_wrtrk_array[wqe_idx].wrid = wr_id;
1105 qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
1106 set_64bit_val(wqe, 0, 0); 1110 set_64bit_val(wqe, 0, 0);
1107 set_64bit_val(wqe, 8, 0); 1111 set_64bit_val(wqe, 8, 0);
1108 set_64bit_val(wqe, 16, 0); 1112 set_64bit_val(wqe, 16, 0);
@@ -1125,7 +1129,7 @@ enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
1125 * @frag_cnt: number of fragments 1129 * @frag_cnt: number of fragments
1126 * @wqe_size: size of sq wqe returned 1130 * @wqe_size: size of sq wqe returned
1127 */ 1131 */
1128enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size) 1132enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size)
1129{ 1133{
1130 switch (frag_cnt) { 1134 switch (frag_cnt) {
1131 case 0: 1135 case 0:
@@ -1156,7 +1160,7 @@ enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size)
1156 * @frag_cnt: number of fragments 1160 * @frag_cnt: number of fragments
1157 * @wqe_size: size of rq wqe returned 1161 * @wqe_size: size of rq wqe returned
1158 */ 1162 */
1159enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size) 1163enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size)
1160{ 1164{
1161 switch (frag_cnt) { 1165 switch (frag_cnt) {
1162 case 0: 1166 case 0:
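The reworked i40iw_get_wqe_shift() hunk above now folds the inline data size into the shift selection. A minimal standalone C sketch of that rule, for illustration only (user-space demo, not the driver function; the thresholds come from the updated kernel-doc):

#include <stdio.h>

/* 0 -> 32-byte WQE, 1 -> 64-byte WQE, 2 -> 128-byte WQE */
static unsigned int wqe_shift(unsigned int sge, unsigned int inline_data)
{
        unsigned int shift = 0;         /* 1 SGE or inline data <= 16 bytes */

        if (sge > 1 || inline_data > 16)
                shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
        return shift;
}

int main(void)
{
        printf("sge=1 inline=0  -> shift %u\n", wqe_shift(1, 0));   /* 0 */
        printf("sge=3 inline=48 -> shift %u\n", wqe_shift(3, 48));  /* 1 */
        printf("sge=2 inline=64 -> shift %u\n", wqe_shift(2, 64));  /* 2 */
        return 0;
}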
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 5cd971bb8cc7..4627646fe8cd 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -61,7 +61,7 @@ enum i40iw_device_capabilities_const {
61 I40IW_MAX_CQ_SIZE = 1048575, 61 I40IW_MAX_CQ_SIZE = 1048575,
62 I40IW_MAX_AEQ_ALLOCATE_COUNT = 255, 62 I40IW_MAX_AEQ_ALLOCATE_COUNT = 255,
63 I40IW_DB_ID_ZERO = 0, 63 I40IW_DB_ID_ZERO = 0,
64 I40IW_MAX_WQ_FRAGMENT_COUNT = 6, 64 I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
65 I40IW_MAX_SGE_RD = 1, 65 I40IW_MAX_SGE_RD = 1,
66 I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647, 66 I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647,
67 I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647, 67 I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647,
@@ -70,8 +70,8 @@ enum i40iw_device_capabilities_const {
70 I40IW_MAX_VF_FPM_ID = 47, 70 I40IW_MAX_VF_FPM_ID = 47,
71 I40IW_MAX_VF_PER_PF = 127, 71 I40IW_MAX_VF_PER_PF = 127,
72 I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, 72 I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
73 I40IW_MAX_INLINE_DATA_SIZE = 112, 73 I40IW_MAX_INLINE_DATA_SIZE = 48,
74 I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 112, 74 I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
75 I40IW_MAX_IRD_SIZE = 32, 75 I40IW_MAX_IRD_SIZE = 32,
76 I40IW_QPCTX_ENCD_MAXIRD = 3, 76 I40IW_QPCTX_ENCD_MAXIRD = 3,
77 I40IW_MAX_WQ_ENTRIES = 2048, 77 I40IW_MAX_WQ_ENTRIES = 2048,
@@ -102,6 +102,8 @@ enum i40iw_device_capabilities_const {
102 102
103#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8) 103#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8)
104 104
105#define I40IW_MAX_MR_SIZE 0x10000000000L
106
105struct i40iw_qp_uk; 107struct i40iw_qp_uk;
106struct i40iw_cq_uk; 108struct i40iw_cq_uk;
107struct i40iw_srq_uk; 109struct i40iw_srq_uk;
@@ -198,7 +200,7 @@ enum i40iw_completion_notify {
198 200
199struct i40iw_post_send { 201struct i40iw_post_send {
200 i40iw_sgl sg_list; 202 i40iw_sgl sg_list;
201 u8 num_sges; 203 u32 num_sges;
202}; 204};
203 205
204struct i40iw_post_inline_send { 206struct i40iw_post_inline_send {
@@ -220,7 +222,7 @@ struct i40iw_post_inline_send_w_inv {
220 222
221struct i40iw_rdma_write { 223struct i40iw_rdma_write {
222 i40iw_sgl lo_sg_list; 224 i40iw_sgl lo_sg_list;
223 u8 num_lo_sges; 225 u32 num_lo_sges;
224 struct i40iw_sge rem_addr; 226 struct i40iw_sge rem_addr;
225}; 227};
226 228
@@ -345,7 +347,9 @@ struct i40iw_dev_uk {
345 347
346struct i40iw_sq_uk_wr_trk_info { 348struct i40iw_sq_uk_wr_trk_info {
347 u64 wrid; 349 u64 wrid;
348 u64 wr_len; 350 u32 wr_len;
351 u8 wqe_size;
352 u8 reserved[3];
349}; 353};
350 354
351struct i40iw_qp_quanta { 355struct i40iw_qp_quanta {
@@ -367,6 +371,8 @@ struct i40iw_qp_uk {
367 u32 qp_id; 371 u32 qp_id;
368 u32 sq_size; 372 u32 sq_size;
369 u32 rq_size; 373 u32 rq_size;
374 u32 max_sq_frag_cnt;
375 u32 max_rq_frag_cnt;
370 struct i40iw_qp_uk_ops ops; 376 struct i40iw_qp_uk_ops ops;
371 bool use_srq; 377 bool use_srq;
372 u8 swqe_polarity; 378 u8 swqe_polarity;
@@ -374,8 +380,6 @@ struct i40iw_qp_uk {
374 u8 rwqe_polarity; 380 u8 rwqe_polarity;
375 u8 rq_wqe_size; 381 u8 rq_wqe_size;
376 u8 rq_wqe_size_multiplier; 382 u8 rq_wqe_size_multiplier;
377 u8 max_sq_frag_cnt;
378 u8 max_rq_frag_cnt;
379 bool deferred_flag; 383 bool deferred_flag;
380}; 384};
381 385
@@ -404,8 +408,9 @@ struct i40iw_qp_uk_init_info {
404 u32 qp_id; 408 u32 qp_id;
405 u32 sq_size; 409 u32 sq_size;
406 u32 rq_size; 410 u32 rq_size;
407 u8 max_sq_frag_cnt; 411 u32 max_sq_frag_cnt;
408 u8 max_rq_frag_cnt; 412 u32 max_rq_frag_cnt;
413 u32 max_inline_data;
409 414
410}; 415};
411 416
@@ -422,7 +427,10 @@ void i40iw_device_init_uk(struct i40iw_dev_uk *dev);
422 427
423void i40iw_qp_post_wr(struct i40iw_qp_uk *qp); 428void i40iw_qp_post_wr(struct i40iw_qp_uk *qp);
424u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx, 429u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx,
425 u8 wqe_size); 430 u8 wqe_size,
431 u32 total_size,
432 u64 wr_id
433 );
426u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx); 434u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx);
427u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx); 435u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx);
428 436
@@ -434,9 +442,9 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
434void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq); 442void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq);
435enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id, 443enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id,
436 bool signaled, bool post_sq); 444 bool signaled, bool post_sq);
437enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size); 445enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size);
438enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size); 446enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size);
439enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size, 447enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
440 u8 *wqe_size); 448 u8 *wqe_size);
441enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift); 449enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift);
442#endif 450#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 1ceec81bd8eb..0e8db0a35141 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -59,7 +59,7 @@
59 * @action: modify, delete or add 59 * @action: modify, delete or add
60 */ 60 */
61int i40iw_arp_table(struct i40iw_device *iwdev, 61int i40iw_arp_table(struct i40iw_device *iwdev,
62 __be32 *ip_addr, 62 u32 *ip_addr,
63 bool ipv4, 63 bool ipv4,
64 u8 *mac_addr, 64 u8 *mac_addr,
65 u32 action) 65 u32 action)
@@ -152,7 +152,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
152 struct net_device *upper_dev; 152 struct net_device *upper_dev;
153 struct i40iw_device *iwdev; 153 struct i40iw_device *iwdev;
154 struct i40iw_handler *hdl; 154 struct i40iw_handler *hdl;
155 __be32 local_ipaddr; 155 u32 local_ipaddr;
156 156
157 hdl = i40iw_find_netdev(event_netdev); 157 hdl = i40iw_find_netdev(event_netdev);
158 if (!hdl) 158 if (!hdl)
@@ -167,11 +167,10 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
167 switch (event) { 167 switch (event) {
168 case NETDEV_DOWN: 168 case NETDEV_DOWN:
169 if (upper_dev) 169 if (upper_dev)
170 local_ipaddr = 170 local_ipaddr = ntohl(
171 ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; 171 ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
172 else 172 else
173 local_ipaddr = ifa->ifa_address; 173 local_ipaddr = ntohl(ifa->ifa_address);
174 local_ipaddr = ntohl(local_ipaddr);
175 i40iw_manage_arp_cache(iwdev, 174 i40iw_manage_arp_cache(iwdev,
176 netdev->dev_addr, 175 netdev->dev_addr,
177 &local_ipaddr, 176 &local_ipaddr,
@@ -180,11 +179,10 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
180 return NOTIFY_OK; 179 return NOTIFY_OK;
181 case NETDEV_UP: 180 case NETDEV_UP:
182 if (upper_dev) 181 if (upper_dev)
183 local_ipaddr = 182 local_ipaddr = ntohl(
184 ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; 183 ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
185 else 184 else
186 local_ipaddr = ifa->ifa_address; 185 local_ipaddr = ntohl(ifa->ifa_address);
187 local_ipaddr = ntohl(local_ipaddr);
188 i40iw_manage_arp_cache(iwdev, 186 i40iw_manage_arp_cache(iwdev,
189 netdev->dev_addr, 187 netdev->dev_addr,
190 &local_ipaddr, 188 &local_ipaddr,
@@ -194,12 +192,11 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
194 case NETDEV_CHANGEADDR: 192 case NETDEV_CHANGEADDR:
195 /* Add the address to the IP table */ 193 /* Add the address to the IP table */
196 if (upper_dev) 194 if (upper_dev)
197 local_ipaddr = 195 local_ipaddr = ntohl(
198 ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; 196 ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
199 else 197 else
200 local_ipaddr = ifa->ifa_address; 198 local_ipaddr = ntohl(ifa->ifa_address);
201 199
202 local_ipaddr = ntohl(local_ipaddr);
203 i40iw_manage_arp_cache(iwdev, 200 i40iw_manage_arp_cache(iwdev,
204 netdev->dev_addr, 201 netdev->dev_addr,
205 &local_ipaddr, 202 &local_ipaddr,
@@ -227,7 +224,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
227 struct net_device *netdev; 224 struct net_device *netdev;
228 struct i40iw_device *iwdev; 225 struct i40iw_device *iwdev;
229 struct i40iw_handler *hdl; 226 struct i40iw_handler *hdl;
230 __be32 local_ipaddr6[4]; 227 u32 local_ipaddr6[4];
231 228
232 hdl = i40iw_find_netdev(event_netdev); 229 hdl = i40iw_find_netdev(event_netdev);
233 if (!hdl) 230 if (!hdl)
@@ -506,14 +503,19 @@ void i40iw_rem_ref(struct ib_qp *ibqp)
506 struct cqp_commands_info *cqp_info; 503 struct cqp_commands_info *cqp_info;
507 struct i40iw_device *iwdev; 504 struct i40iw_device *iwdev;
508 u32 qp_num; 505 u32 qp_num;
506 unsigned long flags;
509 507
510 iwqp = to_iwqp(ibqp); 508 iwqp = to_iwqp(ibqp);
511 if (!atomic_dec_and_test(&iwqp->refcount)) 509 iwdev = iwqp->iwdev;
510 spin_lock_irqsave(&iwdev->qptable_lock, flags);
511 if (!atomic_dec_and_test(&iwqp->refcount)) {
512 spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
512 return; 513 return;
514 }
513 515
514 iwdev = iwqp->iwdev;
515 qp_num = iwqp->ibqp.qp_num; 516 qp_num = iwqp->ibqp.qp_num;
516 iwdev->qp_table[qp_num] = NULL; 517 iwdev->qp_table[qp_num] = NULL;
518 spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
517 cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); 519 cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
518 if (!cqp_request) 520 if (!cqp_request)
519 return; 521 return;
@@ -985,21 +987,24 @@ enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
985enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) 987enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
986{ 988{
987 struct i40iw_device *iwdev = dev->back_dev; 989 struct i40iw_device *iwdev = dev->back_dev;
988 enum i40iw_status_code err_code = 0;
989 int timeout_ret; 990 int timeout_ret;
990 991
991 i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n", 992 i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n",
992 __func__, __LINE__, dev, iwdev); 993 __func__, __LINE__, dev, iwdev);
993 atomic_add(2, &iwdev->vchnl_msgs); 994
995 atomic_set(&iwdev->vchnl_msgs, 2);
994 timeout_ret = wait_event_timeout(iwdev->vchnl_waitq, 996 timeout_ret = wait_event_timeout(iwdev->vchnl_waitq,
995 (atomic_read(&iwdev->vchnl_msgs) == 1), 997 (atomic_read(&iwdev->vchnl_msgs) == 1),
996 I40IW_VCHNL_EVENT_TIMEOUT); 998 I40IW_VCHNL_EVENT_TIMEOUT);
997 atomic_dec(&iwdev->vchnl_msgs); 999 atomic_dec(&iwdev->vchnl_msgs);
998 if (!timeout_ret) { 1000 if (!timeout_ret) {
999 i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret); 1001 i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret);
1000 err_code = I40IW_ERR_TIMEOUT; 1002 atomic_set(&iwdev->vchnl_msgs, 0);
1003 dev->vchnl_up = false;
1004 return I40IW_ERR_TIMEOUT;
1001 } 1005 }
1002 return err_code; 1006 wake_up(&dev->vf_reqs);
1007 return 0;
1003} 1008}
1004 1009
1005/** 1010/**
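The i40iw_rem_ref() hunk above moves the reference drop and the qp_table[] clear under iwdev->qptable_lock, so a concurrent lookup can never find a QP whose last reference is already gone. A condensed sketch of that pattern with illustrative names only (demo_obj, demo_table and demo_table_lock are not driver symbols):

#include <linux/atomic.h>
#include <linux/spinlock.h>
#include <linux/types.h>

#define DEMO_MAX_OBJS 256

struct demo_obj {
        atomic_t refcount;
        u32 id;
};

static struct demo_obj *demo_table[DEMO_MAX_OBJS];
static DEFINE_SPINLOCK(demo_table_lock);

static void demo_obj_put(struct demo_obj *obj)
{
        unsigned long flags;

        spin_lock_irqsave(&demo_table_lock, flags);
        if (!atomic_dec_and_test(&obj->refcount)) {
                spin_unlock_irqrestore(&demo_table_lock, flags);
                return;
        }
        demo_table[obj->id] = NULL;     /* last reference: unpublish first */
        spin_unlock_irqrestore(&demo_table_lock, flags);
        /* the rest of the teardown runs outside the lock */
}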
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 1fe3b84a06e4..4a740f7a0519 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -63,8 +63,8 @@ static int i40iw_query_device(struct ib_device *ibdev,
63 ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr); 63 ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
64 props->fw_ver = I40IW_FW_VERSION; 64 props->fw_ver = I40IW_FW_VERSION;
65 props->device_cap_flags = iwdev->device_cap_flags; 65 props->device_cap_flags = iwdev->device_cap_flags;
66 props->vendor_id = iwdev->vendor_id; 66 props->vendor_id = iwdev->ldev->pcidev->vendor;
67 props->vendor_part_id = iwdev->vendor_part_id; 67 props->vendor_part_id = iwdev->ldev->pcidev->device;
68 props->hw_ver = (u32)iwdev->sc_dev.hw_rev; 68 props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
69 props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; 69 props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
70 props->max_qp = iwdev->max_qp; 70 props->max_qp = iwdev->max_qp;
@@ -74,7 +74,7 @@ static int i40iw_query_device(struct ib_device *ibdev,
74 props->max_cqe = iwdev->max_cqe; 74 props->max_cqe = iwdev->max_cqe;
75 props->max_mr = iwdev->max_mr; 75 props->max_mr = iwdev->max_mr;
76 props->max_pd = iwdev->max_pd; 76 props->max_pd = iwdev->max_pd;
77 props->max_sge_rd = 1; 77 props->max_sge_rd = I40IW_MAX_SGE_RD;
78 props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE; 78 props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
79 props->max_qp_init_rd_atom = props->max_qp_rd_atom; 79 props->max_qp_init_rd_atom = props->max_qp_rd_atom;
80 props->atomic_cap = IB_ATOMIC_NONE; 80 props->atomic_cap = IB_ATOMIC_NONE;
@@ -120,7 +120,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
120 props->pkey_tbl_len = 1; 120 props->pkey_tbl_len = 1;
121 props->active_width = IB_WIDTH_4X; 121 props->active_width = IB_WIDTH_4X;
122 props->active_speed = 1; 122 props->active_speed = 1;
123 props->max_msg_sz = 0x80000000; 123 props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
124 return 0; 124 return 0;
125} 125}
126 126
@@ -437,7 +437,6 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev,
437 kfree(iwqp->kqp.wrid_mem); 437 kfree(iwqp->kqp.wrid_mem);
438 iwqp->kqp.wrid_mem = NULL; 438 iwqp->kqp.wrid_mem = NULL;
439 kfree(iwqp->allocated_buffer); 439 kfree(iwqp->allocated_buffer);
440 iwqp->allocated_buffer = NULL;
441} 440}
442 441
443/** 442/**
@@ -521,14 +520,12 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
521 enum i40iw_status_code status; 520 enum i40iw_status_code status;
522 struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; 521 struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
523 522
524 ukinfo->max_sq_frag_cnt = I40IW_MAX_WQ_FRAGMENT_COUNT;
525
526 sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1); 523 sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1);
527 rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1); 524 rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
528 525
529 status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, &sqshift); 526 status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
530 if (!status) 527 if (!status)
531 status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, &rqshift); 528 status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift);
532 529
533 if (status) 530 if (status)
534 return -ENOSYS; 531 return -ENOSYS;
@@ -609,6 +606,9 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
609 if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) 606 if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
610 init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; 607 init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
611 608
609 if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
610 init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
611
612 memset(&init_info, 0, sizeof(init_info)); 612 memset(&init_info, 0, sizeof(init_info));
613 613
614 sq_size = init_attr->cap.max_send_wr; 614 sq_size = init_attr->cap.max_send_wr;
@@ -618,6 +618,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
618 init_info.qp_uk_init_info.rq_size = rq_size; 618 init_info.qp_uk_init_info.rq_size = rq_size;
619 init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; 619 init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
620 init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; 620 init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
621 init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
621 622
622 mem = kzalloc(sizeof(*iwqp), GFP_KERNEL); 623 mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
623 if (!mem) 624 if (!mem)
@@ -722,8 +723,10 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
722 iwarp_info = &iwqp->iwarp_info; 723 iwarp_info = &iwqp->iwarp_info;
723 iwarp_info->rd_enable = true; 724 iwarp_info->rd_enable = true;
724 iwarp_info->wr_rdresp_en = true; 725 iwarp_info->wr_rdresp_en = true;
725 if (!iwqp->user_mode) 726 if (!iwqp->user_mode) {
727 iwarp_info->fast_reg_en = true;
726 iwarp_info->priv_mode_en = true; 728 iwarp_info->priv_mode_en = true;
729 }
727 iwarp_info->ddp_ver = 1; 730 iwarp_info->ddp_ver = 1;
728 iwarp_info->rdmap_ver = 1; 731 iwarp_info->rdmap_ver = 1;
729 732
@@ -784,6 +787,8 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
784 return ERR_PTR(err_code); 787 return ERR_PTR(err_code);
785 } 788 }
786 } 789 }
790 init_completion(&iwqp->sq_drained);
791 init_completion(&iwqp->rq_drained);
787 792
788 return &iwqp->ibqp; 793 return &iwqp->ibqp;
789error: 794error:
@@ -1444,6 +1449,166 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
1444} 1449}
1445 1450
1446/** 1451/**
1452 * i40iw_hw_alloc_stag - cqp command to allocate stag
1453 * @iwdev: iwarp device
1454 * @iwmr: iwarp mr pointer
1455 */
1456static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr)
1457{
1458 struct i40iw_allocate_stag_info *info;
1459 struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
1460 enum i40iw_status_code status;
1461 int err = 0;
1462 struct i40iw_cqp_request *cqp_request;
1463 struct cqp_commands_info *cqp_info;
1464
1465 cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
1466 if (!cqp_request)
1467 return -ENOMEM;
1468
1469 cqp_info = &cqp_request->info;
1470 info = &cqp_info->in.u.alloc_stag.info;
1471 memset(info, 0, sizeof(*info));
1472 info->page_size = PAGE_SIZE;
1473 info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
1474 info->pd_id = iwpd->sc_pd.pd_id;
1475 info->total_len = iwmr->length;
1476 cqp_info->cqp_cmd = OP_ALLOC_STAG;
1477 cqp_info->post_sq = 1;
1478 cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev;
1479 cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
1480
1481 status = i40iw_handle_cqp_op(iwdev, cqp_request);
1482 if (status) {
1483 err = -ENOMEM;
1484 i40iw_pr_err("CQP-OP MR Reg fail");
1485 }
1486 return err;
1487}
1488
1489/**
1490 * i40iw_alloc_mr - register stag for fast memory registration
1491 * @pd: ibpd pointer
1492 * @mr_type: memory for stag registration
1493 * @max_num_sg: max number of pages
1494 */
1495static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
1496 enum ib_mr_type mr_type,
1497 u32 max_num_sg)
1498{
1499 struct i40iw_pd *iwpd = to_iwpd(pd);
1500 struct i40iw_device *iwdev = to_iwdev(pd->device);
1501 struct i40iw_pble_alloc *palloc;
1502 struct i40iw_pbl *iwpbl;
1503 struct i40iw_mr *iwmr;
1504 enum i40iw_status_code status;
1505 u32 stag;
1506 int err_code = -ENOMEM;
1507
1508 iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
1509 if (!iwmr)
1510 return ERR_PTR(-ENOMEM);
1511
1512 stag = i40iw_create_stag(iwdev);
1513 if (!stag) {
1514 err_code = -EOVERFLOW;
1515 goto err;
1516 }
1517 iwmr->stag = stag;
1518 iwmr->ibmr.rkey = stag;
1519 iwmr->ibmr.lkey = stag;
1520 iwmr->ibmr.pd = pd;
1521 iwmr->ibmr.device = pd->device;
1522 iwpbl = &iwmr->iwpbl;
1523 iwpbl->iwmr = iwmr;
1524 iwmr->type = IW_MEMREG_TYPE_MEM;
1525 palloc = &iwpbl->pble_alloc;
1526 iwmr->page_cnt = max_num_sg;
1527 mutex_lock(&iwdev->pbl_mutex);
1528 status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
1529 mutex_unlock(&iwdev->pbl_mutex);
1530 if (!status)
1531 goto err1;
1532
1533 if (palloc->level != I40IW_LEVEL_1)
1534 goto err2;
1535 err_code = i40iw_hw_alloc_stag(iwdev, iwmr);
1536 if (err_code)
1537 goto err2;
1538 iwpbl->pbl_allocated = true;
1539 i40iw_add_pdusecount(iwpd);
1540 return &iwmr->ibmr;
1541err2:
1542 i40iw_free_pble(iwdev->pble_rsrc, palloc);
1543err1:
1544 i40iw_free_stag(iwdev, stag);
1545err:
1546 kfree(iwmr);
1547 return ERR_PTR(err_code);
1548}
1549
1550/**
1551 * i40iw_set_page - populate pbl list for fmr
1552 * @ibmr: ib mem to access iwarp mr pointer
1553 * @addr: page dma address for pbl list
1554 */
1555static int i40iw_set_page(struct ib_mr *ibmr, u64 addr)
1556{
1557 struct i40iw_mr *iwmr = to_iwmr(ibmr);
1558 struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
1559 struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
1560 u64 *pbl;
1561
1562 if (unlikely(iwmr->npages == iwmr->page_cnt))
1563 return -ENOMEM;
1564
1565 pbl = (u64 *)palloc->level1.addr;
1566 pbl[iwmr->npages++] = cpu_to_le64(addr);
1567 return 0;
1568}
1569
1570/**
1571 * i40iw_map_mr_sg - map of sg list for fmr
1572 * @ibmr: ib mem to access iwarp mr pointer
1573 * @sg: scatter gather list for fmr
1574 * @sg_nents: number of sg pages
1575 */
1576static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
1577 int sg_nents, unsigned int *sg_offset)
1578{
1579 struct i40iw_mr *iwmr = to_iwmr(ibmr);
1580
1581 iwmr->npages = 0;
1582 return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, i40iw_set_page);
1583}
1584
1585/**
1586 * i40iw_drain_sq - drain the send queue
1587 * @ibqp: ib qp pointer
1588 */
1589static void i40iw_drain_sq(struct ib_qp *ibqp)
1590{
1591 struct i40iw_qp *iwqp = to_iwqp(ibqp);
1592 struct i40iw_sc_qp *qp = &iwqp->sc_qp;
1593
1594 if (I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
1595 wait_for_completion(&iwqp->sq_drained);
1596}
1597
1598/**
1599 * i40iw_drain_rq - drain the receive queue
1600 * @ibqp: ib qp pointer
1601 */
1602static void i40iw_drain_rq(struct ib_qp *ibqp)
1603{
1604 struct i40iw_qp *iwqp = to_iwqp(ibqp);
1605 struct i40iw_sc_qp *qp = &iwqp->sc_qp;
1606
1607 if (I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
1608 wait_for_completion(&iwqp->rq_drained);
1609}
1610
1611/**
1447 * i40iw_hwreg_mr - send cqp command for memory registration 1612 * i40iw_hwreg_mr - send cqp command for memory registration
1448 * @iwdev: iwarp device 1613 * @iwdev: iwarp device
1449 * @iwmr: iwarp mr pointer 1614 * @iwmr: iwarp mr pointer
@@ -1526,14 +1691,16 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
1526 struct i40iw_mr *iwmr; 1691 struct i40iw_mr *iwmr;
1527 struct ib_umem *region; 1692 struct ib_umem *region;
1528 struct i40iw_mem_reg_req req; 1693 struct i40iw_mem_reg_req req;
1529 u32 pbl_depth = 0; 1694 u64 pbl_depth = 0;
1530 u32 stag = 0; 1695 u32 stag = 0;
1531 u16 access; 1696 u16 access;
1532 u32 region_length; 1697 u64 region_length;
1533 bool use_pbles = false; 1698 bool use_pbles = false;
1534 unsigned long flags; 1699 unsigned long flags;
1535 int err = -ENOSYS; 1700 int err = -ENOSYS;
1536 1701
1702 if (length > I40IW_MAX_MR_SIZE)
1703 return ERR_PTR(-EINVAL);
1537 region = ib_umem_get(pd->uobject->context, start, length, acc, 0); 1704 region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
1538 if (IS_ERR(region)) 1705 if (IS_ERR(region))
1539 return (struct ib_mr *)region; 1706 return (struct ib_mr *)region;
@@ -1564,7 +1731,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
1564 palloc = &iwpbl->pble_alloc; 1731 palloc = &iwpbl->pble_alloc;
1565 1732
1566 iwmr->type = req.reg_type; 1733 iwmr->type = req.reg_type;
1567 iwmr->page_cnt = pbl_depth; 1734 iwmr->page_cnt = (u32)pbl_depth;
1568 1735
1569 switch (req.reg_type) { 1736 switch (req.reg_type) {
1570 case IW_MEMREG_TYPE_QP: 1737 case IW_MEMREG_TYPE_QP:
@@ -1881,12 +2048,14 @@ static int i40iw_post_send(struct ib_qp *ibqp,
1881 enum i40iw_status_code ret; 2048 enum i40iw_status_code ret;
1882 int err = 0; 2049 int err = 0;
1883 unsigned long flags; 2050 unsigned long flags;
2051 bool inv_stag;
1884 2052
1885 iwqp = (struct i40iw_qp *)ibqp; 2053 iwqp = (struct i40iw_qp *)ibqp;
1886 ukqp = &iwqp->sc_qp.qp_uk; 2054 ukqp = &iwqp->sc_qp.qp_uk;
1887 2055
1888 spin_lock_irqsave(&iwqp->lock, flags); 2056 spin_lock_irqsave(&iwqp->lock, flags);
1889 while (ib_wr) { 2057 while (ib_wr) {
2058 inv_stag = false;
1890 memset(&info, 0, sizeof(info)); 2059 memset(&info, 0, sizeof(info));
1891 info.wr_id = (u64)(ib_wr->wr_id); 2060 info.wr_id = (u64)(ib_wr->wr_id);
1892 if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all) 2061 if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
@@ -1896,19 +2065,28 @@ static int i40iw_post_send(struct ib_qp *ibqp,
1896 2065
1897 switch (ib_wr->opcode) { 2066 switch (ib_wr->opcode) {
1898 case IB_WR_SEND: 2067 case IB_WR_SEND:
1899 if (ib_wr->send_flags & IB_SEND_SOLICITED) 2068 /* fall-through */
1900 info.op_type = I40IW_OP_TYPE_SEND_SOL; 2069 case IB_WR_SEND_WITH_INV:
1901 else 2070 if (ib_wr->opcode == IB_WR_SEND) {
1902 info.op_type = I40IW_OP_TYPE_SEND; 2071 if (ib_wr->send_flags & IB_SEND_SOLICITED)
2072 info.op_type = I40IW_OP_TYPE_SEND_SOL;
2073 else
2074 info.op_type = I40IW_OP_TYPE_SEND;
2075 } else {
2076 if (ib_wr->send_flags & IB_SEND_SOLICITED)
2077 info.op_type = I40IW_OP_TYPE_SEND_SOL_INV;
2078 else
2079 info.op_type = I40IW_OP_TYPE_SEND_INV;
2080 }
1903 2081
1904 if (ib_wr->send_flags & IB_SEND_INLINE) { 2082 if (ib_wr->send_flags & IB_SEND_INLINE) {
1905 info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr; 2083 info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
1906 info.op.inline_send.len = ib_wr->sg_list[0].length; 2084 info.op.inline_send.len = ib_wr->sg_list[0].length;
1907 ret = ukqp->ops.iw_inline_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false); 2085 ret = ukqp->ops.iw_inline_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
1908 } else { 2086 } else {
1909 info.op.send.num_sges = ib_wr->num_sge; 2087 info.op.send.num_sges = ib_wr->num_sge;
1910 info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list; 2088 info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
1911 ret = ukqp->ops.iw_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false); 2089 ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
1912 } 2090 }
1913 2091
1914 if (ret) 2092 if (ret)
@@ -1936,7 +2114,14 @@ static int i40iw_post_send(struct ib_qp *ibqp,
1936 if (ret) 2114 if (ret)
1937 err = -EIO; 2115 err = -EIO;
1938 break; 2116 break;
2117 case IB_WR_RDMA_READ_WITH_INV:
2118 inv_stag = true;
2119 /* fall-through*/
1939 case IB_WR_RDMA_READ: 2120 case IB_WR_RDMA_READ:
2121 if (ib_wr->num_sge > I40IW_MAX_SGE_RD) {
2122 err = -EINVAL;
2123 break;
2124 }
1940 info.op_type = I40IW_OP_TYPE_RDMA_READ; 2125 info.op_type = I40IW_OP_TYPE_RDMA_READ;
1941 info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; 2126 info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
1942 info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; 2127 info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
@@ -1944,10 +2129,47 @@ static int i40iw_post_send(struct ib_qp *ibqp,
1944 info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr; 2129 info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
1945 info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey; 2130 info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
1946 info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length; 2131 info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
1947 ret = ukqp->ops.iw_rdma_read(ukqp, &info, false, false); 2132 ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
1948 if (ret) 2133 if (ret)
1949 err = -EIO; 2134 err = -EIO;
1950 break; 2135 break;
2136 case IB_WR_LOCAL_INV:
2137 info.op_type = I40IW_OP_TYPE_INV_STAG;
2138 info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
2139 ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
2140 if (ret)
2141 err = -EIO;
2142 break;
2143 case IB_WR_REG_MR:
2144 {
2145 struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
2146 int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
2147 int flags = reg_wr(ib_wr)->access;
2148 struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
2149 struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
2150 struct i40iw_fast_reg_stag_info info;
2151
2152 info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
2153 info.access_rights |= i40iw_get_user_access(flags);
2154 info.stag_key = reg_wr(ib_wr)->key & 0xff;
2155 info.stag_idx = reg_wr(ib_wr)->key >> 8;
2156 info.wr_id = ib_wr->wr_id;
2157
2158 info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
2159 info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
2160 info.total_len = iwmr->ibmr.length;
2161 info.first_pm_pbl_index = palloc->level1.idx;
2162 info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
2163 info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
2164
2165 if (page_shift == 21)
2166 info.page_size = 1; /* 2M page */
2167
2168 ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
2169 if (ret)
2170 err = -EIO;
2171 break;
2172 }
1951 default: 2173 default:
1952 err = -EINVAL; 2174 err = -EINVAL;
1953 i40iw_pr_err(" upost_send bad opcode = 0x%x\n", 2175 i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
@@ -2027,6 +2249,7 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
2027 enum i40iw_status_code ret; 2249 enum i40iw_status_code ret;
2028 struct i40iw_cq_uk *ukcq; 2250 struct i40iw_cq_uk *ukcq;
2029 struct i40iw_sc_qp *qp; 2251 struct i40iw_sc_qp *qp;
2252 struct i40iw_qp *iwqp;
2030 unsigned long flags; 2253 unsigned long flags;
2031 2254
2032 iwcq = (struct i40iw_cq *)ibcq; 2255 iwcq = (struct i40iw_cq *)ibcq;
@@ -2037,6 +2260,8 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
2037 ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true); 2260 ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
2038 if (ret == I40IW_ERR_QUEUE_EMPTY) { 2261 if (ret == I40IW_ERR_QUEUE_EMPTY) {
2039 break; 2262 break;
2263 } else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
2264 continue;
2040 } else if (ret) { 2265 } else if (ret) {
2041 if (!cqe_count) 2266 if (!cqe_count)
2042 cqe_count = -1; 2267 cqe_count = -1;
@@ -2044,10 +2269,12 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
2044 } 2269 }
2045 entry->wc_flags = 0; 2270 entry->wc_flags = 0;
2046 entry->wr_id = cq_poll_info.wr_id; 2271 entry->wr_id = cq_poll_info.wr_id;
2047 if (!cq_poll_info.error) 2272 if (cq_poll_info.error) {
2048 entry->status = IB_WC_SUCCESS;
2049 else
2050 entry->status = IB_WC_WR_FLUSH_ERR; 2273 entry->status = IB_WC_WR_FLUSH_ERR;
2274 entry->vendor_err = cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
2275 } else {
2276 entry->status = IB_WC_SUCCESS;
2277 }
2051 2278
2052 switch (cq_poll_info.op_type) { 2279 switch (cq_poll_info.op_type) {
2053 case I40IW_OP_TYPE_RDMA_WRITE: 2280 case I40IW_OP_TYPE_RDMA_WRITE:
@@ -2071,12 +2298,17 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
2071 break; 2298 break;
2072 } 2299 }
2073 2300
2074 entry->vendor_err =
2075 cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
2076 entry->ex.imm_data = 0; 2301 entry->ex.imm_data = 0;
2077 qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle; 2302 qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
2078 entry->qp = (struct ib_qp *)qp->back_qp; 2303 entry->qp = (struct ib_qp *)qp->back_qp;
2079 entry->src_qp = cq_poll_info.qp_id; 2304 entry->src_qp = cq_poll_info.qp_id;
2305 iwqp = (struct i40iw_qp *)qp->back_qp;
2306 if (iwqp->iwarp_state > I40IW_QP_STATE_RTS) {
2307 if (!I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
2308 complete(&iwqp->sq_drained);
2309 if (!I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
2310 complete(&iwqp->rq_drained);
2311 }
2080 entry->byte_len = cq_poll_info.bytes_xfered; 2312 entry->byte_len = cq_poll_info.bytes_xfered;
2081 entry++; 2313 entry++;
2082 cqe_count++; 2314 cqe_count++;
@@ -2143,7 +2375,6 @@ static int i40iw_get_protocol_stats(struct ib_device *ibdev,
2143 struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; 2375 struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
2144 struct timespec curr_time; 2376 struct timespec curr_time;
2145 static struct timespec last_rd_time = {0, 0}; 2377 static struct timespec last_rd_time = {0, 0};
2146 enum i40iw_status_code status = 0;
2147 unsigned long flags; 2378 unsigned long flags;
2148 2379
2149 curr_time = current_kernel_time(); 2380 curr_time = current_kernel_time();
@@ -2156,11 +2387,8 @@ static int i40iw_get_protocol_stats(struct ib_device *ibdev,
2156 spin_unlock_irqrestore(&devstat->stats_lock, flags); 2387 spin_unlock_irqrestore(&devstat->stats_lock, flags);
2157 } else { 2388 } else {
2158 if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1) 2389 if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1)
2159 status = i40iw_vchnl_vf_get_pe_stats(dev, 2390 if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
2160 &devstat->hw_stats); 2391 return -ENOSYS;
2161
2162 if (status)
2163 return -ENOSYS;
2164 } 2392 }
2165 2393
2166 stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] + 2394 stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] +
@@ -2327,6 +2555,10 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
2327 iwibdev->ibdev.query_device = i40iw_query_device; 2555 iwibdev->ibdev.query_device = i40iw_query_device;
2328 iwibdev->ibdev.create_ah = i40iw_create_ah; 2556 iwibdev->ibdev.create_ah = i40iw_create_ah;
2329 iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; 2557 iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
2558 iwibdev->ibdev.drain_sq = i40iw_drain_sq;
2559 iwibdev->ibdev.drain_rq = i40iw_drain_rq;
2560 iwibdev->ibdev.alloc_mr = i40iw_alloc_mr;
2561 iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg;
2330 iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); 2562 iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
2331 if (!iwibdev->ibdev.iwcm) { 2563 if (!iwibdev->ibdev.iwcm) {
2332 ib_dealloc_device(&iwibdev->ibdev); 2564 ib_dealloc_device(&iwibdev->ibdev);
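The verbs hunks above wire up alloc_mr, map_mr_sg and an IB_WR_REG_MR handler for fast memory registration. A consumer-side sketch of how a ULP typically drives that path, assuming the 4.7-era core verbs signatures; fastreg_sketch and its trimmed error handling are illustrative, not part of the commit:

#include <rdma/ib_verbs.h>

static int fastreg_sketch(struct ib_pd *pd, struct ib_qp *qp,
                          struct scatterlist *sg, int sg_nents)
{
        struct ib_reg_wr reg_wr = { };
        struct ib_send_wr *bad_wr;
        struct ib_mr *mr;
        int n;

        mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_nents);
        if (IS_ERR(mr))
                return PTR_ERR(mr);

        /* build the MR's page list from the scatterlist */
        n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
        if (n != sg_nents) {
                ib_dereg_mr(mr);
                return -EINVAL;
        }

        /* post the registration WR; the driver turns this into a fast-reg WQE */
        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.wr.send_flags = IB_SEND_SIGNALED;
        reg_wr.mr = mr;
        reg_wr.key = mr->rkey;
        reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

        return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}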
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 1101f77080e6..0069be8a5a38 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -92,6 +92,7 @@ struct i40iw_mr {
92 struct ib_umem *region; 92 struct ib_umem *region;
93 u16 type; 93 u16 type;
94 u32 page_cnt; 94 u32 page_cnt;
95 u32 npages;
95 u32 stag; 96 u32 stag;
96 u64 length; 97 u64 length;
97 u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS]; 98 u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS];
@@ -169,5 +170,7 @@ struct i40iw_qp {
169 struct i40iw_pbl *iwpbl; 170 struct i40iw_pbl *iwpbl;
170 struct i40iw_dma_mem q2_ctx_mem; 171 struct i40iw_dma_mem q2_ctx_mem;
171 struct i40iw_dma_mem ietf_mem; 172 struct i40iw_dma_mem ietf_mem;
173 struct completion sq_drained;
174 struct completion rq_drained;
172}; 175};
173#endif 176#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c
index cb0f18340e14..e33d4810965c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_vf.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.c
@@ -80,6 +80,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
80 return 0; 80 return 0;
81} 81}
82 82
83struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { 83const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
84 i40iw_manage_vf_pble_bp 84 i40iw_manage_vf_pble_bp
85}; 85};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h
index f649f3a62e13..4359559ece9c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_vf.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.h
@@ -57,6 +57,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
57 u64 scratch, 57 u64 scratch,
58 bool post_sq); 58 bool post_sq);
59 59
60extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops; 60extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
61 61
62#endif 62#endif
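The iw_vf_cqp_ops table above is constified so the function-pointer table can live in read-only memory. A tiny generic sketch of the same pattern (demo_ops names are illustrative only):

struct demo_ops {
        int (*do_thing)(int arg);
};

static int demo_do_thing(int arg)
{
        return arg + 1;
}

/* never written after build time, so the compiler can place it in .rodata */
static const struct demo_ops demo_ops = {
        .do_thing = demo_do_thing,
};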
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index 6b68f7890b76..3041003c94d2 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -254,7 +254,7 @@ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev,
254static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, 254static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
255 u32 vf_id, 255 u32 vf_id,
256 struct i40iw_virtchnl_op_buf *vchnl_msg, 256 struct i40iw_virtchnl_op_buf *vchnl_msg,
257 struct i40iw_dev_hw_stats hw_stats) 257 struct i40iw_dev_hw_stats *hw_stats)
258{ 258{
259 enum i40iw_status_code ret_code; 259 enum i40iw_status_code ret_code;
260 u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1]; 260 u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1];
@@ -264,7 +264,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
264 vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx; 264 vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
265 vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer); 265 vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
266 vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS; 266 vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
267 *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats; 267 *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats;
268 ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer)); 268 ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
269 if (ret_code) 269 if (ret_code)
270 i40iw_debug(dev, I40IW_DEBUG_VIRT, 270 i40iw_debug(dev, I40IW_DEBUG_VIRT,
@@ -437,11 +437,9 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
437 vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); 437 vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
438 return I40IW_SUCCESS; 438 return I40IW_SUCCESS;
439 } 439 }
440 for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; 440 for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
441 iw_vf_idx++) {
442 if (!dev->vf_dev[iw_vf_idx]) { 441 if (!dev->vf_dev[iw_vf_idx]) {
443 if (first_avail_iw_vf == 442 if (first_avail_iw_vf == I40IW_MAX_PE_ENABLED_VF_COUNT)
444 I40IW_MAX_PE_ENABLED_VF_COUNT)
445 first_avail_iw_vf = iw_vf_idx; 443 first_avail_iw_vf = iw_vf_idx;
446 continue; 444 continue;
447 } 445 }
@@ -541,7 +539,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
541 devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); 539 devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
542 spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); 540 spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
543 vf_dev->msg_count--; 541 vf_dev->msg_count--;
544 vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats); 542 vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats);
545 break; 543 break;
546 default: 544 default:
547 i40iw_debug(dev, I40IW_DEBUG_VIRT, 545 i40iw_debug(dev, I40IW_DEBUG_VIRT,
@@ -596,23 +594,25 @@ enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
596 struct i40iw_virtchnl_req vchnl_req; 594 struct i40iw_virtchnl_req vchnl_req;
597 enum i40iw_status_code ret_code; 595 enum i40iw_status_code ret_code;
598 596
597 if (!i40iw_vf_clear_to_send(dev))
598 return I40IW_ERR_TIMEOUT;
599 memset(&vchnl_req, 0, sizeof(vchnl_req)); 599 memset(&vchnl_req, 0, sizeof(vchnl_req));
600 vchnl_req.dev = dev; 600 vchnl_req.dev = dev;
601 vchnl_req.parm = vchnl_ver; 601 vchnl_req.parm = vchnl_ver;
602 vchnl_req.parm_len = sizeof(*vchnl_ver); 602 vchnl_req.parm_len = sizeof(*vchnl_ver);
603 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 603 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
604
604 ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req); 605 ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req);
605 if (!ret_code) { 606 if (ret_code) {
606 ret_code = i40iw_vf_wait_vchnl_resp(dev);
607 if (!ret_code)
608 ret_code = vchnl_req.ret_code;
609 else
610 dev->vchnl_up = false;
611 } else {
612 i40iw_debug(dev, I40IW_DEBUG_VIRT, 607 i40iw_debug(dev, I40IW_DEBUG_VIRT,
613 "%s Send message failed 0x%0x\n", __func__, ret_code); 608 "%s Send message failed 0x%0x\n", __func__, ret_code);
609 return ret_code;
614 } 610 }
615 return ret_code; 611 ret_code = i40iw_vf_wait_vchnl_resp(dev);
612 if (ret_code)
613 return ret_code;
614 else
615 return vchnl_req.ret_code;
616} 616}
617 617
618/** 618/**
@@ -626,23 +626,25 @@ enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
626 struct i40iw_virtchnl_req vchnl_req; 626 struct i40iw_virtchnl_req vchnl_req;
627 enum i40iw_status_code ret_code; 627 enum i40iw_status_code ret_code;
628 628
629 if (!i40iw_vf_clear_to_send(dev))
630 return I40IW_ERR_TIMEOUT;
629 memset(&vchnl_req, 0, sizeof(vchnl_req)); 631 memset(&vchnl_req, 0, sizeof(vchnl_req));
630 vchnl_req.dev = dev; 632 vchnl_req.dev = dev;
631 vchnl_req.parm = hmc_fcn; 633 vchnl_req.parm = hmc_fcn;
632 vchnl_req.parm_len = sizeof(*hmc_fcn); 634 vchnl_req.parm_len = sizeof(*hmc_fcn);
633 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 635 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
636
634 ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req); 637 ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req);
635 if (!ret_code) { 638 if (ret_code) {
636 ret_code = i40iw_vf_wait_vchnl_resp(dev);
637 if (!ret_code)
638 ret_code = vchnl_req.ret_code;
639 else
640 dev->vchnl_up = false;
641 } else {
642 i40iw_debug(dev, I40IW_DEBUG_VIRT, 639 i40iw_debug(dev, I40IW_DEBUG_VIRT,
643 "%s Send message failed 0x%0x\n", __func__, ret_code); 640 "%s Send message failed 0x%0x\n", __func__, ret_code);
641 return ret_code;
644 } 642 }
645 return ret_code; 643 ret_code = i40iw_vf_wait_vchnl_resp(dev);
644 if (ret_code)
645 return ret_code;
646 else
647 return vchnl_req.ret_code;
646} 648}
647 649
648/** 650/**
@@ -660,25 +662,27 @@ enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
660 struct i40iw_virtchnl_req vchnl_req; 662 struct i40iw_virtchnl_req vchnl_req;
661 enum i40iw_status_code ret_code; 663 enum i40iw_status_code ret_code;
662 664
665 if (!i40iw_vf_clear_to_send(dev))
666 return I40IW_ERR_TIMEOUT;
663 memset(&vchnl_req, 0, sizeof(vchnl_req)); 667 memset(&vchnl_req, 0, sizeof(vchnl_req));
664 vchnl_req.dev = dev; 668 vchnl_req.dev = dev;
665 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 669 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
670
666 ret_code = vchnl_vf_send_add_hmc_objs_req(dev, 671 ret_code = vchnl_vf_send_add_hmc_objs_req(dev,
667 &vchnl_req, 672 &vchnl_req,
668 rsrc_type, 673 rsrc_type,
669 start_index, 674 start_index,
670 rsrc_count); 675 rsrc_count);
671 if (!ret_code) { 676 if (ret_code) {
672 ret_code = i40iw_vf_wait_vchnl_resp(dev);
673 if (!ret_code)
674 ret_code = vchnl_req.ret_code;
675 else
676 dev->vchnl_up = false;
677 } else {
678 i40iw_debug(dev, I40IW_DEBUG_VIRT, 677 i40iw_debug(dev, I40IW_DEBUG_VIRT,
679 "%s Send message failed 0x%0x\n", __func__, ret_code); 678 "%s Send message failed 0x%0x\n", __func__, ret_code);
679 return ret_code;
680 } 680 }
681 return ret_code; 681 ret_code = i40iw_vf_wait_vchnl_resp(dev);
682 if (ret_code)
683 return ret_code;
684 else
685 return vchnl_req.ret_code;
682} 686}
683 687
684/** 688/**
@@ -696,25 +700,27 @@ enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
696 struct i40iw_virtchnl_req vchnl_req; 700 struct i40iw_virtchnl_req vchnl_req;
697 enum i40iw_status_code ret_code; 701 enum i40iw_status_code ret_code;
698 702
703 if (!i40iw_vf_clear_to_send(dev))
704 return I40IW_ERR_TIMEOUT;
699 memset(&vchnl_req, 0, sizeof(vchnl_req)); 705 memset(&vchnl_req, 0, sizeof(vchnl_req));
700 vchnl_req.dev = dev; 706 vchnl_req.dev = dev;
701 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 707 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
708
702 ret_code = vchnl_vf_send_del_hmc_objs_req(dev, 709 ret_code = vchnl_vf_send_del_hmc_objs_req(dev,
703 &vchnl_req, 710 &vchnl_req,
704 rsrc_type, 711 rsrc_type,
705 start_index, 712 start_index,
706 rsrc_count); 713 rsrc_count);
707 if (!ret_code) { 714 if (ret_code) {
708 ret_code = i40iw_vf_wait_vchnl_resp(dev);
709 if (!ret_code)
710 ret_code = vchnl_req.ret_code;
711 else
712 dev->vchnl_up = false;
713 } else {
714 i40iw_debug(dev, I40IW_DEBUG_VIRT, 715 i40iw_debug(dev, I40IW_DEBUG_VIRT,
715 "%s Send message failed 0x%0x\n", __func__, ret_code); 716 "%s Send message failed 0x%0x\n", __func__, ret_code);
717 return ret_code;
716 } 718 }
717 return ret_code; 719 ret_code = i40iw_vf_wait_vchnl_resp(dev);
720 if (ret_code)
721 return ret_code;
722 else
723 return vchnl_req.ret_code;
718} 724}
719 725
720/** 726/**
@@ -728,21 +734,23 @@ enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
728 struct i40iw_virtchnl_req vchnl_req; 734 struct i40iw_virtchnl_req vchnl_req;
729 enum i40iw_status_code ret_code; 735 enum i40iw_status_code ret_code;
730 736
737 if (!i40iw_vf_clear_to_send(dev))
738 return I40IW_ERR_TIMEOUT;
731 memset(&vchnl_req, 0, sizeof(vchnl_req)); 739 memset(&vchnl_req, 0, sizeof(vchnl_req));
732 vchnl_req.dev = dev; 740 vchnl_req.dev = dev;
733 vchnl_req.parm = hw_stats; 741 vchnl_req.parm = hw_stats;
734 vchnl_req.parm_len = sizeof(*hw_stats); 742 vchnl_req.parm_len = sizeof(*hw_stats);
735 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 743 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
744
736 ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req); 745 ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req);
737 if (!ret_code) { 746 if (ret_code) {
738 ret_code = i40iw_vf_wait_vchnl_resp(dev);
739 if (!ret_code)
740 ret_code = vchnl_req.ret_code;
741 else
742 dev->vchnl_up = false;
743 } else {
744 i40iw_debug(dev, I40IW_DEBUG_VIRT, 747 i40iw_debug(dev, I40IW_DEBUG_VIRT,
745 "%s Send message failed 0x%0x\n", __func__, ret_code); 748 "%s Send message failed 0x%0x\n", __func__, ret_code);
749 return ret_code;
746 } 750 }
747 return ret_code; 751 ret_code = i40iw_vf_wait_vchnl_resp(dev);
752 if (ret_code)
753 return ret_code;
754 else
755 return vchnl_req.ret_code;
748} 756}
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 99451d887266..8f7ad07915b0 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -96,7 +96,7 @@ struct ib_sa_mcmember_data {
96 u8 scope_join_state; 96 u8 scope_join_state;
97 u8 proxy_join; 97 u8 proxy_join;
98 u8 reserved[2]; 98 u8 reserved[2];
99}; 99} __packed __aligned(4);
100 100
101struct mcast_group { 101struct mcast_group {
102 struct ib_sa_mcmember_data rec; 102 struct ib_sa_mcmember_data rec;
@@ -747,14 +747,11 @@ static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx
747 __be64 tid, 747 __be64 tid,
748 union ib_gid *new_mgid) 748 union ib_gid *new_mgid)
749{ 749{
750 struct mcast_group *group = NULL, *cur_group; 750 struct mcast_group *group = NULL, *cur_group, *n;
751 struct mcast_req *req; 751 struct mcast_req *req;
752 struct list_head *pos;
753 struct list_head *n;
754 752
755 mutex_lock(&ctx->mcg_table_lock); 753 mutex_lock(&ctx->mcg_table_lock);
756 list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) { 754 list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) {
757 group = list_entry(pos, struct mcast_group, mgid0_list);
758 mutex_lock(&group->lock); 755 mutex_lock(&group->lock);
759 if (group->last_req_tid == tid) { 756 if (group->last_req_tid == tid) {
760 if (memcmp(new_mgid, &mgid0, sizeof mgid0)) { 757 if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1eca01cebe51..6c5ac5d8f32f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -717,9 +717,8 @@ int mlx4_ib_dealloc_mw(struct ib_mw *mw);
717struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, 717struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
718 enum ib_mr_type mr_type, 718 enum ib_mr_type mr_type,
719 u32 max_num_sg); 719 u32 max_num_sg);
720int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, 720int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
721 struct scatterlist *sg, 721 unsigned int *sg_offset);
722 int sg_nents);
723int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); 722int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
724int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); 723int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
725struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, 724struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index ce0b5aa8eb9b..631272172a0b 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -528,9 +528,8 @@ static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
528 return 0; 528 return 0;
529} 529}
530 530
531int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, 531int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
532 struct scatterlist *sg, 532 unsigned int *sg_offset)
533 int sg_nents)
534{ 533{
535 struct mlx4_ib_mr *mr = to_mmr(ibmr); 534 struct mlx4_ib_mr *mr = to_mmr(ibmr);
536 int rc; 535 int rc;
@@ -541,7 +540,7 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
541 sizeof(u64) * mr->max_pages, 540 sizeof(u64) * mr->max_pages,
542 DMA_TO_DEVICE); 541 DMA_TO_DEVICE);
543 542
544 rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page); 543 rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);
545 544
546 ib_dma_sync_single_for_device(ibmr->device, mr->page_map, 545 ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
547 sizeof(u64) * mr->max_pages, 546 sizeof(u64) * mr->max_pages,
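
The map_mr_sg hunks above and below all follow one API change: ib_map_mr_sg() and ib_sg_to_pages() gained an sg_offset argument so a registration can start part-way into the first scatterlist element, while passing NULL keeps the old "map from the start" behaviour (as the iSER hunk further down does). A minimal sketch of a caller, assuming a made-up helper name and PAGE_SIZE-granular mapping:

/*
 * Illustrative sketch only -- not part of this patch.  "example_reg_mr" is a
 * hypothetical helper; the ib_map_mr_sg() call matches the signature this
 * series introduces.
 */
static int example_reg_mr(struct ib_mr *mr, struct scatterlist *sg,
			  int sg_nents, unsigned int first_sg_offset)
{
	unsigned int sg_offset = first_sg_offset;	/* bytes into sg[0] */
	int n;

	/* Passing NULL instead of &sg_offset keeps the pre-series behaviour. */
	n = ib_map_mr_sg(mr, sg, sg_nents, &sg_offset, PAGE_SIZE);
	if (n < sg_nents)
		return -EINVAL;		/* SG list did not fit in this MR */

	return 0;
}
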
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index a00ba4418de9..dabcc65bd65e 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -879,7 +879,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
879 879
880 mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); 880 mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
881 cq->mcq.irqn = irqn; 881 cq->mcq.irqn = irqn;
882 cq->mcq.comp = mlx5_ib_cq_comp; 882 if (context)
883 cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
884 else
885 cq->mcq.comp = mlx5_ib_cq_comp;
883 cq->mcq.event = mlx5_ib_cq_event; 886 cq->mcq.event = mlx5_ib_cq_event;
884 887
885 INIT_LIST_HEAD(&cq->wc_list); 888 INIT_LIST_HEAD(&cq->wc_list);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4cb81f68d850..c72797cd9e4f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -38,6 +38,9 @@
38#include <linux/dma-mapping.h> 38#include <linux/dma-mapping.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/io-mapping.h> 40#include <linux/io-mapping.h>
41#if defined(CONFIG_X86)
42#include <asm/pat.h>
43#endif
41#include <linux/sched.h> 44#include <linux/sched.h>
42#include <rdma/ib_user_verbs.h> 45#include <rdma/ib_user_verbs.h>
43#include <rdma/ib_addr.h> 46#include <rdma/ib_addr.h>
@@ -517,6 +520,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
517 props->device_cap_flags |= IB_DEVICE_UD_TSO; 520 props->device_cap_flags |= IB_DEVICE_UD_TSO;
518 } 521 }
519 522
523 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
524 MLX5_CAP_ETH(dev->mdev, scatter_fcs))
525 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
526
520 props->vendor_part_id = mdev->pdev->device; 527 props->vendor_part_id = mdev->pdev->device;
521 props->hw_ver = mdev->pdev->revision; 528 props->hw_ver = mdev->pdev->revision;
522 529
@@ -1068,38 +1075,89 @@ static int get_index(unsigned long offset)
1068 return get_arg(offset); 1075 return get_arg(offset);
1069} 1076}
1070 1077
1078static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
1079{
1080 switch (cmd) {
1081 case MLX5_IB_MMAP_WC_PAGE:
1082 return "WC";
1083 case MLX5_IB_MMAP_REGULAR_PAGE:
1084 return "best effort WC";
1085 case MLX5_IB_MMAP_NC_PAGE:
1086 return "NC";
1087 default:
1088 return NULL;
1089 }
1090}
1091
1092static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
1093 struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
1094{
1095 int err;
1096 unsigned long idx;
1097 phys_addr_t pfn, pa;
1098 pgprot_t prot;
1099
1100 switch (cmd) {
1101 case MLX5_IB_MMAP_WC_PAGE:
1102/* Some architectures don't support WC memory */
1103#if defined(CONFIG_X86)
1104 if (!pat_enabled())
1105 return -EPERM;
1106#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
1107 return -EPERM;
1108#endif
1109 /* fall through */
1110 case MLX5_IB_MMAP_REGULAR_PAGE:
1111 /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
1112 prot = pgprot_writecombine(vma->vm_page_prot);
1113 break;
1114 case MLX5_IB_MMAP_NC_PAGE:
1115 prot = pgprot_noncached(vma->vm_page_prot);
1116 break;
1117 default:
1118 return -EINVAL;
1119 }
1120
1121 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1122 return -EINVAL;
1123
1124 idx = get_index(vma->vm_pgoff);
1125 if (idx >= uuari->num_uars)
1126 return -EINVAL;
1127
1128 pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1129 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
1130
1131 vma->vm_page_prot = prot;
1132 err = io_remap_pfn_range(vma, vma->vm_start, pfn,
1133 PAGE_SIZE, vma->vm_page_prot);
1134 if (err) {
1135 mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
1136 err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
1137 return -EAGAIN;
1138 }
1139
1140 pa = pfn << PAGE_SHIFT;
1141 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
1142 vma->vm_start, &pa);
1143
1144 return 0;
1145}
1146
1071static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 1147static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1072{ 1148{
1073 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1149 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1074 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1150 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1075 struct mlx5_uuar_info *uuari = &context->uuari; 1151 struct mlx5_uuar_info *uuari = &context->uuari;
1076 unsigned long command; 1152 unsigned long command;
1077 unsigned long idx;
1078 phys_addr_t pfn; 1153 phys_addr_t pfn;
1079 1154
1080 command = get_command(vma->vm_pgoff); 1155 command = get_command(vma->vm_pgoff);
1081 switch (command) { 1156 switch (command) {
1157 case MLX5_IB_MMAP_WC_PAGE:
1158 case MLX5_IB_MMAP_NC_PAGE:
1082 case MLX5_IB_MMAP_REGULAR_PAGE: 1159 case MLX5_IB_MMAP_REGULAR_PAGE:
1083 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1160 return uar_mmap(dev, command, vma, uuari);
1084 return -EINVAL;
1085
1086 idx = get_index(vma->vm_pgoff);
1087 if (idx >= uuari->num_uars)
1088 return -EINVAL;
1089
1090 pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1091 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1092 (unsigned long long)pfn);
1093
1094 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
1095 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1096 PAGE_SIZE, vma->vm_page_prot))
1097 return -EAGAIN;
1098
1099 mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
1100 vma->vm_start,
1101 (unsigned long long)pfn << PAGE_SHIFT);
1102 break;
1103 1161
1104 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 1162 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
1105 return -ENOSYS; 1163 return -ENOSYS;
@@ -1108,7 +1166,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
1108 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1166 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1109 return -EINVAL; 1167 return -EINVAL;
1110 1168
1111 if (vma->vm_flags & (VM_WRITE | VM_EXEC)) 1169 if (vma->vm_flags & VM_WRITE)
1112 return -EPERM; 1170 return -EPERM;
1113 1171
1114 /* Don't expose to user-space information it shouldn't have */ 1172 /* Don't expose to user-space information it shouldn't have */
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b46c25542a7c..c4a9825828bc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -70,6 +70,8 @@ enum {
70enum mlx5_ib_mmap_cmd { 70enum mlx5_ib_mmap_cmd {
71 MLX5_IB_MMAP_REGULAR_PAGE = 0, 71 MLX5_IB_MMAP_REGULAR_PAGE = 0,
72 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, 72 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
73 MLX5_IB_MMAP_WC_PAGE = 2,
74 MLX5_IB_MMAP_NC_PAGE = 3,
73 /* 5 is chosen in order to be compatible with old versions of libmlx5 */ 75 /* 5 is chosen in order to be compatible with old versions of libmlx5 */
74 MLX5_IB_MMAP_CORE_CLOCK = 5, 76 MLX5_IB_MMAP_CORE_CLOCK = 5,
75}; 77};
@@ -356,6 +358,7 @@ enum mlx5_ib_qp_flags {
356 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, 358 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5,
357 /* QP uses 1 as its source QP number */ 359 /* QP uses 1 as its source QP number */
358 MLX5_IB_QP_SQPN_QP1 = 1 << 6, 360 MLX5_IB_QP_SQPN_QP1 = 1 << 6,
361 MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
359}; 362};
360 363
361struct mlx5_umr_wr { 364struct mlx5_umr_wr {
@@ -712,9 +715,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
712struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, 715struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
713 enum ib_mr_type mr_type, 716 enum ib_mr_type mr_type,
714 u32 max_num_sg); 717 u32 max_num_sg);
715int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, 718int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
716 struct scatterlist *sg, 719 unsigned int *sg_offset);
717 int sg_nents);
718int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 720int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
719 const struct ib_wc *in_wc, const struct ib_grh *in_grh, 721 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
720 const struct ib_mad_hdr *in, size_t in_mad_size, 722 const struct ib_mad_hdr *in, size_t in_mad_size,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4d5bff151cdf..8cf2ce50511f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1751,26 +1751,33 @@ done:
1751static int 1751static int
1752mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 1752mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
1753 struct scatterlist *sgl, 1753 struct scatterlist *sgl,
1754 unsigned short sg_nents) 1754 unsigned short sg_nents,
1755 unsigned int *sg_offset_p)
1755{ 1756{
1756 struct scatterlist *sg = sgl; 1757 struct scatterlist *sg = sgl;
1757 struct mlx5_klm *klms = mr->descs; 1758 struct mlx5_klm *klms = mr->descs;
1759 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1758 u32 lkey = mr->ibmr.pd->local_dma_lkey; 1760 u32 lkey = mr->ibmr.pd->local_dma_lkey;
1759 int i; 1761 int i;
1760 1762
1761 mr->ibmr.iova = sg_dma_address(sg); 1763 mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
1762 mr->ibmr.length = 0; 1764 mr->ibmr.length = 0;
1763 mr->ndescs = sg_nents; 1765 mr->ndescs = sg_nents;
1764 1766
1765 for_each_sg(sgl, sg, sg_nents, i) { 1767 for_each_sg(sgl, sg, sg_nents, i) {
1766 if (unlikely(i > mr->max_descs)) 1768 if (unlikely(i > mr->max_descs))
1767 break; 1769 break;
1768 klms[i].va = cpu_to_be64(sg_dma_address(sg)); 1770 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
1769 klms[i].bcount = cpu_to_be32(sg_dma_len(sg)); 1771 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
1770 klms[i].key = cpu_to_be32(lkey); 1772 klms[i].key = cpu_to_be32(lkey);
1771 mr->ibmr.length += sg_dma_len(sg); 1773 mr->ibmr.length += sg_dma_len(sg);
1774
1775 sg_offset = 0;
1772 } 1776 }
1773 1777
1778 if (sg_offset_p)
1779 *sg_offset_p = sg_offset;
1780
1774 return i; 1781 return i;
1775} 1782}
1776 1783
@@ -1788,9 +1795,8 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1788 return 0; 1795 return 0;
1789} 1796}
1790 1797
1791int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, 1798int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1792 struct scatterlist *sg, 1799 unsigned int *sg_offset)
1793 int sg_nents)
1794{ 1800{
1795 struct mlx5_ib_mr *mr = to_mmr(ibmr); 1801 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1796 int n; 1802 int n;
@@ -1802,9 +1808,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
1802 DMA_TO_DEVICE); 1808 DMA_TO_DEVICE);
1803 1809
1804 if (mr->access_mode == MLX5_ACCESS_MODE_KLM) 1810 if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
1805 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents); 1811 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
1806 else 1812 else
1807 n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); 1813 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
1814 mlx5_set_page);
1808 1815
1809 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, 1816 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1810 mr->desc_size * mr->max_descs, 1817 mr->desc_size * mr->max_descs,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8dee8bc1e0fe..504117657d41 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1028,6 +1028,7 @@ static int get_rq_pas_size(void *qpc)
1028static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, 1028static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1029 struct mlx5_ib_rq *rq, void *qpin) 1029 struct mlx5_ib_rq *rq, void *qpin)
1030{ 1030{
1031 struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
1031 __be64 *pas; 1032 __be64 *pas;
1032 __be64 *qp_pas; 1033 __be64 *qp_pas;
1033 void *in; 1034 void *in;
@@ -1051,6 +1052,9 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1051 MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); 1052 MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
1052 MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); 1053 MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
1053 1054
1055 if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS)
1056 MLX5_SET(rqc, rqc, scatter_fcs, 1);
1057
1054 wq = MLX5_ADDR_OF(rqc, rqc, wq); 1058 wq = MLX5_ADDR_OF(rqc, rqc, wq);
1055 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); 1059 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1056 MLX5_SET(wq, wq, end_padding_mode, 1060 MLX5_SET(wq, wq, end_padding_mode,
@@ -1136,11 +1140,12 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1136 } 1140 }
1137 1141
1138 if (qp->rq.wqe_cnt) { 1142 if (qp->rq.wqe_cnt) {
1143 rq->base.container_mibqp = qp;
1144
1139 err = create_raw_packet_qp_rq(dev, rq, in); 1145 err = create_raw_packet_qp_rq(dev, rq, in);
1140 if (err) 1146 if (err)
1141 goto err_destroy_sq; 1147 goto err_destroy_sq;
1142 1148
1143 rq->base.container_mibqp = qp;
1144 1149
1145 err = create_raw_packet_qp_tir(dev, rq, tdn); 1150 err = create_raw_packet_qp_tir(dev, rq, tdn);
1146 if (err) 1151 if (err)
@@ -1252,6 +1257,19 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1252 return -EOPNOTSUPP; 1257 return -EOPNOTSUPP;
1253 } 1258 }
1254 1259
1260 if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
1261 if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
1262 mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs");
1263 return -EOPNOTSUPP;
1264 }
1265 if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) ||
1266 !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
1267 mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n");
1268 return -EOPNOTSUPP;
1269 }
1270 qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS;
1271 }
1272
1255 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 1273 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1256 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; 1274 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1257 1275
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 6d3a169c049b..37331e2fdc5f 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -44,6 +44,7 @@
44#include <linux/ip.h> 44#include <linux/ip.h>
45#include <linux/tcp.h> 45#include <linux/tcp.h>
46#include <linux/init.h> 46#include <linux/init.h>
47#include <linux/kernel.h>
47 48
48#include <asm/io.h> 49#include <asm/io.h>
49#include <asm/irq.h> 50#include <asm/irq.h>
@@ -903,70 +904,15 @@ void nes_clc(unsigned long parm)
903 */ 904 */
904void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length) 905void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length)
905{ 906{
906 char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
907 'a', 'b', 'c', 'd', 'e', 'f'};
908 char *ptr;
909 char hex_buf[80];
910 char ascii_buf[20];
911 int num_char;
912 int num_ascii;
913 int num_hex;
914
915 if (!(nes_debug_level & dump_debug_level)) { 907 if (!(nes_debug_level & dump_debug_level)) {
916 return; 908 return;
917 } 909 }
918 910
919 ptr = addr;
920 if (length > 0x100) { 911 if (length > 0x100) {
921 nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100); 912 nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100);
922 length = 0x100; 913 length = 0x100;
923 } 914 }
924 nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length); 915 nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", addr, length, length);
925
926 memset(ascii_buf, 0, 20);
927 memset(hex_buf, 0, 80);
928
929 num_ascii = 0;
930 num_hex = 0;
931 for (num_char = 0; num_char < length; num_char++) {
932 if (num_ascii == 8) {
933 ascii_buf[num_ascii++] = ' ';
934 hex_buf[num_hex++] = '-';
935 hex_buf[num_hex++] = ' ';
936 }
937
938 if (*ptr < 0x20 || *ptr > 0x7e)
939 ascii_buf[num_ascii++] = '.';
940 else
941 ascii_buf[num_ascii++] = *ptr;
942 hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)];
943 hex_buf[num_hex++] = xlate[*ptr & 0x0f];
944 hex_buf[num_hex++] = ' ';
945 ptr++;
946
947 if (num_ascii >= 17) {
948 /* output line and reset */
949 nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf);
950 memset(ascii_buf, 0, 20);
951 memset(hex_buf, 0, 80);
952 num_ascii = 0;
953 num_hex = 0;
954 }
955 }
956 916
957 /* output the rest */ 917 print_hex_dump(KERN_ERR, PFX, DUMP_PREFIX_NONE, 16, 1, addr, length, true);
958 if (num_ascii) {
959 while (num_ascii < 17) {
960 if (num_ascii == 8) {
961 hex_buf[num_hex++] = ' ';
962 hex_buf[num_hex++] = ' ';
963 }
964 hex_buf[num_hex++] = ' ';
965 hex_buf[num_hex++] = ' ';
966 hex_buf[num_hex++] = ' ';
967 num_ascii++;
968 }
969
970 nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf);
971 }
972} 918}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index fba69a39a7eb..464d6da5fe91 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -402,15 +402,14 @@ static int nes_set_page(struct ib_mr *ibmr, u64 addr)
402 return 0; 402 return 0;
403} 403}
404 404
405static int nes_map_mr_sg(struct ib_mr *ibmr, 405static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
406 struct scatterlist *sg, 406 int sg_nents, unsigned int *sg_offset)
407 int sg_nents)
408{ 407{
409 struct nes_mr *nesmr = to_nesmr(ibmr); 408 struct nes_mr *nesmr = to_nesmr(ibmr);
410 409
411 nesmr->npages = 0; 410 nesmr->npages = 0;
412 411
413 return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page); 412 return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, nes_set_page);
414} 413}
415 414
416/** 415/**
@@ -981,7 +980,7 @@ static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic,
981/** 980/**
982 * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory. 981 * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory.
983 */ 982 */
984static inline void nes_free_qp_mem(struct nes_device *nesdev, 983static void nes_free_qp_mem(struct nes_device *nesdev,
985 struct nes_qp *nesqp, int virt_wqs) 984 struct nes_qp *nesqp, int virt_wqs)
986{ 985{
987 unsigned long flags; 986 unsigned long flags;
@@ -1315,6 +1314,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1315 nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); 1314 nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type);
1316 return ERR_PTR(-EINVAL); 1315 return ERR_PTR(-EINVAL);
1317 } 1316 }
1317 init_completion(&nesqp->sq_drained);
1318 init_completion(&nesqp->rq_drained);
1318 1319
1319 nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); 1320 nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
1320 init_timer(&nesqp->terminate_timer); 1321 init_timer(&nesqp->terminate_timer);
@@ -3452,6 +3453,29 @@ out:
3452 return err; 3453 return err;
3453} 3454}
3454 3455
3456/**
3457 * nes_drain_sq - drain sq
3458 * @ibqp: pointer to ibqp
3459 */
3460static void nes_drain_sq(struct ib_qp *ibqp)
3461{
3462 struct nes_qp *nesqp = to_nesqp(ibqp);
3463
3464 if (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)
3465 wait_for_completion(&nesqp->sq_drained);
3466}
3467
3468/**
3469 * nes_drain_rq - drain rq
3470 * @ibqp: pointer to ibqp
3471 */
3472static void nes_drain_rq(struct ib_qp *ibqp)
3473{
3474 struct nes_qp *nesqp = to_nesqp(ibqp);
3475
3476 if (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)
3477 wait_for_completion(&nesqp->rq_drained);
3478}
3455 3479
3456/** 3480/**
3457 * nes_poll_cq 3481 * nes_poll_cq
@@ -3582,6 +3606,13 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3582 } 3606 }
3583 } 3607 }
3584 3608
3609 if (nesqp->iwarp_state > NES_CQP_QP_IWARP_STATE_RTS) {
3610 if (nesqp->hwqp.sq_tail == nesqp->hwqp.sq_head)
3611 complete(&nesqp->sq_drained);
3612 if (nesqp->hwqp.rq_tail == nesqp->hwqp.rq_head)
3613 complete(&nesqp->rq_drained);
3614 }
3615
3585 entry->wr_id = wrid; 3616 entry->wr_id = wrid;
3586 entry++; 3617 entry++;
3587 cqe_count++; 3618 cqe_count++;
@@ -3754,6 +3785,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3754 nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; 3785 nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
3755 nesibdev->ibdev.post_send = nes_post_send; 3786 nesibdev->ibdev.post_send = nes_post_send;
3756 nesibdev->ibdev.post_recv = nes_post_recv; 3787 nesibdev->ibdev.post_recv = nes_post_recv;
3788 nesibdev->ibdev.drain_sq = nes_drain_sq;
3789 nesibdev->ibdev.drain_rq = nes_drain_rq;
3757 3790
3758 nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); 3791 nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL);
3759 if (nesibdev->ibdev.iwcm == NULL) { 3792 if (nesibdev->ibdev.iwcm == NULL) {
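
With drain_sq/drain_rq wired up, ib_drain_qp() on a nes device blocks on the sq_drained/rq_drained completions that nes_poll_cq() signals once the queues empty in the error state. A minimal sketch of the consumer side after the connection has been torn down, with a made-up helper name:

/* Illustrative sketch only -- not part of this patch. */
static void example_teardown_qp(struct ib_qp *qp)
{
	/*
	 * Wait for all posted send and receive work to complete; for nes
	 * this dispatches to nes_drain_sq()/nes_drain_rq() above, which
	 * assume the QP has already left the RTS state.
	 */
	ib_drain_qp(qp);

	ib_destroy_qp(qp);
}
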
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 70290883d067..e02a5662dc20 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -189,6 +189,8 @@ struct nes_qp {
189 u8 pau_pending; 189 u8 pau_pending;
190 u8 pau_state; 190 u8 pau_state;
191 __u64 nesuqp_addr; 191 __u64 nesuqp_addr;
192 struct completion sq_drained;
193 struct completion rq_drained;
192}; 194};
193 195
194struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, 196struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index a8496a18e20d..b1a3d91fe8b9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -3081,13 +3081,12 @@ static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
3081 return 0; 3081 return 0;
3082} 3082}
3083 3083
3084int ocrdma_map_mr_sg(struct ib_mr *ibmr, 3084int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
3085 struct scatterlist *sg, 3085 unsigned int *sg_offset)
3086 int sg_nents)
3087{ 3086{
3088 struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); 3087 struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
3089 3088
3090 mr->npages = 0; 3089 mr->npages = 0;
3091 3090
3092 return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page); 3091 return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page);
3093} 3092}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 8b517fd36779..704ef1e9271b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -122,8 +122,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
122struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, 122struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
123 enum ib_mr_type mr_type, 123 enum ib_mr_type mr_type,
124 u32 max_num_sg); 124 u32 max_num_sg);
125int ocrdma_map_mr_sg(struct ib_mr *ibmr, 125int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
126 struct scatterlist *sg, 126 unsigned int *sg_offset);
127 int sg_nents);
128 127
129#endif /* __OCRDMA_VERBS_H__ */ 128#endif /* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 3f062f0dd9d8..f253111e682e 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1090,7 +1090,7 @@ void qib_free_devdata(struct qib_devdata *dd)
1090 qib_dbg_ibdev_exit(&dd->verbs_dev); 1090 qib_dbg_ibdev_exit(&dd->verbs_dev);
1091#endif 1091#endif
1092 free_percpu(dd->int_counter); 1092 free_percpu(dd->int_counter);
1093 ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); 1093 rvt_dealloc_device(&dd->verbs_dev.rdi);
1094} 1094}
1095 1095
1096u64 qib_int_counter(struct qib_devdata *dd) 1096u64 qib_int_counter(struct qib_devdata *dd)
@@ -1183,7 +1183,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
1183bail: 1183bail:
1184 if (!list_empty(&dd->list)) 1184 if (!list_empty(&dd->list))
1185 list_del_init(&dd->list); 1185 list_del_init(&dd->list);
1186 ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); 1186 rvt_dealloc_device(&dd->verbs_dev.rdi);
1187 return ERR_PTR(ret); 1187 return ERR_PTR(ret);
1188} 1188}
1189 1189
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 9088e26d3ac8..444028a3582a 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -230,7 +230,7 @@ bail:
230 * 230 *
231 * Return 1 if constructed; otherwise, return 0. 231 * Return 1 if constructed; otherwise, return 0.
232 */ 232 */
233int qib_make_rc_req(struct rvt_qp *qp) 233int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
234{ 234{
235 struct qib_qp_priv *priv = qp->priv; 235 struct qib_qp_priv *priv = qp->priv;
236 struct qib_ibdev *dev = to_idev(qp->ibqp.device); 236 struct qib_ibdev *dev = to_idev(qp->ibqp.device);
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index a5f07a64b228..b67779256297 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -739,7 +739,7 @@ void qib_do_send(struct rvt_qp *qp)
739 struct qib_qp_priv *priv = qp->priv; 739 struct qib_qp_priv *priv = qp->priv;
740 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 740 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
741 struct qib_pportdata *ppd = ppd_from_ibp(ibp); 741 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
742 int (*make_req)(struct rvt_qp *qp); 742 int (*make_req)(struct rvt_qp *qp, unsigned long *flags);
743 unsigned long flags; 743 unsigned long flags;
744 744
745 if ((qp->ibqp.qp_type == IB_QPT_RC || 745 if ((qp->ibqp.qp_type == IB_QPT_RC ||
@@ -781,7 +781,7 @@ void qib_do_send(struct rvt_qp *qp)
781 qp->s_hdrwords = 0; 781 qp->s_hdrwords = 0;
782 spin_lock_irqsave(&qp->s_lock, flags); 782 spin_lock_irqsave(&qp->s_lock, flags);
783 } 783 }
784 } while (make_req(qp)); 784 } while (make_req(qp, &flags));
785 785
786 spin_unlock_irqrestore(&qp->s_lock, flags); 786 spin_unlock_irqrestore(&qp->s_lock, flags);
787} 787}
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 7bdbc79ceaa3..1d61bd04f449 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -45,7 +45,7 @@
45 * 45 *
46 * Return 1 if constructed; otherwise, return 0. 46 * Return 1 if constructed; otherwise, return 0.
47 */ 47 */
48int qib_make_uc_req(struct rvt_qp *qp) 48int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
49{ 49{
50 struct qib_qp_priv *priv = qp->priv; 50 struct qib_qp_priv *priv = qp->priv;
51 struct qib_other_headers *ohdr; 51 struct qib_other_headers *ohdr;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index d9502137de62..846e6c726df7 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -238,7 +238,7 @@ drop:
238 * 238 *
239 * Return 1 if constructed; otherwise, return 0. 239 * Return 1 if constructed; otherwise, return 0.
240 */ 240 */
241int qib_make_ud_req(struct rvt_qp *qp) 241int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
242{ 242{
243 struct qib_qp_priv *priv = qp->priv; 243 struct qib_qp_priv *priv = qp->priv;
244 struct qib_other_headers *ohdr; 244 struct qib_other_headers *ohdr;
@@ -294,7 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
294 this_cpu_inc(ibp->pmastats->n_unicast_xmit); 294 this_cpu_inc(ibp->pmastats->n_unicast_xmit);
295 lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); 295 lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
296 if (unlikely(lid == ppd->lid)) { 296 if (unlikely(lid == ppd->lid)) {
297 unsigned long flags; 297 unsigned long tflags = *flags;
298 /* 298 /*
299 * If DMAs are in progress, we can't generate 299 * If DMAs are in progress, we can't generate
300 * a completion for the loopback packet since 300 * a completion for the loopback packet since
@@ -307,10 +307,10 @@ int qib_make_ud_req(struct rvt_qp *qp)
307 goto bail; 307 goto bail;
308 } 308 }
309 qp->s_cur = next_cur; 309 qp->s_cur = next_cur;
310 local_irq_save(flags); 310 spin_unlock_irqrestore(&qp->s_lock, tflags);
311 spin_unlock_irqrestore(&qp->s_lock, flags);
312 qib_ud_loopback(qp, wqe); 311 qib_ud_loopback(qp, wqe);
313 spin_lock_irqsave(&qp->s_lock, flags); 312 spin_lock_irqsave(&qp->s_lock, tflags);
313 *flags = tflags;
314 qib_send_complete(qp, wqe, IB_WC_SUCCESS); 314 qib_send_complete(qp, wqe, IB_WC_SUCCESS);
315 goto done; 315 goto done;
316 } 316 }
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 4b76a8d59337..6888f03c6d61 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -430,11 +430,11 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
430 430
431void qib_send_rc_ack(struct rvt_qp *qp); 431void qib_send_rc_ack(struct rvt_qp *qp);
432 432
433int qib_make_rc_req(struct rvt_qp *qp); 433int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags);
434 434
435int qib_make_uc_req(struct rvt_qp *qp); 435int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags);
436 436
437int qib_make_ud_req(struct rvt_qp *qp); 437int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags);
438 438
439int qib_register_ib_device(struct qib_devdata *); 439int qib_register_ib_device(struct qib_devdata *);
440 440
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index a9e3bcc522c4..0f12c211c385 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -829,13 +829,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
829 case IB_QPT_SMI: 829 case IB_QPT_SMI:
830 case IB_QPT_GSI: 830 case IB_QPT_GSI:
831 case IB_QPT_UD: 831 case IB_QPT_UD:
832 qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; 832 qp->allowed_ops = IB_OPCODE_UD;
833 break; 833 break;
834 case IB_QPT_RC: 834 case IB_QPT_RC:
835 qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; 835 qp->allowed_ops = IB_OPCODE_RC;
836 break; 836 break;
837 case IB_QPT_UC: 837 case IB_QPT_UC:
838 qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; 838 qp->allowed_ops = IB_OPCODE_UC;
839 break; 839 break;
840 default: 840 default:
841 ret = ERR_PTR(-EINVAL); 841 ret = ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 6caf5272ba1f..e1cc2cc42f25 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -106,6 +106,19 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
106} 106}
107EXPORT_SYMBOL(rvt_alloc_device); 107EXPORT_SYMBOL(rvt_alloc_device);
108 108
109/**
110 * rvt_dealloc_device - deallocate rdi
111 * @rdi: structure to free
112 *
113 * Free a structure allocated with rvt_alloc_device()
114 */
115void rvt_dealloc_device(struct rvt_dev_info *rdi)
116{
117 kfree(rdi->ports);
118 ib_dealloc_device(&rdi->ibdev);
119}
120EXPORT_SYMBOL(rvt_dealloc_device);
121
109static int rvt_query_device(struct ib_device *ibdev, 122static int rvt_query_device(struct ib_device *ibdev,
110 struct ib_device_attr *props, 123 struct ib_device_attr *props,
111 struct ib_udata *uhw) 124 struct ib_udata *uhw)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index a53fa5fc0dec..1502199c8e56 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -36,6 +36,27 @@
36 36
37#include "ipoib.h" 37#include "ipoib.h"
38 38
39struct ipoib_stats {
40 char stat_string[ETH_GSTRING_LEN];
41 int stat_offset;
42};
43
44#define IPOIB_NETDEV_STAT(m) { \
45 .stat_string = #m, \
46 .stat_offset = offsetof(struct rtnl_link_stats64, m) }
47
48static const struct ipoib_stats ipoib_gstrings_stats[] = {
49 IPOIB_NETDEV_STAT(rx_packets),
50 IPOIB_NETDEV_STAT(tx_packets),
51 IPOIB_NETDEV_STAT(rx_bytes),
52 IPOIB_NETDEV_STAT(tx_bytes),
53 IPOIB_NETDEV_STAT(tx_errors),
54 IPOIB_NETDEV_STAT(rx_dropped),
55 IPOIB_NETDEV_STAT(tx_dropped)
56};
57
58#define IPOIB_GLOBAL_STATS_LEN ARRAY_SIZE(ipoib_gstrings_stats)
59
39static void ipoib_get_drvinfo(struct net_device *netdev, 60static void ipoib_get_drvinfo(struct net_device *netdev,
40 struct ethtool_drvinfo *drvinfo) 61 struct ethtool_drvinfo *drvinfo)
41{ 62{
@@ -92,11 +113,57 @@ static int ipoib_set_coalesce(struct net_device *dev,
92 113
93 return 0; 114 return 0;
94} 115}
116static void ipoib_get_ethtool_stats(struct net_device *dev,
117 struct ethtool_stats __always_unused *stats,
118 u64 *data)
119{
120 int i;
121 struct net_device_stats *net_stats = &dev->stats;
122 u8 *p = (u8 *)net_stats;
123
124 for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++)
125 data[i] = *(u64 *)(p + ipoib_gstrings_stats[i].stat_offset);
126
127}
128static void ipoib_get_strings(struct net_device __always_unused *dev,
129 u32 stringset, u8 *data)
130{
131 u8 *p = data;
132 int i;
133
134 switch (stringset) {
135 case ETH_SS_STATS:
136 for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) {
137 memcpy(p, ipoib_gstrings_stats[i].stat_string,
138 ETH_GSTRING_LEN);
139 p += ETH_GSTRING_LEN;
140 }
141 break;
142 case ETH_SS_TEST:
143 default:
144 break;
145 }
146}
147static int ipoib_get_sset_count(struct net_device __always_unused *dev,
148 int sset)
149{
150 switch (sset) {
151 case ETH_SS_STATS:
152 return IPOIB_GLOBAL_STATS_LEN;
153 case ETH_SS_TEST:
154 default:
155 break;
156 }
157 return -EOPNOTSUPP;
158}
95 159
96static const struct ethtool_ops ipoib_ethtool_ops = { 160static const struct ethtool_ops ipoib_ethtool_ops = {
97 .get_drvinfo = ipoib_get_drvinfo, 161 .get_drvinfo = ipoib_get_drvinfo,
98 .get_coalesce = ipoib_get_coalesce, 162 .get_coalesce = ipoib_get_coalesce,
99 .set_coalesce = ipoib_set_coalesce, 163 .set_coalesce = ipoib_set_coalesce,
164 .get_strings = ipoib_get_strings,
165 .get_ethtool_stats = ipoib_get_ethtool_stats,
166 .get_sset_count = ipoib_get_sset_count,
100}; 167};
101 168
102void ipoib_set_ethtool_ops(struct net_device *dev) 169void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 3643d559ba31..418e5a1c8744 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -51,8 +51,6 @@ MODULE_PARM_DESC(data_debug_level,
51 "Enable data path debug tracing if > 0"); 51 "Enable data path debug tracing if > 0");
52#endif 52#endif
53 53
54static DEFINE_MUTEX(pkey_mutex);
55
56struct ipoib_ah *ipoib_create_ah(struct net_device *dev, 54struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
57 struct ib_pd *pd, struct ib_ah_attr *attr) 55 struct ib_pd *pd, struct ib_ah_attr *attr)
58{ 56{
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 9a391cc5b9b3..90be56893414 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -236,7 +236,7 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
236 page_vec->npages = 0; 236 page_vec->npages = 0;
237 page_vec->fake_mr.page_size = SIZE_4K; 237 page_vec->fake_mr.page_size = SIZE_4K;
238 plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, 238 plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
239 mem->size, iser_set_page); 239 mem->size, NULL, iser_set_page);
240 if (unlikely(plen < mem->size)) { 240 if (unlikely(plen < mem->size)) {
241 iser_err("page vec too short to hold this SG\n"); 241 iser_err("page vec too short to hold this SG\n");
242 iser_data_buf_dump(mem, device->ib_device); 242 iser_data_buf_dump(mem, device->ib_device);
@@ -446,7 +446,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
446 446
447 ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); 447 ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
448 448
449 n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); 449 n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K);
450 if (unlikely(n != mem->size)) { 450 if (unlikely(n != mem->size)) {
451 iser_err("failed to map sg (%d/%d)\n", 451 iser_err("failed to map sg (%d/%d)\n",
452 n, mem->size); 452 n, mem->size);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 411e4464ca23..897b5a4993e8 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -33,7 +33,8 @@
33 33
34#define ISERT_MAX_CONN 8 34#define ISERT_MAX_CONN 8
35#define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN) 35#define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN)
36#define ISER_MAX_TX_CQ_LEN (ISERT_QP_MAX_REQ_DTOS * ISERT_MAX_CONN) 36#define ISER_MAX_TX_CQ_LEN \
37 ((ISERT_QP_MAX_REQ_DTOS + ISCSI_DEF_XMIT_CMDS_MAX) * ISERT_MAX_CONN)
37#define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \ 38#define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \
38 ISERT_MAX_CONN) 39 ISERT_MAX_CONN)
39 40
@@ -46,14 +47,6 @@ static LIST_HEAD(device_list);
46static struct workqueue_struct *isert_comp_wq; 47static struct workqueue_struct *isert_comp_wq;
47static struct workqueue_struct *isert_release_wq; 48static struct workqueue_struct *isert_release_wq;
48 49
49static void
50isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
51static int
52isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn);
53static void
54isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
55static int
56isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn);
57static int 50static int
58isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd); 51isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
59static int 52static int
@@ -142,6 +135,7 @@ isert_create_qp(struct isert_conn *isert_conn,
142 attr.recv_cq = comp->cq; 135 attr.recv_cq = comp->cq;
143 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; 136 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
144 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; 137 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
138 attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
145 attr.cap.max_send_sge = device->ib_device->attrs.max_sge; 139 attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
146 isert_conn->max_sge = min(device->ib_device->attrs.max_sge, 140 isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
147 device->ib_device->attrs.max_sge_rd); 141 device->ib_device->attrs.max_sge_rd);
@@ -270,9 +264,9 @@ isert_alloc_comps(struct isert_device *device)
270 device->ib_device->num_comp_vectors)); 264 device->ib_device->num_comp_vectors));
271 265
272 isert_info("Using %d CQs, %s supports %d vectors support " 266 isert_info("Using %d CQs, %s supports %d vectors support "
273 "Fast registration %d pi_capable %d\n", 267 "pi_capable %d\n",
274 device->comps_used, device->ib_device->name, 268 device->comps_used, device->ib_device->name,
275 device->ib_device->num_comp_vectors, device->use_fastreg, 269 device->ib_device->num_comp_vectors,
276 device->pi_capable); 270 device->pi_capable);
277 271
278 device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp), 272 device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
@@ -313,18 +307,6 @@ isert_create_device_ib_res(struct isert_device *device)
313 isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge); 307 isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
314 isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); 308 isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
315 309
316 /* asign function handlers */
317 if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
318 ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
319 device->use_fastreg = 1;
320 device->reg_rdma_mem = isert_reg_rdma;
321 device->unreg_rdma_mem = isert_unreg_rdma;
322 } else {
323 device->use_fastreg = 0;
324 device->reg_rdma_mem = isert_map_rdma;
325 device->unreg_rdma_mem = isert_unmap_cmd;
326 }
327
328 ret = isert_alloc_comps(device); 310 ret = isert_alloc_comps(device);
329 if (ret) 311 if (ret)
330 goto out; 312 goto out;
@@ -417,146 +399,6 @@ isert_device_get(struct rdma_cm_id *cma_id)
417} 399}
418 400
419static void 401static void
420isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
421{
422 struct fast_reg_descriptor *fr_desc, *tmp;
423 int i = 0;
424
425 if (list_empty(&isert_conn->fr_pool))
426 return;
427
428 isert_info("Freeing conn %p fastreg pool", isert_conn);
429
430 list_for_each_entry_safe(fr_desc, tmp,
431 &isert_conn->fr_pool, list) {
432 list_del(&fr_desc->list);
433 ib_dereg_mr(fr_desc->data_mr);
434 if (fr_desc->pi_ctx) {
435 ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
436 ib_dereg_mr(fr_desc->pi_ctx->sig_mr);
437 kfree(fr_desc->pi_ctx);
438 }
439 kfree(fr_desc);
440 ++i;
441 }
442
443 if (i < isert_conn->fr_pool_size)
444 isert_warn("Pool still has %d regions registered\n",
445 isert_conn->fr_pool_size - i);
446}
447
448static int
449isert_create_pi_ctx(struct fast_reg_descriptor *desc,
450 struct ib_device *device,
451 struct ib_pd *pd)
452{
453 struct pi_context *pi_ctx;
454 int ret;
455
456 pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
457 if (!pi_ctx) {
458 isert_err("Failed to allocate pi context\n");
459 return -ENOMEM;
460 }
461
462 pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
463 ISCSI_ISER_SG_TABLESIZE);
464 if (IS_ERR(pi_ctx->prot_mr)) {
465 isert_err("Failed to allocate prot frmr err=%ld\n",
466 PTR_ERR(pi_ctx->prot_mr));
467 ret = PTR_ERR(pi_ctx->prot_mr);
468 goto err_pi_ctx;
469 }
470 desc->ind |= ISERT_PROT_KEY_VALID;
471
472 pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2);
473 if (IS_ERR(pi_ctx->sig_mr)) {
474 isert_err("Failed to allocate signature enabled mr err=%ld\n",
475 PTR_ERR(pi_ctx->sig_mr));
476 ret = PTR_ERR(pi_ctx->sig_mr);
477 goto err_prot_mr;
478 }
479
480 desc->pi_ctx = pi_ctx;
481 desc->ind |= ISERT_SIG_KEY_VALID;
482 desc->ind &= ~ISERT_PROTECTED;
483
484 return 0;
485
486err_prot_mr:
487 ib_dereg_mr(pi_ctx->prot_mr);
488err_pi_ctx:
489 kfree(pi_ctx);
490
491 return ret;
492}
493
494static int
495isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
496 struct fast_reg_descriptor *fr_desc)
497{
498 fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
499 ISCSI_ISER_SG_TABLESIZE);
500 if (IS_ERR(fr_desc->data_mr)) {
501 isert_err("Failed to allocate data frmr err=%ld\n",
502 PTR_ERR(fr_desc->data_mr));
503 return PTR_ERR(fr_desc->data_mr);
504 }
505 fr_desc->ind |= ISERT_DATA_KEY_VALID;
506
507 isert_dbg("Created fr_desc %p\n", fr_desc);
508
509 return 0;
510}
511
512static int
513isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
514{
515 struct fast_reg_descriptor *fr_desc;
516 struct isert_device *device = isert_conn->device;
517 struct se_session *se_sess = isert_conn->conn->sess->se_sess;
518 struct se_node_acl *se_nacl = se_sess->se_node_acl;
519 int i, ret, tag_num;
520 /*
521 * Setup the number of FRMRs based upon the number of tags
522 * available to session in iscsi_target_locate_portal().
523 */
524 tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
525 tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
526
527 isert_conn->fr_pool_size = 0;
528 for (i = 0; i < tag_num; i++) {
529 fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
530 if (!fr_desc) {
531 isert_err("Failed to allocate fast_reg descriptor\n");
532 ret = -ENOMEM;
533 goto err;
534 }
535
536 ret = isert_create_fr_desc(device->ib_device,
537 device->pd, fr_desc);
538 if (ret) {
539 isert_err("Failed to create fastreg descriptor err=%d\n",
540 ret);
541 kfree(fr_desc);
542 goto err;
543 }
544
545 list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
546 isert_conn->fr_pool_size++;
547 }
548
549 isert_dbg("Creating conn %p fastreg pool size=%d",
550 isert_conn, isert_conn->fr_pool_size);
551
552 return 0;
553
554err:
555 isert_conn_free_fastreg_pool(isert_conn);
556 return ret;
557}
558
559static void
560isert_init_conn(struct isert_conn *isert_conn) 402isert_init_conn(struct isert_conn *isert_conn)
561{ 403{
562 isert_conn->state = ISER_CONN_INIT; 404 isert_conn->state = ISER_CONN_INIT;
@@ -565,8 +407,6 @@ isert_init_conn(struct isert_conn *isert_conn)
565 init_completion(&isert_conn->login_req_comp); 407 init_completion(&isert_conn->login_req_comp);
566 kref_init(&isert_conn->kref); 408 kref_init(&isert_conn->kref);
567 mutex_init(&isert_conn->mutex); 409 mutex_init(&isert_conn->mutex);
568 spin_lock_init(&isert_conn->pool_lock);
569 INIT_LIST_HEAD(&isert_conn->fr_pool);
570 INIT_WORK(&isert_conn->release_work, isert_release_work); 410 INIT_WORK(&isert_conn->release_work, isert_release_work);
571} 411}
572 412
@@ -739,9 +579,6 @@ isert_connect_release(struct isert_conn *isert_conn)
739 579
740 BUG_ON(!device); 580 BUG_ON(!device);
741 581
742 if (device->use_fastreg)
743 isert_conn_free_fastreg_pool(isert_conn);
744
745 isert_free_rx_descriptors(isert_conn); 582 isert_free_rx_descriptors(isert_conn);
746 if (isert_conn->cm_id) 583 if (isert_conn->cm_id)
747 rdma_destroy_id(isert_conn->cm_id); 584 rdma_destroy_id(isert_conn->cm_id);
@@ -1080,7 +917,6 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
1080{ 917{
1081 struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc; 918 struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
1082 919
1083 isert_cmd->iser_ib_op = ISER_IB_SEND;
1084 tx_desc->tx_cqe.done = isert_send_done; 920 tx_desc->tx_cqe.done = isert_send_done;
1085 send_wr->wr_cqe = &tx_desc->tx_cqe; 921 send_wr->wr_cqe = &tx_desc->tx_cqe;
1086 922
@@ -1160,16 +996,6 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
1160 } 996 }
1161 if (!login->login_failed) { 997 if (!login->login_failed) {
1162 if (login->login_complete) { 998 if (login->login_complete) {
1163 if (!conn->sess->sess_ops->SessionType &&
1164 isert_conn->device->use_fastreg) {
1165 ret = isert_conn_create_fastreg_pool(isert_conn);
1166 if (ret) {
1167 isert_err("Conn: %p failed to create"
1168 " fastreg pool\n", isert_conn);
1169 return ret;
1170 }
1171 }
1172
1173 ret = isert_alloc_rx_descriptors(isert_conn); 999 ret = isert_alloc_rx_descriptors(isert_conn);
1174 if (ret) 1000 if (ret)
1175 return ret; 1001 return ret;
@@ -1633,97 +1459,26 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1633 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); 1459 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
1634} 1460}
1635 1461
1636static int
1637isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
1638 struct scatterlist *sg, u32 nents, u32 length, u32 offset,
1639 enum iser_ib_op_code op, struct isert_data_buf *data)
1640{
1641 struct ib_device *ib_dev = isert_conn->cm_id->device;
1642
1643 data->dma_dir = op == ISER_IB_RDMA_WRITE ?
1644 DMA_TO_DEVICE : DMA_FROM_DEVICE;
1645
1646 data->len = length - offset;
1647 data->offset = offset;
1648 data->sg_off = data->offset / PAGE_SIZE;
1649
1650 data->sg = &sg[data->sg_off];
1651 data->nents = min_t(unsigned int, nents - data->sg_off,
1652 ISCSI_ISER_SG_TABLESIZE);
1653 data->len = min_t(unsigned int, data->len, ISCSI_ISER_SG_TABLESIZE *
1654 PAGE_SIZE);
1655
1656 data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents,
1657 data->dma_dir);
1658 if (unlikely(!data->dma_nents)) {
1659 isert_err("Cmd: unable to dma map SGs %p\n", sg);
1660 return -EINVAL;
1661 }
1662
1663 isert_dbg("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
1664 isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
1665
1666 return 0;
1667}
1668
1669static void 1462static void
1670isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data) 1463isert_rdma_rw_ctx_destroy(struct isert_cmd *cmd, struct isert_conn *conn)
1671{ 1464{
1672 struct ib_device *ib_dev = isert_conn->cm_id->device; 1465 struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd;
1673 1466 enum dma_data_direction dir = target_reverse_dma_direction(se_cmd);
1674 ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir);
1675 memset(data, 0, sizeof(*data));
1676}
1677
1678
1679
1680static void
1681isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
1682{
1683 isert_dbg("Cmd %p\n", isert_cmd);
1684 1467
1685 if (isert_cmd->data.sg) { 1468 if (!cmd->rw.nr_ops)
1686 isert_dbg("Cmd %p unmap_sg op\n", isert_cmd); 1469 return;
1687 isert_unmap_data_buf(isert_conn, &isert_cmd->data);
1688 }
1689
1690 if (isert_cmd->rdma_wr) {
1691 isert_dbg("Cmd %p free send_wr\n", isert_cmd);
1692 kfree(isert_cmd->rdma_wr);
1693 isert_cmd->rdma_wr = NULL;
1694 }
1695
1696 if (isert_cmd->ib_sge) {
1697 isert_dbg("Cmd %p free ib_sge\n", isert_cmd);
1698 kfree(isert_cmd->ib_sge);
1699 isert_cmd->ib_sge = NULL;
1700 }
1701}
1702
1703static void
1704isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
1705{
1706 isert_dbg("Cmd %p\n", isert_cmd);
1707
1708 if (isert_cmd->fr_desc) {
1709 isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, isert_cmd->fr_desc);
1710 if (isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
1711 isert_unmap_data_buf(isert_conn, &isert_cmd->prot);
1712 isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
1713 }
1714 spin_lock_bh(&isert_conn->pool_lock);
1715 list_add_tail(&isert_cmd->fr_desc->list, &isert_conn->fr_pool);
1716 spin_unlock_bh(&isert_conn->pool_lock);
1717 isert_cmd->fr_desc = NULL;
1718 }
1719 1470
1720 if (isert_cmd->data.sg) { 1471 if (isert_prot_cmd(conn, se_cmd)) {
1721 isert_dbg("Cmd %p unmap_sg op\n", isert_cmd); 1472 rdma_rw_ctx_destroy_signature(&cmd->rw, conn->qp,
1722 isert_unmap_data_buf(isert_conn, &isert_cmd->data); 1473 conn->cm_id->port_num, se_cmd->t_data_sg,
1474 se_cmd->t_data_nents, se_cmd->t_prot_sg,
1475 se_cmd->t_prot_nents, dir);
1476 } else {
1477 rdma_rw_ctx_destroy(&cmd->rw, conn->qp, conn->cm_id->port_num,
1478 se_cmd->t_data_sg, se_cmd->t_data_nents, dir);
1723 } 1479 }
1724 1480
1725 isert_cmd->ib_sge = NULL; 1481 cmd->rw.nr_ops = 0;
1726 isert_cmd->rdma_wr = NULL;
1727} 1482}
1728 1483
1729static void 1484static void
@@ -1732,7 +1487,6 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
1732 struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; 1487 struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
1733 struct isert_conn *isert_conn = isert_cmd->conn; 1488 struct isert_conn *isert_conn = isert_cmd->conn;
1734 struct iscsi_conn *conn = isert_conn->conn; 1489 struct iscsi_conn *conn = isert_conn->conn;
1735 struct isert_device *device = isert_conn->device;
1736 struct iscsi_text_rsp *hdr; 1490 struct iscsi_text_rsp *hdr;
1737 1491
1738 isert_dbg("Cmd %p\n", isert_cmd); 1492 isert_dbg("Cmd %p\n", isert_cmd);
@@ -1760,7 +1514,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
1760 } 1514 }
1761 } 1515 }
1762 1516
1763 device->unreg_rdma_mem(isert_cmd, isert_conn); 1517 isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
1764 transport_generic_free_cmd(&cmd->se_cmd, 0); 1518 transport_generic_free_cmd(&cmd->se_cmd, 0);
1765 break; 1519 break;
1766 case ISCSI_OP_SCSI_TMFUNC: 1520 case ISCSI_OP_SCSI_TMFUNC:
@@ -1894,14 +1648,9 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
1894 1648
1895 isert_dbg("Cmd %p\n", isert_cmd); 1649 isert_dbg("Cmd %p\n", isert_cmd);
1896 1650
1897 if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) { 1651 ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
1898 ret = isert_check_pi_status(cmd, 1652 isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
1899 isert_cmd->fr_desc->pi_ctx->sig_mr);
1900 isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
1901 }
1902 1653
1903 device->unreg_rdma_mem(isert_cmd, isert_conn);
1904 isert_cmd->rdma_wr_num = 0;
1905 if (ret) 1654 if (ret)
1906 transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0); 1655 transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0);
1907 else 1656 else
@@ -1929,16 +1678,12 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
1929 1678
1930 isert_dbg("Cmd %p\n", isert_cmd); 1679 isert_dbg("Cmd %p\n", isert_cmd);
1931 1680
1932 if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
1933 ret = isert_check_pi_status(se_cmd,
1934 isert_cmd->fr_desc->pi_ctx->sig_mr);
1935 isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
1936 }
1937
1938 iscsit_stop_dataout_timer(cmd); 1681 iscsit_stop_dataout_timer(cmd);
1939 device->unreg_rdma_mem(isert_cmd, isert_conn); 1682
1940 cmd->write_data_done = isert_cmd->data.len; 1683 if (isert_prot_cmd(isert_conn, se_cmd))
1941 isert_cmd->rdma_wr_num = 0; 1684 ret = isert_check_pi_status(se_cmd, isert_cmd->rw.sig->sig_mr);
1685 isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
1686 cmd->write_data_done = 0;
1942 1687
1943 isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); 1688 isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
1944 spin_lock_bh(&cmd->istate_lock); 1689 spin_lock_bh(&cmd->istate_lock);
@@ -2111,7 +1856,6 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
2111{ 1856{
2112 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); 1857 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
2113 struct isert_conn *isert_conn = conn->context; 1858 struct isert_conn *isert_conn = conn->context;
2114 struct isert_device *device = isert_conn->device;
2115 1859
2116 spin_lock_bh(&conn->cmd_lock); 1860 spin_lock_bh(&conn->cmd_lock);
2117 if (!list_empty(&cmd->i_conn_node)) 1861 if (!list_empty(&cmd->i_conn_node))
@@ -2120,8 +1864,7 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
2120 1864
2121 if (cmd->data_direction == DMA_TO_DEVICE) 1865 if (cmd->data_direction == DMA_TO_DEVICE)
2122 iscsit_stop_dataout_timer(cmd); 1866 iscsit_stop_dataout_timer(cmd);
2123 1867 isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
2124 device->unreg_rdma_mem(isert_cmd, isert_conn);
2125} 1868}
2126 1869
2127static enum target_prot_op 1870static enum target_prot_op
@@ -2274,234 +2017,6 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
2274 return isert_post_response(isert_conn, isert_cmd); 2017 return isert_post_response(isert_conn, isert_cmd);
2275} 2018}
2276 2019
2277static int
2278isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
2279 struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr,
2280 u32 data_left, u32 offset)
2281{
2282 struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
2283 struct scatterlist *sg_start, *tmp_sg;
2284 struct isert_device *device = isert_conn->device;
2285 struct ib_device *ib_dev = device->ib_device;
2286 u32 sg_off, page_off;
2287 int i = 0, sg_nents;
2288
2289 sg_off = offset / PAGE_SIZE;
2290 sg_start = &cmd->se_cmd.t_data_sg[sg_off];
2291 sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
2292 page_off = offset % PAGE_SIZE;
2293
2294 rdma_wr->wr.sg_list = ib_sge;
2295 rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe;
2296
2297 /*
2298 * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
2299 */
2300 for_each_sg(sg_start, tmp_sg, sg_nents, i) {
2301 isert_dbg("RDMA from SGL dma_addr: 0x%llx dma_len: %u, "
2302 "page_off: %u\n",
2303 (unsigned long long)tmp_sg->dma_address,
2304 tmp_sg->length, page_off);
2305
2306 ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
2307 ib_sge->length = min_t(u32, data_left,
2308 ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
2309 ib_sge->lkey = device->pd->local_dma_lkey;
2310
2311 isert_dbg("RDMA ib_sge: addr: 0x%llx length: %u lkey: %x\n",
2312 ib_sge->addr, ib_sge->length, ib_sge->lkey);
2313 page_off = 0;
2314 data_left -= ib_sge->length;
2315 if (!data_left)
2316 break;
2317 ib_sge++;
2318 isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
2319 }
2320
2321 rdma_wr->wr.num_sge = ++i;
2322 isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
2323 rdma_wr->wr.sg_list, rdma_wr->wr.num_sge);
2324
2325 return rdma_wr->wr.num_sge;
2326}
2327
2328static int
2329isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn)
2330{
2331 struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
2332 struct se_cmd *se_cmd = &cmd->se_cmd;
2333 struct isert_conn *isert_conn = conn->context;
2334 struct isert_data_buf *data = &isert_cmd->data;
2335 struct ib_rdma_wr *rdma_wr;
2336 struct ib_sge *ib_sge;
2337 u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
2338 int ret = 0, i, ib_sge_cnt;
2339
2340 offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ?
2341 cmd->write_data_done : 0;
2342 ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
2343 se_cmd->t_data_nents, se_cmd->data_length,
2344 offset, isert_cmd->iser_ib_op,
2345 &isert_cmd->data);
2346 if (ret)
2347 return ret;
2348
2349 data_left = data->len;
2350 offset = data->offset;
2351
2352 ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL);
2353 if (!ib_sge) {
2354 isert_warn("Unable to allocate ib_sge\n");
2355 ret = -ENOMEM;
2356 goto unmap_cmd;
2357 }
2358 isert_cmd->ib_sge = ib_sge;
2359
2360 isert_cmd->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
2361 isert_cmd->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) *
2362 isert_cmd->rdma_wr_num, GFP_KERNEL);
2363 if (!isert_cmd->rdma_wr) {
2364 isert_dbg("Unable to allocate isert_cmd->rdma_wr\n");
2365 ret = -ENOMEM;
2366 goto unmap_cmd;
2367 }
2368
2369 rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
2370
2371 for (i = 0; i < isert_cmd->rdma_wr_num; i++) {
2372 rdma_wr = &isert_cmd->rdma_wr[i];
2373 data_len = min(data_left, rdma_write_max);
2374
2375 rdma_wr->wr.send_flags = 0;
2376 if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) {
2377 isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
2378
2379 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
2380 rdma_wr->remote_addr = isert_cmd->read_va + offset;
2381 rdma_wr->rkey = isert_cmd->read_stag;
2382 if (i + 1 == isert_cmd->rdma_wr_num)
2383 rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr;
2384 else
2385 rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr;
2386 } else {
2387 isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
2388
2389 rdma_wr->wr.opcode = IB_WR_RDMA_READ;
2390 rdma_wr->remote_addr = isert_cmd->write_va + va_offset;
2391 rdma_wr->rkey = isert_cmd->write_stag;
2392 if (i + 1 == isert_cmd->rdma_wr_num)
2393 rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
2394 else
2395 rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr;
2396 }
2397
2398 ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
2399 rdma_wr, data_len, offset);
2400 ib_sge += ib_sge_cnt;
2401
2402 offset += data_len;
2403 va_offset += data_len;
2404 data_left -= data_len;
2405 }
2406
2407 return 0;
2408unmap_cmd:
2409 isert_unmap_data_buf(isert_conn, data);
2410
2411 return ret;
2412}
2413
2414static inline void
2415isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
2416{
2417 u32 rkey;
2418
2419 memset(inv_wr, 0, sizeof(*inv_wr));
2420 inv_wr->wr_cqe = NULL;
2421 inv_wr->opcode = IB_WR_LOCAL_INV;
2422 inv_wr->ex.invalidate_rkey = mr->rkey;
2423
2424 /* Bump the key */
2425 rkey = ib_inc_rkey(mr->rkey);
2426 ib_update_fast_reg_key(mr, rkey);
2427}
2428
2429static int
2430isert_fast_reg_mr(struct isert_conn *isert_conn,
2431 struct fast_reg_descriptor *fr_desc,
2432 struct isert_data_buf *mem,
2433 enum isert_indicator ind,
2434 struct ib_sge *sge)
2435{
2436 struct isert_device *device = isert_conn->device;
2437 struct ib_device *ib_dev = device->ib_device;
2438 struct ib_mr *mr;
2439 struct ib_reg_wr reg_wr;
2440 struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
2441 int ret, n;
2442
2443 if (mem->dma_nents == 1) {
2444 sge->lkey = device->pd->local_dma_lkey;
2445 sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
2446 sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
2447 isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
2448 sge->addr, sge->length, sge->lkey);
2449 return 0;
2450 }
2451
2452 if (ind == ISERT_DATA_KEY_VALID)
2453 /* Registering data buffer */
2454 mr = fr_desc->data_mr;
2455 else
2456 /* Registering protection buffer */
2457 mr = fr_desc->pi_ctx->prot_mr;
2458
2459 if (!(fr_desc->ind & ind)) {
2460 isert_inv_rkey(&inv_wr, mr);
2461 wr = &inv_wr;
2462 }
2463
2464 n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE);
2465 if (unlikely(n != mem->nents)) {
2466 isert_err("failed to map mr sg (%d/%d)\n",
2467 n, mem->nents);
2468 return n < 0 ? n : -EINVAL;
2469 }
2470
2471 isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
2472 fr_desc, mem->nents, mem->offset);
2473
2474 reg_wr.wr.next = NULL;
2475 reg_wr.wr.opcode = IB_WR_REG_MR;
2476 reg_wr.wr.wr_cqe = NULL;
2477 reg_wr.wr.send_flags = 0;
2478 reg_wr.wr.num_sge = 0;
2479 reg_wr.mr = mr;
2480 reg_wr.key = mr->lkey;
2481 reg_wr.access = IB_ACCESS_LOCAL_WRITE;
2482
2483 if (!wr)
2484 wr = &reg_wr.wr;
2485 else
2486 wr->next = &reg_wr.wr;
2487
2488 ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
2489 if (ret) {
2490 isert_err("fast registration failed, ret:%d\n", ret);
2491 return ret;
2492 }
2493 fr_desc->ind &= ~ind;
2494
2495 sge->lkey = mr->lkey;
2496 sge->addr = mr->iova;
2497 sge->length = mr->length;
2498
2499 isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
2500 sge->addr, sge->length, sge->lkey);
2501
2502 return ret;
2503}
2504
2505static inline void 2020static inline void
2506isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs, 2021isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
2507 struct ib_sig_domain *domain) 2022 struct ib_sig_domain *domain)
@@ -2526,6 +2041,8 @@ isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
2526static int 2041static int
2527isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) 2042isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
2528{ 2043{
2044 memset(sig_attrs, 0, sizeof(*sig_attrs));
2045
2529 switch (se_cmd->prot_op) { 2046 switch (se_cmd->prot_op) {
2530 case TARGET_PROT_DIN_INSERT: 2047 case TARGET_PROT_DIN_INSERT:
2531 case TARGET_PROT_DOUT_STRIP: 2048 case TARGET_PROT_DOUT_STRIP:
@@ -2547,228 +2064,59 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
2547 return -EINVAL; 2064 return -EINVAL;
2548 } 2065 }
2549 2066
2067 sig_attrs->check_mask =
2068 (se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
2069 (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
2070 (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
2550 return 0; 2071 return 0;
2551} 2072}
2552 2073
2553static inline u8
2554isert_set_prot_checks(u8 prot_checks)
2555{
2556 return (prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
2557 (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
2558 (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
2559}
2560
2561static int
2562isert_reg_sig_mr(struct isert_conn *isert_conn,
2563 struct isert_cmd *isert_cmd,
2564 struct fast_reg_descriptor *fr_desc)
2565{
2566 struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
2567 struct ib_sig_handover_wr sig_wr;
2568 struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
2569 struct pi_context *pi_ctx = fr_desc->pi_ctx;
2570 struct ib_sig_attrs sig_attrs;
2571 int ret;
2572
2573 memset(&sig_attrs, 0, sizeof(sig_attrs));
2574 ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
2575 if (ret)
2576 goto err;
2577
2578 sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks);
2579
2580 if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) {
2581 isert_inv_rkey(&inv_wr, pi_ctx->sig_mr);
2582 wr = &inv_wr;
2583 }
2584
2585 memset(&sig_wr, 0, sizeof(sig_wr));
2586 sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
2587 sig_wr.wr.wr_cqe = NULL;
2588 sig_wr.wr.sg_list = &isert_cmd->ib_sg[DATA];
2589 sig_wr.wr.num_sge = 1;
2590 sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
2591 sig_wr.sig_attrs = &sig_attrs;
2592 sig_wr.sig_mr = pi_ctx->sig_mr;
2593 if (se_cmd->t_prot_sg)
2594 sig_wr.prot = &isert_cmd->ib_sg[PROT];
2595
2596 if (!wr)
2597 wr = &sig_wr.wr;
2598 else
2599 wr->next = &sig_wr.wr;
2600
2601 ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
2602 if (ret) {
2603 isert_err("fast registration failed, ret:%d\n", ret);
2604 goto err;
2605 }
2606 fr_desc->ind &= ~ISERT_SIG_KEY_VALID;
2607
2608 isert_cmd->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey;
2609 isert_cmd->ib_sg[SIG].addr = 0;
2610 isert_cmd->ib_sg[SIG].length = se_cmd->data_length;
2611 if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP &&
2612 se_cmd->prot_op != TARGET_PROT_DOUT_INSERT)
2613 /*
2614 * We have protection guards on the wire
2615 * so we need to set a larget transfer
2616 */
2617 isert_cmd->ib_sg[SIG].length += se_cmd->prot_length;
2618
2619 isert_dbg("sig_sge: addr: 0x%llx length: %u lkey: %x\n",
2620 isert_cmd->ib_sg[SIG].addr, isert_cmd->ib_sg[SIG].length,
2621 isert_cmd->ib_sg[SIG].lkey);
2622err:
2623 return ret;
2624}
2625
2626static int 2074static int
2627isert_handle_prot_cmd(struct isert_conn *isert_conn, 2075isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
2628 struct isert_cmd *isert_cmd) 2076 struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
2629{ 2077{
2630 struct isert_device *device = isert_conn->device; 2078 struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd;
2631 struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd; 2079 enum dma_data_direction dir = target_reverse_dma_direction(se_cmd);
2080 u8 port_num = conn->cm_id->port_num;
2081 u64 addr;
2082 u32 rkey, offset;
2632 int ret; 2083 int ret;
2633 2084
2634 if (!isert_cmd->fr_desc->pi_ctx) { 2085 if (dir == DMA_FROM_DEVICE) {
2635 ret = isert_create_pi_ctx(isert_cmd->fr_desc, 2086 addr = cmd->write_va;
2636 device->ib_device, 2087 rkey = cmd->write_stag;
2637 device->pd); 2088 offset = cmd->iscsi_cmd->write_data_done;
2638 if (ret) {
2639 isert_err("conn %p failed to allocate pi_ctx\n",
2640 isert_conn);
2641 return ret;
2642 }
2643 }
2644
2645 if (se_cmd->t_prot_sg) {
2646 ret = isert_map_data_buf(isert_conn, isert_cmd,
2647 se_cmd->t_prot_sg,
2648 se_cmd->t_prot_nents,
2649 se_cmd->prot_length,
2650 0,
2651 isert_cmd->iser_ib_op,
2652 &isert_cmd->prot);
2653 if (ret) {
2654 isert_err("conn %p failed to map protection buffer\n",
2655 isert_conn);
2656 return ret;
2657 }
2658
2659 memset(&isert_cmd->ib_sg[PROT], 0, sizeof(isert_cmd->ib_sg[PROT]));
2660 ret = isert_fast_reg_mr(isert_conn, isert_cmd->fr_desc,
2661 &isert_cmd->prot,
2662 ISERT_PROT_KEY_VALID,
2663 &isert_cmd->ib_sg[PROT]);
2664 if (ret) {
2665 isert_err("conn %p failed to fast reg mr\n",
2666 isert_conn);
2667 goto unmap_prot_cmd;
2668 }
2669 }
2670
2671 ret = isert_reg_sig_mr(isert_conn, isert_cmd, isert_cmd->fr_desc);
2672 if (ret) {
2673 isert_err("conn %p failed to fast reg mr\n",
2674 isert_conn);
2675 goto unmap_prot_cmd;
2676 }
2677 isert_cmd->fr_desc->ind |= ISERT_PROTECTED;
2678
2679 return 0;
2680
2681unmap_prot_cmd:
2682 if (se_cmd->t_prot_sg)
2683 isert_unmap_data_buf(isert_conn, &isert_cmd->prot);
2684
2685 return ret;
2686}
2687
2688static int
2689isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn)
2690{
2691 struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
2692 struct se_cmd *se_cmd = &cmd->se_cmd;
2693 struct isert_conn *isert_conn = conn->context;
2694 struct fast_reg_descriptor *fr_desc = NULL;
2695 struct ib_rdma_wr *rdma_wr;
2696 struct ib_sge *ib_sg;
2697 u32 offset;
2698 int ret = 0;
2699 unsigned long flags;
2700
2701 offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ?
2702 cmd->write_data_done : 0;
2703 ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
2704 se_cmd->t_data_nents, se_cmd->data_length,
2705 offset, isert_cmd->iser_ib_op,
2706 &isert_cmd->data);
2707 if (ret)
2708 return ret;
2709
2710 if (isert_cmd->data.dma_nents != 1 ||
2711 isert_prot_cmd(isert_conn, se_cmd)) {
2712 spin_lock_irqsave(&isert_conn->pool_lock, flags);
2713 fr_desc = list_first_entry(&isert_conn->fr_pool,
2714 struct fast_reg_descriptor, list);
2715 list_del(&fr_desc->list);
2716 spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
2717 isert_cmd->fr_desc = fr_desc;
2718 }
2719
2720 ret = isert_fast_reg_mr(isert_conn, fr_desc, &isert_cmd->data,
2721 ISERT_DATA_KEY_VALID, &isert_cmd->ib_sg[DATA]);
2722 if (ret)
2723 goto unmap_cmd;
2724
2725 if (isert_prot_cmd(isert_conn, se_cmd)) {
2726 ret = isert_handle_prot_cmd(isert_conn, isert_cmd);
2727 if (ret)
2728 goto unmap_cmd;
2729
2730 ib_sg = &isert_cmd->ib_sg[SIG];
2731 } else { 2089 } else {
2732 ib_sg = &isert_cmd->ib_sg[DATA]; 2090 addr = cmd->read_va;
2091 rkey = cmd->read_stag;
2092 offset = 0;
2733 } 2093 }
2734 2094
2735 memcpy(&isert_cmd->s_ib_sge, ib_sg, sizeof(*ib_sg)); 2095 if (isert_prot_cmd(conn, se_cmd)) {
2736 isert_cmd->ib_sge = &isert_cmd->s_ib_sge; 2096 struct ib_sig_attrs sig_attrs;
2737 isert_cmd->rdma_wr_num = 1;
2738 memset(&isert_cmd->s_rdma_wr, 0, sizeof(isert_cmd->s_rdma_wr));
2739 isert_cmd->rdma_wr = &isert_cmd->s_rdma_wr;
2740 2097
2741 rdma_wr = &isert_cmd->s_rdma_wr; 2098 ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
2742 rdma_wr->wr.sg_list = &isert_cmd->s_ib_sge; 2099 if (ret)
2743 rdma_wr->wr.num_sge = 1; 2100 return ret;
2744 rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe;
2745 if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) {
2746 isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
2747 2101
2748 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 2102 WARN_ON_ONCE(offset);
2749 rdma_wr->remote_addr = isert_cmd->read_va; 2103 ret = rdma_rw_ctx_signature_init(&cmd->rw, conn->qp, port_num,
2750 rdma_wr->rkey = isert_cmd->read_stag; 2104 se_cmd->t_data_sg, se_cmd->t_data_nents,
2751 rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ? 2105 se_cmd->t_prot_sg, se_cmd->t_prot_nents,
2752 0 : IB_SEND_SIGNALED; 2106 &sig_attrs, addr, rkey, dir);
2753 } else { 2107 } else {
2754 isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done; 2108 ret = rdma_rw_ctx_init(&cmd->rw, conn->qp, port_num,
2755 2109 se_cmd->t_data_sg, se_cmd->t_data_nents,
2756 rdma_wr->wr.opcode = IB_WR_RDMA_READ; 2110 offset, addr, rkey, dir);
2757 rdma_wr->remote_addr = isert_cmd->write_va;
2758 rdma_wr->rkey = isert_cmd->write_stag;
2759 rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
2760 } 2111 }
2761 2112 if (ret < 0) {
2762 return 0; 2113 isert_err("Cmd: %p failed to prepare RDMA res\n", cmd);
2763 2114 return ret;
2764unmap_cmd:
2765 if (fr_desc) {
2766 spin_lock_irqsave(&isert_conn->pool_lock, flags);
2767 list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
2768 spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
2769 } 2115 }
2770 isert_unmap_data_buf(isert_conn, &isert_cmd->data);
2771 2116
2117 ret = rdma_rw_ctx_post(&cmd->rw, conn->qp, port_num, cqe, chain_wr);
2118 if (ret < 0)
2119 isert_err("Cmd: %p failed to post RDMA res\n", cmd);
2772 return ret; 2120 return ret;
2773} 2121}
2774 2122
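
To tie the scattered hunks of this conversion together, here is a minimal sketch of the per-command flow the new code follows, using only the rdma_rw_ctx calls visible in the hunks above; the wrapper name example_issue_rdma and its flattened argument list are illustrative and not part of the patch:

#include <linux/types.h>
#include <rdma/ib_verbs.h>
#include <rdma/rw.h>

/* Illustrative sketch only; mirrors what isert_rdma_rw_ctx_post() above does. */
static int example_issue_rdma(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
			      u8 port_num, struct scatterlist *sg, u32 sg_cnt,
			      u32 offset, u64 remote_addr, u32 rkey,
			      enum dma_data_direction dir, struct ib_cqe *cqe)
{
	int ret;

	/* Map the S/G list and build the RDMA READ/WRITE work requests. */
	ret = rdma_rw_ctx_init(ctx, qp, port_num, sg, sg_cnt, offset,
			       remote_addr, rkey, dir);
	if (ret < 0)
		return ret;

	/* Post the WR chain; the final WR signals completion through @cqe. */
	ret = rdma_rw_ctx_post(ctx, qp, port_num, cqe, NULL);
	if (ret < 0)
		return ret;

	/*
	 * The completion handler or an error/teardown path later releases the
	 * mapping with rdma_rw_ctx_destroy(ctx, qp, port_num, sg, sg_cnt, dir),
	 * or with rdma_rw_ctx_destroy_signature() when the context was set up
	 * through rdma_rw_ctx_signature_init() for a T10-PI command.
	 */
	return 0;
}
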
@@ -2778,21 +2126,17 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
2778 struct se_cmd *se_cmd = &cmd->se_cmd; 2126 struct se_cmd *se_cmd = &cmd->se_cmd;
2779 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); 2127 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
2780 struct isert_conn *isert_conn = conn->context; 2128 struct isert_conn *isert_conn = conn->context;
2781 struct isert_device *device = isert_conn->device; 2129 struct ib_cqe *cqe = NULL;
2782 struct ib_send_wr *wr_failed; 2130 struct ib_send_wr *chain_wr = NULL;
2783 int rc; 2131 int rc;
2784 2132
2785 isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n", 2133 isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n",
2786 isert_cmd, se_cmd->data_length); 2134 isert_cmd, se_cmd->data_length);
2787 2135
2788 isert_cmd->iser_ib_op = ISER_IB_RDMA_WRITE; 2136 if (isert_prot_cmd(isert_conn, se_cmd)) {
2789 rc = device->reg_rdma_mem(isert_cmd, conn); 2137 isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
2790 if (rc) { 2138 cqe = &isert_cmd->tx_desc.tx_cqe;
2791 isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); 2139 } else {
2792 return rc;
2793 }
2794
2795 if (!isert_prot_cmd(isert_conn, se_cmd)) {
2796 /* 2140 /*
2797 * Build isert_conn->tx_desc for iSCSI response PDU and attach 2141 * Build isert_conn->tx_desc for iSCSI response PDU and attach
2798 */ 2142 */
@@ -2803,56 +2147,35 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
2803 isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); 2147 isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
2804 isert_init_send_wr(isert_conn, isert_cmd, 2148 isert_init_send_wr(isert_conn, isert_cmd,
2805 &isert_cmd->tx_desc.send_wr); 2149 &isert_cmd->tx_desc.send_wr);
2806 isert_cmd->s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr;
2807 isert_cmd->rdma_wr_num += 1;
2808 2150
2809 rc = isert_post_recv(isert_conn, isert_cmd->rx_desc); 2151 rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
2810 if (rc) { 2152 if (rc) {
2811 isert_err("ib_post_recv failed with %d\n", rc); 2153 isert_err("ib_post_recv failed with %d\n", rc);
2812 return rc; 2154 return rc;
2813 } 2155 }
2814 }
2815 2156
2816 rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed); 2157 chain_wr = &isert_cmd->tx_desc.send_wr;
2817 if (rc) 2158 }
2818 isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
2819
2820 if (!isert_prot_cmd(isert_conn, se_cmd))
2821 isert_dbg("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
2822 "READ\n", isert_cmd);
2823 else
2824 isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
2825 isert_cmd);
2826 2159
2160 isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
2161 isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd);
2827 return 1; 2162 return 1;
2828} 2163}
2829 2164
2830static int 2165static int
2831isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) 2166isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
2832{ 2167{
2833 struct se_cmd *se_cmd = &cmd->se_cmd;
2834 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); 2168 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
2835 struct isert_conn *isert_conn = conn->context;
2836 struct isert_device *device = isert_conn->device;
2837 struct ib_send_wr *wr_failed;
2838 int rc;
2839 2169
2840 isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", 2170 isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
2841 isert_cmd, se_cmd->data_length, cmd->write_data_done); 2171 isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done);
2842 isert_cmd->iser_ib_op = ISER_IB_RDMA_READ;
2843 rc = device->reg_rdma_mem(isert_cmd, conn);
2844 if (rc) {
2845 isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
2846 return rc;
2847 }
2848 2172
2849 rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed); 2173 isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
2850 if (rc) 2174 isert_rdma_rw_ctx_post(isert_cmd, conn->context,
2851 isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); 2175 &isert_cmd->tx_desc.tx_cqe, NULL);
2852 2176
2853 isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", 2177 isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
2854 isert_cmd); 2178 isert_cmd);
2855
2856 return 0; 2179 return 0;
2857} 2180}
2858 2181
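
As a recap of the transmit paths above, here is a hedged condensation of the decision isert_put_datain() makes before calling isert_rdma_rw_ctx_post(); the helper name example_post_data_in and the pi_enabled/resp_send_wr parameters are hypothetical, while the called symbols are the ones added by this patch:

/* Hypothetical condensation, written as if it lived in ib_isert.c. */
static int example_post_data_in(struct isert_cmd *cmd, struct isert_conn *conn,
				bool pi_enabled,
				struct ib_send_wr *resp_send_wr)
{
	struct ib_cqe *cqe = NULL;
	struct ib_send_wr *chain_wr = NULL;

	if (pi_enabled) {
		/* Signal through the tx cqe; isert_rdma_write_done() checks
		 * the signature MR before completing the command. */
		cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
		cqe = &cmd->tx_desc.tx_cqe;
	} else {
		/* Chain the iSCSI response SEND directly behind the WRITEs,
		 * so the whole chain completes through isert_send_done(). */
		chain_wr = resp_send_wr;
	}
	return isert_rdma_rw_ctx_post(cmd, conn, cqe, chain_wr);
}

isert_get_dataout() takes the simpler path shown above: it sets tx_cqe.done to isert_rdma_read_done and posts with a NULL chain_wr.
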
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 147900cbb578..e512ba941f2f 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -3,6 +3,7 @@
3#include <linux/in6.h> 3#include <linux/in6.h>
4#include <rdma/ib_verbs.h> 4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h> 5#include <rdma/rdma_cm.h>
6#include <rdma/rw.h>
6#include <scsi/iser.h> 7#include <scsi/iser.h>
7 8
8 9
@@ -53,10 +54,7 @@
53 54
54#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2) 55#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
55 56
56#define ISERT_INFLIGHT_DATAOUTS 8 57#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \
57
58#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
59 (1 + ISERT_INFLIGHT_DATAOUTS) + \
60 ISERT_MAX_TX_MISC_PDUS + \ 58 ISERT_MAX_TX_MISC_PDUS + \
61 ISERT_MAX_RX_MISC_PDUS) 59 ISERT_MAX_RX_MISC_PDUS)
62 60
@@ -71,13 +69,6 @@ enum isert_desc_type {
71 ISCSI_TX_DATAIN 69 ISCSI_TX_DATAIN
72}; 70};
73 71
74enum iser_ib_op_code {
75 ISER_IB_RECV,
76 ISER_IB_SEND,
77 ISER_IB_RDMA_WRITE,
78 ISER_IB_RDMA_READ,
79};
80
81enum iser_conn_state { 72enum iser_conn_state {
82 ISER_CONN_INIT, 73 ISER_CONN_INIT,
83 ISER_CONN_UP, 74 ISER_CONN_UP,
@@ -118,42 +109,6 @@ static inline struct iser_tx_desc *cqe_to_tx_desc(struct ib_cqe *cqe)
118 return container_of(cqe, struct iser_tx_desc, tx_cqe); 109 return container_of(cqe, struct iser_tx_desc, tx_cqe);
119} 110}
120 111
121
122enum isert_indicator {
123 ISERT_PROTECTED = 1 << 0,
124 ISERT_DATA_KEY_VALID = 1 << 1,
125 ISERT_PROT_KEY_VALID = 1 << 2,
126 ISERT_SIG_KEY_VALID = 1 << 3,
127};
128
129struct pi_context {
130 struct ib_mr *prot_mr;
131 struct ib_mr *sig_mr;
132};
133
134struct fast_reg_descriptor {
135 struct list_head list;
136 struct ib_mr *data_mr;
137 u8 ind;
138 struct pi_context *pi_ctx;
139};
140
141struct isert_data_buf {
142 struct scatterlist *sg;
143 int nents;
144 u32 sg_off;
145 u32 len; /* cur_rdma_length */
146 u32 offset;
147 unsigned int dma_nents;
148 enum dma_data_direction dma_dir;
149};
150
151enum {
152 DATA = 0,
153 PROT = 1,
154 SIG = 2,
155};
156
157struct isert_cmd { 112struct isert_cmd {
158 uint32_t read_stag; 113 uint32_t read_stag;
159 uint32_t write_stag; 114 uint32_t write_stag;
@@ -166,16 +121,7 @@ struct isert_cmd {
166 struct iscsi_cmd *iscsi_cmd; 121 struct iscsi_cmd *iscsi_cmd;
167 struct iser_tx_desc tx_desc; 122 struct iser_tx_desc tx_desc;
168 struct iser_rx_desc *rx_desc; 123 struct iser_rx_desc *rx_desc;
169 enum iser_ib_op_code iser_ib_op; 124 struct rdma_rw_ctx rw;
170 struct ib_sge *ib_sge;
171 struct ib_sge s_ib_sge;
172 int rdma_wr_num;
173 struct ib_rdma_wr *rdma_wr;
174 struct ib_rdma_wr s_rdma_wr;
175 struct ib_sge ib_sg[3];
176 struct isert_data_buf data;
177 struct isert_data_buf prot;
178 struct fast_reg_descriptor *fr_desc;
179 struct work_struct comp_work; 125 struct work_struct comp_work;
180 struct scatterlist sg; 126 struct scatterlist sg;
181}; 127};
@@ -210,10 +156,6 @@ struct isert_conn {
210 struct isert_device *device; 156 struct isert_device *device;
211 struct mutex mutex; 157 struct mutex mutex;
212 struct kref kref; 158 struct kref kref;
213 struct list_head fr_pool;
214 int fr_pool_size;
215 /* lock to protect fastreg pool */
216 spinlock_t pool_lock;
217 struct work_struct release_work; 159 struct work_struct release_work;
218 bool logout_posted; 160 bool logout_posted;
219 bool snd_w_inv; 161 bool snd_w_inv;
@@ -236,7 +178,6 @@ struct isert_comp {
236}; 178};
237 179
238struct isert_device { 180struct isert_device {
239 int use_fastreg;
240 bool pi_capable; 181 bool pi_capable;
241 int refcount; 182 int refcount;
242 struct ib_device *ib_device; 183 struct ib_device *ib_device;
@@ -244,10 +185,6 @@ struct isert_device {
244 struct isert_comp *comps; 185 struct isert_comp *comps;
245 int comps_used; 186 int comps_used;
246 struct list_head dev_node; 187 struct list_head dev_node;
247 int (*reg_rdma_mem)(struct isert_cmd *isert_cmd,
248 struct iscsi_conn *conn);
249 void (*unreg_rdma_mem)(struct isert_cmd *isert_cmd,
250 struct isert_conn *isert_conn);
251}; 188};
252 189
253struct isert_np { 190struct isert_np {
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 369a75e1f44e..646de170ec12 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -70,6 +70,7 @@ static unsigned int indirect_sg_entries;
70static bool allow_ext_sg; 70static bool allow_ext_sg;
71static bool prefer_fr = true; 71static bool prefer_fr = true;
72static bool register_always = true; 72static bool register_always = true;
73static bool never_register;
73static int topspin_workarounds = 1; 74static int topspin_workarounds = 1;
74 75
75module_param(srp_sg_tablesize, uint, 0444); 76module_param(srp_sg_tablesize, uint, 0444);
@@ -99,6 +100,9 @@ module_param(register_always, bool, 0444);
99MODULE_PARM_DESC(register_always, 100MODULE_PARM_DESC(register_always,
100 "Use memory registration even for contiguous memory regions"); 101 "Use memory registration even for contiguous memory regions");
101 102
103module_param(never_register, bool, 0444);
104MODULE_PARM_DESC(never_register, "Never register memory");
105
102static const struct kernel_param_ops srp_tmo_ops; 106static const struct kernel_param_ops srp_tmo_ops;
103 107
104static int srp_reconnect_delay = 10; 108static int srp_reconnect_delay = 10;
@@ -316,7 +320,7 @@ static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
316 struct ib_fmr_pool_param fmr_param; 320 struct ib_fmr_pool_param fmr_param;
317 321
318 memset(&fmr_param, 0, sizeof(fmr_param)); 322 memset(&fmr_param, 0, sizeof(fmr_param));
319 fmr_param.pool_size = target->scsi_host->can_queue; 323 fmr_param.pool_size = target->mr_pool_size;
320 fmr_param.dirty_watermark = fmr_param.pool_size / 4; 324 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
321 fmr_param.cache = 1; 325 fmr_param.cache = 1;
322 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; 326 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
@@ -441,23 +445,22 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
441{ 445{
442 struct srp_device *dev = target->srp_host->srp_dev; 446 struct srp_device *dev = target->srp_host->srp_dev;
443 447
444 return srp_create_fr_pool(dev->dev, dev->pd, 448 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
445 target->scsi_host->can_queue,
446 dev->max_pages_per_mr); 449 dev->max_pages_per_mr);
447} 450}
448 451
449/** 452/**
450 * srp_destroy_qp() - destroy an RDMA queue pair 453 * srp_destroy_qp() - destroy an RDMA queue pair
451 * @ch: SRP RDMA channel. 454 * @qp: RDMA queue pair.
452 * 455 *
453 * Drain the qp before destroying it. This prevents the receive 456
454 * completion handler from accessing the queue pair while it is 457
455 * being destroyed. 458 * being destroyed.
456 */ 459 */
457static void srp_destroy_qp(struct srp_rdma_ch *ch) 460static void srp_destroy_qp(struct ib_qp *qp)
458{ 461{
459 ib_drain_rq(ch->qp); 462 ib_drain_rq(qp);
460 ib_destroy_qp(ch->qp); 463 ib_destroy_qp(qp);
461} 464}
462 465
463static int srp_create_ch_ib(struct srp_rdma_ch *ch) 466static int srp_create_ch_ib(struct srp_rdma_ch *ch)
@@ -469,7 +472,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
469 struct ib_qp *qp; 472 struct ib_qp *qp;
470 struct ib_fmr_pool *fmr_pool = NULL; 473 struct ib_fmr_pool *fmr_pool = NULL;
471 struct srp_fr_pool *fr_pool = NULL; 474 struct srp_fr_pool *fr_pool = NULL;
472 const int m = dev->use_fast_reg ? 3 : 1; 475 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
473 int ret; 476 int ret;
474 477
475 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 478 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
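
A worked example with assumed numbers, not taken from the patch: with use_fast_reg = 1 and mr_per_cmd = 2, the multiplier becomes m = 1 + 1 * 2 * 2 = 5 send work requests budgeted per command, replacing the previous fixed choice of 3 (fast registration) or 1; the factor of 2 presumably leaves room for one registration and one invalidation work request per memory region.
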
@@ -530,7 +533,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
530 } 533 }
531 534
532 if (ch->qp) 535 if (ch->qp)
533 srp_destroy_qp(ch); 536 srp_destroy_qp(ch->qp);
534 if (ch->recv_cq) 537 if (ch->recv_cq)
535 ib_free_cq(ch->recv_cq); 538 ib_free_cq(ch->recv_cq);
536 if (ch->send_cq) 539 if (ch->send_cq)
@@ -554,7 +557,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
554 return 0; 557 return 0;
555 558
556err_qp: 559err_qp:
557 srp_destroy_qp(ch); 560 srp_destroy_qp(qp);
558 561
559err_send_cq: 562err_send_cq:
560 ib_free_cq(send_cq); 563 ib_free_cq(send_cq);
@@ -597,7 +600,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
597 ib_destroy_fmr_pool(ch->fmr_pool); 600 ib_destroy_fmr_pool(ch->fmr_pool);
598 } 601 }
599 602
600 srp_destroy_qp(ch); 603 srp_destroy_qp(ch->qp);
601 ib_free_cq(ch->send_cq); 604 ib_free_cq(ch->send_cq);
602 ib_free_cq(ch->recv_cq); 605 ib_free_cq(ch->recv_cq);
603 606
@@ -850,7 +853,7 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
850 853
851 for (i = 0; i < target->req_ring_size; ++i) { 854 for (i = 0; i < target->req_ring_size; ++i) {
852 req = &ch->req_ring[i]; 855 req = &ch->req_ring[i];
853 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), 856 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
854 GFP_KERNEL); 857 GFP_KERNEL);
855 if (!mr_list) 858 if (!mr_list)
856 goto out; 859 goto out;
@@ -1112,7 +1115,7 @@ static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1112} 1115}
1113 1116
1114/** 1117/**
1115 * srp_free_req() - Unmap data and add request to the free request list. 1118 * srp_free_req() - Unmap data and adjust ch->req_lim.
1116 * @ch: SRP RDMA channel. 1119 * @ch: SRP RDMA channel.
1117 * @req: Request to be freed. 1120 * @req: Request to be freed.
1118 * @scmnd: SCSI command associated with @req. 1121 * @scmnd: SCSI command associated with @req.
@@ -1299,9 +1302,16 @@ static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1299 srp_handle_qp_err(cq, wc, "FAST REG"); 1302 srp_handle_qp_err(cq, wc, "FAST REG");
1300} 1303}
1301 1304
1305/*
1306 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1307 * at which to start in the first element. If sg_offset_p != NULL then
1308 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1309 * byte that has not yet been mapped.
1310 */
1302static int srp_map_finish_fr(struct srp_map_state *state, 1311static int srp_map_finish_fr(struct srp_map_state *state,
1303 struct srp_request *req, 1312 struct srp_request *req,
1304 struct srp_rdma_ch *ch, int sg_nents) 1313 struct srp_rdma_ch *ch, int sg_nents,
1314 unsigned int *sg_offset_p)
1305{ 1315{
1306 struct srp_target_port *target = ch->target; 1316 struct srp_target_port *target = ch->target;
1307 struct srp_device *dev = target->srp_host->srp_dev; 1317 struct srp_device *dev = target->srp_host->srp_dev;
@@ -1316,13 +1326,14 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1316 1326
1317 WARN_ON_ONCE(!dev->use_fast_reg); 1327 WARN_ON_ONCE(!dev->use_fast_reg);
1318 1328
1319 if (sg_nents == 0)
1320 return 0;
1321
1322 if (sg_nents == 1 && target->global_mr) { 1329 if (sg_nents == 1 && target->global_mr) {
1323 srp_map_desc(state, sg_dma_address(state->sg), 1330 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1324 sg_dma_len(state->sg), 1331
1332 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1333 sg_dma_len(state->sg) - sg_offset,
1325 target->global_mr->rkey); 1334 target->global_mr->rkey);
1335 if (sg_offset_p)
1336 *sg_offset_p = 0;
1326 return 1; 1337 return 1;
1327 } 1338 }
1328 1339
@@ -1333,9 +1344,17 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1333 rkey = ib_inc_rkey(desc->mr->rkey); 1344 rkey = ib_inc_rkey(desc->mr->rkey);
1334 ib_update_fast_reg_key(desc->mr, rkey); 1345 ib_update_fast_reg_key(desc->mr, rkey);
1335 1346
1336 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size); 1347 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1337 if (unlikely(n < 0)) 1348 dev->mr_page_size);
1349 if (unlikely(n < 0)) {
1350 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1351 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1352 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1353 sg_offset_p ? *sg_offset_p : -1, n);
1338 return n; 1354 return n;
1355 }
1356
1357 WARN_ON_ONCE(desc->mr->length == 0);
1339 1358
1340 req->reg_cqe.done = srp_reg_mr_err_done; 1359 req->reg_cqe.done = srp_reg_mr_err_done;
1341 1360
@@ -1357,8 +1376,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1357 desc->mr->length, desc->mr->rkey); 1376 desc->mr->length, desc->mr->rkey);
1358 1377
1359 err = ib_post_send(ch->qp, &wr.wr, &bad_wr); 1378 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1360 if (unlikely(err)) 1379 if (unlikely(err)) {
1380 WARN_ON_ONCE(err == -ENOMEM);
1361 return err; 1381 return err;
1382 }
1362 1383
1363 return n; 1384 return n;
1364} 1385}
@@ -1398,7 +1419,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
1398 /* 1419 /*
1399 * If the last entry of the MR wasn't a full page, then we need to 1420 * If the last entry of the MR wasn't a full page, then we need to
1400 * close it out and start a new one -- we can only merge at page 1421 * close it out and start a new one -- we can only merge at page
1401 * boundries. 1422 * boundaries.
1402 */ 1423 */
1403 ret = 0; 1424 ret = 0;
1404 if (len != dev->mr_page_size) 1425 if (len != dev->mr_page_size)
@@ -1413,10 +1434,9 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1413 struct scatterlist *sg; 1434 struct scatterlist *sg;
1414 int i, ret; 1435 int i, ret;
1415 1436
1416 state->desc = req->indirect_desc;
1417 state->pages = req->map_page; 1437 state->pages = req->map_page;
1418 state->fmr.next = req->fmr_list; 1438 state->fmr.next = req->fmr_list;
1419 state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt; 1439 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1420 1440
1421 for_each_sg(scat, sg, count, i) { 1441 for_each_sg(scat, sg, count, i) {
1422 ret = srp_map_sg_entry(state, ch, sg, i); 1442 ret = srp_map_sg_entry(state, ch, sg, i);
@@ -1428,8 +1448,6 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1428 if (ret) 1448 if (ret)
1429 return ret; 1449 return ret;
1430 1450
1431 req->nmdesc = state->nmdesc;
1432
1433 return 0; 1451 return 0;
1434} 1452}
1435 1453
@@ -1437,15 +1455,20 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1437 struct srp_request *req, struct scatterlist *scat, 1455 struct srp_request *req, struct scatterlist *scat,
1438 int count) 1456 int count)
1439{ 1457{
1458 unsigned int sg_offset = 0;
1459
1440 state->desc = req->indirect_desc; 1460 state->desc = req->indirect_desc;
1441 state->fr.next = req->fr_list; 1461 state->fr.next = req->fr_list;
1442 state->fr.end = req->fr_list + ch->target->cmd_sg_cnt; 1462 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1443 state->sg = scat; 1463 state->sg = scat;
1444 1464
1465 if (count == 0)
1466 return 0;
1467
1445 while (count) { 1468 while (count) {
1446 int i, n; 1469 int i, n;
1447 1470
1448 n = srp_map_finish_fr(state, req, ch, count); 1471 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1449 if (unlikely(n < 0)) 1472 if (unlikely(n < 0))
1450 return n; 1473 return n;
1451 1474
@@ -1454,8 +1477,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1454 state->sg = sg_next(state->sg); 1477 state->sg = sg_next(state->sg);
1455 } 1478 }
1456 1479
1457 req->nmdesc = state->nmdesc;
1458
1459 return 0; 1480 return 0;
1460} 1481}
1461 1482
@@ -1475,8 +1496,6 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1475 target->global_mr->rkey); 1496 target->global_mr->rkey);
1476 } 1497 }
1477 1498
1478 req->nmdesc = state->nmdesc;
1479
1480 return 0; 1499 return 0;
1481} 1500}
1482 1501
@@ -1509,14 +1528,15 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1509 1528
1510 if (dev->use_fast_reg) { 1529 if (dev->use_fast_reg) {
1511 state.sg = idb_sg; 1530 state.sg = idb_sg;
1512 sg_set_buf(idb_sg, req->indirect_desc, idb_len); 1531 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1513 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */ 1532 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1514#ifdef CONFIG_NEED_SG_DMA_LENGTH 1533#ifdef CONFIG_NEED_SG_DMA_LENGTH
1515 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1534 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1516#endif 1535#endif
1517 ret = srp_map_finish_fr(&state, req, ch, 1); 1536 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1518 if (ret < 0) 1537 if (ret < 0)
1519 return ret; 1538 return ret;
1539 WARN_ON_ONCE(ret < 1);
1520 } else if (dev->use_fmr) { 1540 } else if (dev->use_fmr) {
1521 state.pages = idb_pages; 1541 state.pages = idb_pages;
1522 state.pages[0] = (req->indirect_dma_addr & 1542 state.pages[0] = (req->indirect_dma_addr &
@@ -1534,6 +1554,41 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1534 return 0; 1554 return 0;
1535} 1555}
1536 1556
1557#if defined(DYNAMIC_DATA_DEBUG)
1558static void srp_check_mapping(struct srp_map_state *state,
1559 struct srp_rdma_ch *ch, struct srp_request *req,
1560 struct scatterlist *scat, int count)
1561{
1562 struct srp_device *dev = ch->target->srp_host->srp_dev;
1563 struct srp_fr_desc **pfr;
1564 u64 desc_len = 0, mr_len = 0;
1565 int i;
1566
1567 for (i = 0; i < state->ndesc; i++)
1568 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1569 if (dev->use_fast_reg)
1570 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1571 mr_len += (*pfr)->mr->length;
1572 else if (dev->use_fmr)
1573 for (i = 0; i < state->nmdesc; i++)
1574 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1575 if (desc_len != scsi_bufflen(req->scmnd) ||
1576 mr_len > scsi_bufflen(req->scmnd))
1577 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1578 scsi_bufflen(req->scmnd), desc_len, mr_len,
1579 state->ndesc, state->nmdesc);
1580}
1581#endif
1582
1583/**
1584 * srp_map_data() - map SCSI data buffer onto an SRP request
1585 * @scmnd: SCSI command to map
1586 * @ch: SRP RDMA channel
1587 * @req: SRP request
1588 *
1589 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1590 * mapping failed.
1591 */
1537static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1592static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1538 struct srp_request *req) 1593 struct srp_request *req)
1539{ 1594{
@@ -1601,11 +1656,23 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1601 1656
1602 memset(&state, 0, sizeof(state)); 1657 memset(&state, 0, sizeof(state));
1603 if (dev->use_fast_reg) 1658 if (dev->use_fast_reg)
1604 srp_map_sg_fr(&state, ch, req, scat, count); 1659 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1605 else if (dev->use_fmr) 1660 else if (dev->use_fmr)
1606 srp_map_sg_fmr(&state, ch, req, scat, count); 1661 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1607 else 1662 else
1608 srp_map_sg_dma(&state, ch, req, scat, count); 1663 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1664 req->nmdesc = state.nmdesc;
1665 if (ret < 0)
1666 goto unmap;
1667
1668#if defined(DYNAMIC_DEBUG)
1669 {
1670 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1671 "Memory mapping consistency check");
1672 if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
1673 srp_check_mapping(&state, ch, req, scat, count);
1674 }
1675#endif
1609 1676
1610 /* We've mapped the request, now pull as much of the indirect 1677 /* We've mapped the request, now pull as much of the indirect
1611 * descriptor table as we can into the command buffer. If this 1678 * descriptor table as we can into the command buffer. If this
@@ -1628,7 +1695,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1628 !target->allow_ext_sg)) { 1695 !target->allow_ext_sg)) {
1629 shost_printk(KERN_ERR, target->scsi_host, 1696 shost_printk(KERN_ERR, target->scsi_host,
1630 "Could not fit S/G list into SRP_CMD\n"); 1697 "Could not fit S/G list into SRP_CMD\n");
1631 return -EIO; 1698 ret = -EIO;
1699 goto unmap;
1632 } 1700 }
1633 1701
1634 count = min(state.ndesc, target->cmd_sg_cnt); 1702 count = min(state.ndesc, target->cmd_sg_cnt);
@@ -1646,7 +1714,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1646 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, 1714 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1647 idb_len, &idb_rkey); 1715 idb_len, &idb_rkey);
1648 if (ret < 0) 1716 if (ret < 0)
1649 return ret; 1717 goto unmap;
1650 req->nmdesc++; 1718 req->nmdesc++;
1651 } else { 1719 } else {
1652 idb_rkey = cpu_to_be32(target->global_mr->rkey); 1720 idb_rkey = cpu_to_be32(target->global_mr->rkey);
@@ -1672,6 +1740,12 @@ map_complete:
1672 cmd->buf_fmt = fmt; 1740 cmd->buf_fmt = fmt;
1673 1741
1674 return len; 1742 return len;
1743
1744unmap:
1745 srp_unmap_data(scmnd, ch, req);
1746 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1747 ret = -E2BIG;
1748 return ret;
1675} 1749}
1676 1750
1677/* 1751/*
@@ -2564,6 +2638,20 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
2564 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; 2638 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2565} 2639}
2566 2640
2641static int srp_slave_alloc(struct scsi_device *sdev)
2642{
2643 struct Scsi_Host *shost = sdev->host;
2644 struct srp_target_port *target = host_to_target(shost);
2645 struct srp_device *srp_dev = target->srp_host->srp_dev;
2646 struct ib_device *ibdev = srp_dev->dev;
2647
2648 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
2649 blk_queue_virt_boundary(sdev->request_queue,
2650 ~srp_dev->mr_page_mask);
2651
2652 return 0;
2653}
2654
2567static int srp_slave_configure(struct scsi_device *sdev) 2655static int srp_slave_configure(struct scsi_device *sdev)
2568{ 2656{
2569 struct Scsi_Host *shost = sdev->host; 2657 struct Scsi_Host *shost = sdev->host;
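
A hedged worked example with assumed values: if srp_dev->mr_page_size is 4096 then mr_page_mask is ~0xfff, so ~srp_dev->mr_page_mask = 0xfff is what gets passed to blk_queue_virt_boundary(). Roughly speaking, the block layer then avoids handing the driver S/G lists with gaps inside a 4 KiB page, which matches the one-HCA-page-per-entry limitation of FR/FMR described in the srp_create_target() hunk below; the restriction is skipped when the device advertises IB_DEVICE_SG_GAPS_REG.
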
@@ -2755,6 +2843,7 @@ static struct scsi_host_template srp_template = {
2755 .module = THIS_MODULE, 2843 .module = THIS_MODULE,
2756 .name = "InfiniBand SRP initiator", 2844 .name = "InfiniBand SRP initiator",
2757 .proc_name = DRV_NAME, 2845 .proc_name = DRV_NAME,
2846 .slave_alloc = srp_slave_alloc,
2758 .slave_configure = srp_slave_configure, 2847 .slave_configure = srp_slave_configure,
2759 .info = srp_target_info, 2848 .info = srp_target_info,
2760 .queuecommand = srp_queuecommand, 2849 .queuecommand = srp_queuecommand,
@@ -2829,7 +2918,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2829 goto out; 2918 goto out;
2830 } 2919 }
2831 2920
2832 pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n", 2921 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2833 dev_name(&target->scsi_host->shost_gendev), 2922 dev_name(&target->scsi_host->shost_gendev),
2834 srp_sdev_count(target->scsi_host)); 2923 srp_sdev_count(target->scsi_host));
2835 2924
@@ -3161,6 +3250,7 @@ static ssize_t srp_create_target(struct device *dev,
3161 struct srp_device *srp_dev = host->srp_dev; 3250 struct srp_device *srp_dev = host->srp_dev;
3162 struct ib_device *ibdev = srp_dev->dev; 3251 struct ib_device *ibdev = srp_dev->dev;
3163 int ret, node_idx, node, cpu, i; 3252 int ret, node_idx, node, cpu, i;
3253 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3164 bool multich = false; 3254 bool multich = false;
3165 3255
3166 target_host = scsi_host_alloc(&srp_template, 3256 target_host = scsi_host_alloc(&srp_template,
@@ -3217,7 +3307,33 @@ static ssize_t srp_create_target(struct device *dev,
3217 target->sg_tablesize = target->cmd_sg_cnt; 3307 target->sg_tablesize = target->cmd_sg_cnt;
3218 } 3308 }
3219 3309
3310 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3311 /*
3312 * FR and FMR can only map one HCA page per entry. If the
3313 * start address is not aligned on an HCA page boundary, two
3314 * entries will be used for the head and the tail although
3315 * these two entries combined contain at most one HCA page of
3316 * data. Hence the "+ 1" in the calculation below.
3317 *
3318 * The indirect data buffer descriptor is contiguous so the
3319 * memory for that buffer will only be registered if
3320 * register_always is true. Hence add one to mr_per_cmd if
3321 * register_always has been set.
3322 */
3323 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3324 (ilog2(srp_dev->mr_page_size) - 9);
3325 mr_per_cmd = register_always +
3326 (target->scsi_host->max_sectors + 1 +
3327 max_sectors_per_mr - 1) / max_sectors_per_mr;
3328 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3329 target->scsi_host->max_sectors,
3330 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3331 max_sectors_per_mr, mr_per_cmd);
3332 }
3333
3220 target_host->sg_tablesize = target->sg_tablesize; 3334 target_host->sg_tablesize = target->sg_tablesize;
3335 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3336 target->mr_per_cmd = mr_per_cmd;
3221 target->indirect_size = target->sg_tablesize * 3337 target->indirect_size = target->sg_tablesize *
3222 sizeof (struct srp_direct_buf); 3338 sizeof (struct srp_direct_buf);
3223 target->max_iu_len = sizeof (struct srp_cmd) + 3339 target->max_iu_len = sizeof (struct srp_cmd) +
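
A worked example with assumed numbers, not taken from the patch: with srp_dev->mr_page_size = 4096 and srp_dev->max_pages_per_mr = 256, max_sectors_per_mr = 256 << (12 - 9) = 2048 sectors (1 MiB); with scsi_host->max_sectors = 1024 and register_always set, mr_per_cmd = 1 + (1024 + 1 + 2048 - 1) / 2048 = 1 + 1 = 2, so the MR pool is sized at can_queue * 2 entries.
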
@@ -3414,17 +3530,6 @@ static void srp_add_one(struct ib_device *device)
3414 if (!srp_dev) 3530 if (!srp_dev)
3415 return; 3531 return;
3416 3532
3417 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3418 device->map_phys_fmr && device->unmap_fmr);
3419 srp_dev->has_fr = (device->attrs.device_cap_flags &
3420 IB_DEVICE_MEM_MGT_EXTENSIONS);
3421 if (!srp_dev->has_fmr && !srp_dev->has_fr)
3422 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3423
3424 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3425 (!srp_dev->has_fmr || prefer_fr));
3426 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3427
3428 /* 3533 /*
3429 * Use the smallest page size supported by the HCA, down to a 3534 * Use the smallest page size supported by the HCA, down to a
3430 * minimum of 4096 bytes. We're unlikely to build large sglists 3535 * minimum of 4096 bytes. We're unlikely to build large sglists
@@ -3435,8 +3540,25 @@ static void srp_add_one(struct ib_device *device)
3435 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); 3540 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3436 max_pages_per_mr = device->attrs.max_mr_size; 3541 max_pages_per_mr = device->attrs.max_mr_size;
3437 do_div(max_pages_per_mr, srp_dev->mr_page_size); 3542 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3543 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3544 device->attrs.max_mr_size, srp_dev->mr_page_size,
3545 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3438 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, 3546 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3439 max_pages_per_mr); 3547 max_pages_per_mr);
3548
3549 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3550 device->map_phys_fmr && device->unmap_fmr);
3551 srp_dev->has_fr = (device->attrs.device_cap_flags &
3552 IB_DEVICE_MEM_MGT_EXTENSIONS);
3553 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3554 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3555 } else if (!never_register &&
3556 device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
3557 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3558 (!srp_dev->has_fmr || prefer_fr));
3559 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3560 }
3561
3440 if (srp_dev->use_fast_reg) { 3562 if (srp_dev->use_fast_reg) {
3441 srp_dev->max_pages_per_mr = 3563 srp_dev->max_pages_per_mr =
3442 min_t(u32, srp_dev->max_pages_per_mr, 3564 min_t(u32, srp_dev->max_pages_per_mr,
@@ -3456,7 +3578,8 @@ static void srp_add_one(struct ib_device *device)
3456 if (IS_ERR(srp_dev->pd)) 3578 if (IS_ERR(srp_dev->pd))
3457 goto free_dev; 3579 goto free_dev;
3458 3580
3459 if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) { 3581 if (never_register || !register_always ||
3582 (!srp_dev->has_fmr && !srp_dev->has_fr)) {
3460 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd, 3583 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
3461 IB_ACCESS_LOCAL_WRITE | 3584 IB_ACCESS_LOCAL_WRITE |
3462 IB_ACCESS_REMOTE_READ | 3585 IB_ACCESS_REMOTE_READ |
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 9e05ce4a04fd..26bb9b0a7a63 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -202,6 +202,8 @@ struct srp_target_port {
202 char target_name[32]; 202 char target_name[32];
203 unsigned int scsi_id; 203 unsigned int scsi_id;
204 unsigned int sg_tablesize; 204 unsigned int sg_tablesize;
205 int mr_pool_size;
206 int mr_per_cmd;
205 int queue_size; 207 int queue_size;
206 int req_ring_size; 208 int req_ring_size;
207 int comp_vector; 209 int comp_vector;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 8b42401d4795..2843f1ae75bd 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -765,52 +765,6 @@ static int srpt_post_recv(struct srpt_device *sdev,
765} 765}
766 766
767/** 767/**
768 * srpt_post_send() - Post an IB send request.
769 *
770 * Returns zero upon success and a non-zero value upon failure.
771 */
772static int srpt_post_send(struct srpt_rdma_ch *ch,
773 struct srpt_send_ioctx *ioctx, int len)
774{
775 struct ib_sge list;
776 struct ib_send_wr wr, *bad_wr;
777 struct srpt_device *sdev = ch->sport->sdev;
778 int ret;
779
780 atomic_inc(&ch->req_lim);
781
782 ret = -ENOMEM;
783 if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
784 pr_warn("IB send queue full (needed 1)\n");
785 goto out;
786 }
787
788 ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len,
789 DMA_TO_DEVICE);
790
791 list.addr = ioctx->ioctx.dma;
792 list.length = len;
793 list.lkey = sdev->pd->local_dma_lkey;
794
795 ioctx->ioctx.cqe.done = srpt_send_done;
796 wr.next = NULL;
797 wr.wr_cqe = &ioctx->ioctx.cqe;
798 wr.sg_list = &list;
799 wr.num_sge = 1;
800 wr.opcode = IB_WR_SEND;
801 wr.send_flags = IB_SEND_SIGNALED;
802
803 ret = ib_post_send(ch->qp, &wr, &bad_wr);
804
805out:
806 if (ret < 0) {
807 atomic_inc(&ch->sq_wr_avail);
808 atomic_dec(&ch->req_lim);
809 }
810 return ret;
811}
812
813/**
814 * srpt_zerolength_write() - Perform a zero-length RDMA write. 768 * srpt_zerolength_write() - Perform a zero-length RDMA write.
815 * 769 *
816 * A quote from the InfiniBand specification: C9-88: For an HCA responder 770 * A quote from the InfiniBand specification: C9-88: For an HCA responder
@@ -843,6 +797,110 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
843 } 797 }
844} 798}
845 799
800static int srpt_alloc_rw_ctxs(struct srpt_send_ioctx *ioctx,
801 struct srp_direct_buf *db, int nbufs, struct scatterlist **sg,
802 unsigned *sg_cnt)
803{
804 enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd);
805 struct srpt_rdma_ch *ch = ioctx->ch;
806 struct scatterlist *prev = NULL;
807 unsigned prev_nents;
808 int ret, i;
809
810 if (nbufs == 1) {
811 ioctx->rw_ctxs = &ioctx->s_rw_ctx;
812 } else {
813 ioctx->rw_ctxs = kmalloc_array(nbufs, sizeof(*ioctx->rw_ctxs),
814 GFP_KERNEL);
815 if (!ioctx->rw_ctxs)
816 return -ENOMEM;
817 }
818
819 for (i = ioctx->n_rw_ctx; i < nbufs; i++, db++) {
820 struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
821 u64 remote_addr = be64_to_cpu(db->va);
822 u32 size = be32_to_cpu(db->len);
823 u32 rkey = be32_to_cpu(db->key);
824
825 ret = target_alloc_sgl(&ctx->sg, &ctx->nents, size, false,
826 i < nbufs - 1);
827 if (ret)
828 goto unwind;
829
830 ret = rdma_rw_ctx_init(&ctx->rw, ch->qp, ch->sport->port,
831 ctx->sg, ctx->nents, 0, remote_addr, rkey, dir);
832 if (ret < 0) {
833 target_free_sgl(ctx->sg, ctx->nents);
834 goto unwind;
835 }
836
837 ioctx->n_rdma += ret;
838 ioctx->n_rw_ctx++;
839
840 if (prev) {
841 sg_unmark_end(&prev[prev_nents - 1]);
842 sg_chain(prev, prev_nents + 1, ctx->sg);
843 } else {
844 *sg = ctx->sg;
845 }
846
847 prev = ctx->sg;
848 prev_nents = ctx->nents;
849
850 *sg_cnt += ctx->nents;
851 }
852
853 return 0;
854
855unwind:
856 while (--i >= 0) {
857 struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
858
859 rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port,
860 ctx->sg, ctx->nents, dir);
861 target_free_sgl(ctx->sg, ctx->nents);
862 }
863 if (ioctx->rw_ctxs != &ioctx->s_rw_ctx)
864 kfree(ioctx->rw_ctxs);
865 return ret;
866}
867
868static void srpt_free_rw_ctxs(struct srpt_rdma_ch *ch,
869 struct srpt_send_ioctx *ioctx)
870{
871 enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd);
872 int i;
873
874 for (i = 0; i < ioctx->n_rw_ctx; i++) {
875 struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
876
877 rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port,
878 ctx->sg, ctx->nents, dir);
879 target_free_sgl(ctx->sg, ctx->nents);
880 }
881
882 if (ioctx->rw_ctxs != &ioctx->s_rw_ctx)
883 kfree(ioctx->rw_ctxs);
884}
885
886static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
887{
888 /*
889 * The pointer computations below will only be compiled correctly
890 * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
891 * whether srp_cmd::add_data has been declared as a byte pointer.
892 */
893 BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) &&
894 !__same_type(srp_cmd->add_data[0], (u8)0));
895
896 /*
897 * According to the SRP spec, the lower two bits of the 'ADDITIONAL
898 * CDB LENGTH' field are reserved and the size in bytes of this field
899 * is four times the value specified in bits 3..7. Hence the "& ~3".
900 */
901 return srp_cmd->add_data + (srp_cmd->add_cdb_len & ~3);
902}
903
846/** 904/**
847 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. 905 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
848 * @ioctx: Pointer to the I/O context associated with the request. 906 * @ioctx: Pointer to the I/O context associated with the request.
@@ -858,94 +916,59 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
858 * -ENOMEM when memory allocation fails and zero upon success. 916 * -ENOMEM when memory allocation fails and zero upon success.
859 */ 917 */
860static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, 918static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
861 struct srp_cmd *srp_cmd, 919 struct srp_cmd *srp_cmd, enum dma_data_direction *dir,
862 enum dma_data_direction *dir, u64 *data_len) 920 struct scatterlist **sg, unsigned *sg_cnt, u64 *data_len)
863{ 921{
864 struct srp_indirect_buf *idb;
865 struct srp_direct_buf *db;
866 unsigned add_cdb_offset;
867 int ret;
868
869 /*
870 * The pointer computations below will only be compiled correctly
871 * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
872 * whether srp_cmd::add_data has been declared as a byte pointer.
873 */
874 BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
875 && !__same_type(srp_cmd->add_data[0], (u8)0));
876
877 BUG_ON(!dir); 922 BUG_ON(!dir);
878 BUG_ON(!data_len); 923 BUG_ON(!data_len);
879 924
880 ret = 0;
881 *data_len = 0;
882
883 /* 925 /*
884 * The lower four bits of the buffer format field contain the DATA-IN 926 * The lower four bits of the buffer format field contain the DATA-IN
885 * buffer descriptor format, and the highest four bits contain the 927 * buffer descriptor format, and the highest four bits contain the
886 * DATA-OUT buffer descriptor format. 928 * DATA-OUT buffer descriptor format.
887 */ 929 */
888 *dir = DMA_NONE;
889 if (srp_cmd->buf_fmt & 0xf) 930 if (srp_cmd->buf_fmt & 0xf)
890 /* DATA-IN: transfer data from target to initiator (read). */ 931 /* DATA-IN: transfer data from target to initiator (read). */
891 *dir = DMA_FROM_DEVICE; 932 *dir = DMA_FROM_DEVICE;
892 else if (srp_cmd->buf_fmt >> 4) 933 else if (srp_cmd->buf_fmt >> 4)
893 /* DATA-OUT: transfer data from initiator to target (write). */ 934 /* DATA-OUT: transfer data from initiator to target (write). */
894 *dir = DMA_TO_DEVICE; 935 *dir = DMA_TO_DEVICE;
936 else
937 *dir = DMA_NONE;
938
939 /* initialize data_direction early as srpt_alloc_rw_ctxs needs it */
940 ioctx->cmd.data_direction = *dir;
895 941
896 /*
897 * According to the SRP spec, the lower two bits of the 'ADDITIONAL
898 * CDB LENGTH' field are reserved and the size in bytes of this field
899 * is four times the value specified in bits 3..7. Hence the "& ~3".
900 */
901 add_cdb_offset = srp_cmd->add_cdb_len & ~3;
902 if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || 942 if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
903 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { 943 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
904 ioctx->n_rbuf = 1; 944 struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd);
905 ioctx->rbufs = &ioctx->single_rbuf;
906 945
907 db = (struct srp_direct_buf *)(srp_cmd->add_data
908 + add_cdb_offset);
909 memcpy(ioctx->rbufs, db, sizeof(*db));
910 *data_len = be32_to_cpu(db->len); 946 *data_len = be32_to_cpu(db->len);
947 return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt);
911 } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || 948 } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
912 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { 949 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
913 idb = (struct srp_indirect_buf *)(srp_cmd->add_data 950 struct srp_indirect_buf *idb = srpt_get_desc_buf(srp_cmd);
914 + add_cdb_offset); 951 int nbufs = be32_to_cpu(idb->table_desc.len) /
952 sizeof(struct srp_direct_buf);
915 953
916 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db); 954 if (nbufs >
917
918 if (ioctx->n_rbuf >
919 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { 955 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
920 pr_err("received unsupported SRP_CMD request" 956 pr_err("received unsupported SRP_CMD request"
921 " type (%u out + %u in != %u / %zu)\n", 957 " type (%u out + %u in != %u / %zu)\n",
922 srp_cmd->data_out_desc_cnt, 958 srp_cmd->data_out_desc_cnt,
923 srp_cmd->data_in_desc_cnt, 959 srp_cmd->data_in_desc_cnt,
924 be32_to_cpu(idb->table_desc.len), 960 be32_to_cpu(idb->table_desc.len),
925 sizeof(*db)); 961 sizeof(struct srp_direct_buf));
926 ioctx->n_rbuf = 0; 962 return -EINVAL;
927 ret = -EINVAL;
928 goto out;
929 }
930
931 if (ioctx->n_rbuf == 1)
932 ioctx->rbufs = &ioctx->single_rbuf;
933 else {
934 ioctx->rbufs =
935 kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC);
936 if (!ioctx->rbufs) {
937 ioctx->n_rbuf = 0;
938 ret = -ENOMEM;
939 goto out;
940 }
941 } 963 }
942 964
943 db = idb->desc_list;
944 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db));
945 *data_len = be32_to_cpu(idb->len); 965 *data_len = be32_to_cpu(idb->len);
966 return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs,
967 sg, sg_cnt);
968 } else {
969 *data_len = 0;
970 return 0;
946 } 971 }
947out:
948 return ret;
949} 972}
950 973
951/** 974/**
@@ -1049,217 +1072,6 @@ static int srpt_ch_qp_err(struct srpt_rdma_ch *ch)
1049} 1072}
1050 1073
1051/** 1074/**
1052 * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list.
1053 */
1054static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1055 struct srpt_send_ioctx *ioctx)
1056{
1057 struct scatterlist *sg;
1058 enum dma_data_direction dir;
1059
1060 BUG_ON(!ch);
1061 BUG_ON(!ioctx);
1062 BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
1063
1064 while (ioctx->n_rdma)
1065 kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
1066
1067 kfree(ioctx->rdma_wrs);
1068 ioctx->rdma_wrs = NULL;
1069
1070 if (ioctx->mapped_sg_count) {
1071 sg = ioctx->sg;
1072 WARN_ON(!sg);
1073 dir = ioctx->cmd.data_direction;
1074 BUG_ON(dir == DMA_NONE);
1075 ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
1076 target_reverse_dma_direction(&ioctx->cmd));
1077 ioctx->mapped_sg_count = 0;
1078 }
1079}
1080
1081/**
1082 * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list.
1083 */
1084static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1085 struct srpt_send_ioctx *ioctx)
1086{
1087 struct ib_device *dev = ch->sport->sdev->device;
1088 struct se_cmd *cmd;
1089 struct scatterlist *sg, *sg_orig;
1090 int sg_cnt;
1091 enum dma_data_direction dir;
1092 struct ib_rdma_wr *riu;
1093 struct srp_direct_buf *db;
1094 dma_addr_t dma_addr;
1095 struct ib_sge *sge;
1096 u64 raddr;
1097 u32 rsize;
1098 u32 tsize;
1099 u32 dma_len;
1100 int count, nrdma;
1101 int i, j, k;
1102
1103 BUG_ON(!ch);
1104 BUG_ON(!ioctx);
1105 cmd = &ioctx->cmd;
1106 dir = cmd->data_direction;
1107 BUG_ON(dir == DMA_NONE);
1108
1109 ioctx->sg = sg = sg_orig = cmd->t_data_sg;
1110 ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;
1111
1112 count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
1113 target_reverse_dma_direction(cmd));
1114 if (unlikely(!count))
1115 return -EAGAIN;
1116
1117 ioctx->mapped_sg_count = count;
1118
1119 if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
1120 nrdma = ioctx->n_rdma_wrs;
1121 else {
1122 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
1123 + ioctx->n_rbuf;
1124
1125 ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
1126 GFP_KERNEL);
1127 if (!ioctx->rdma_wrs)
1128 goto free_mem;
1129
1130 ioctx->n_rdma_wrs = nrdma;
1131 }
1132
1133 db = ioctx->rbufs;
1134 tsize = cmd->data_length;
1135 dma_len = ib_sg_dma_len(dev, &sg[0]);
1136 riu = ioctx->rdma_wrs;
1137
1138 /*
1139 * For each remote desc - calculate the #ib_sge.
1140 * If #ib_sge < SRPT_DEF_SG_PER_WQE per rdma operation then
1141 * each remote desc rdma_iu is required a rdma wr;
1142 * else
1143 * we need to allocate extra rdma_iu to carry extra #ib_sge in
1144 * another rdma wr
1145 */
1146 for (i = 0, j = 0;
1147 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1148 rsize = be32_to_cpu(db->len);
1149 raddr = be64_to_cpu(db->va);
1150 riu->remote_addr = raddr;
1151 riu->rkey = be32_to_cpu(db->key);
1152 riu->wr.num_sge = 0;
1153
1154 /* calculate how many sge required for this remote_buf */
1155 while (rsize > 0 && tsize > 0) {
1156
1157 if (rsize >= dma_len) {
1158 tsize -= dma_len;
1159 rsize -= dma_len;
1160 raddr += dma_len;
1161
1162 if (tsize > 0) {
1163 ++j;
1164 if (j < count) {
1165 sg = sg_next(sg);
1166 dma_len = ib_sg_dma_len(
1167 dev, sg);
1168 }
1169 }
1170 } else {
1171 tsize -= rsize;
1172 dma_len -= rsize;
1173 rsize = 0;
1174 }
1175
1176 ++riu->wr.num_sge;
1177
1178 if (rsize > 0 &&
1179 riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
1180 ++ioctx->n_rdma;
1181 riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
1182 sizeof(*riu->wr.sg_list),
1183 GFP_KERNEL);
1184 if (!riu->wr.sg_list)
1185 goto free_mem;
1186
1187 ++riu;
1188 riu->wr.num_sge = 0;
1189 riu->remote_addr = raddr;
1190 riu->rkey = be32_to_cpu(db->key);
1191 }
1192 }
1193
1194 ++ioctx->n_rdma;
1195 riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
1196 sizeof(*riu->wr.sg_list),
1197 GFP_KERNEL);
1198 if (!riu->wr.sg_list)
1199 goto free_mem;
1200 }
1201
1202 db = ioctx->rbufs;
1203 tsize = cmd->data_length;
1204 riu = ioctx->rdma_wrs;
1205 sg = sg_orig;
1206 dma_len = ib_sg_dma_len(dev, &sg[0]);
1207 dma_addr = ib_sg_dma_address(dev, &sg[0]);
1208
 1209 /* this second loop maps the sg addresses to rdma_iu->ib_sge */
1210 for (i = 0, j = 0;
1211 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1212 rsize = be32_to_cpu(db->len);
1213 sge = riu->wr.sg_list;
1214 k = 0;
1215
1216 while (rsize > 0 && tsize > 0) {
1217 sge->addr = dma_addr;
1218 sge->lkey = ch->sport->sdev->pd->local_dma_lkey;
1219
1220 if (rsize >= dma_len) {
1221 sge->length =
1222 (tsize < dma_len) ? tsize : dma_len;
1223 tsize -= dma_len;
1224 rsize -= dma_len;
1225
1226 if (tsize > 0) {
1227 ++j;
1228 if (j < count) {
1229 sg = sg_next(sg);
1230 dma_len = ib_sg_dma_len(
1231 dev, sg);
1232 dma_addr = ib_sg_dma_address(
1233 dev, sg);
1234 }
1235 }
1236 } else {
1237 sge->length = (tsize < rsize) ? tsize : rsize;
1238 tsize -= rsize;
1239 dma_len -= rsize;
1240 dma_addr += rsize;
1241 rsize = 0;
1242 }
1243
1244 ++k;
1245 if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
1246 ++riu;
1247 sge = riu->wr.sg_list;
1248 k = 0;
1249 } else if (rsize > 0 && tsize > 0)
1250 ++sge;
1251 }
1252 }
1253
1254 return 0;
1255
1256free_mem:
1257 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1258
1259 return -ENOMEM;
1260}
1261
1262/**
1263 * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator. 1075 * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator.
1264 */ 1076 */
1265static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) 1077static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
@@ -1284,12 +1096,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
1284 BUG_ON(ioctx->ch != ch); 1096 BUG_ON(ioctx->ch != ch);
1285 spin_lock_init(&ioctx->spinlock); 1097 spin_lock_init(&ioctx->spinlock);
1286 ioctx->state = SRPT_STATE_NEW; 1098 ioctx->state = SRPT_STATE_NEW;
1287 ioctx->n_rbuf = 0;
1288 ioctx->rbufs = NULL;
1289 ioctx->n_rdma = 0; 1099 ioctx->n_rdma = 0;
1290 ioctx->n_rdma_wrs = 0; 1100 ioctx->n_rw_ctx = 0;
1291 ioctx->rdma_wrs = NULL;
1292 ioctx->mapped_sg_count = 0;
1293 init_completion(&ioctx->tx_done); 1101 init_completion(&ioctx->tx_done);
1294 ioctx->queue_status_only = false; 1102 ioctx->queue_status_only = false;
1295 /* 1103 /*
@@ -1359,7 +1167,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1359 * SRP_RSP sending failed or the SRP_RSP send completion has 1167 * SRP_RSP sending failed or the SRP_RSP send completion has
1360 * not been received in time. 1168 * not been received in time.
1361 */ 1169 */
1362 srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
1363 transport_generic_free_cmd(&ioctx->cmd, 0); 1170 transport_generic_free_cmd(&ioctx->cmd, 0);
1364 break; 1171 break;
1365 case SRPT_STATE_MGMT_RSP_SENT: 1172 case SRPT_STATE_MGMT_RSP_SENT:
@@ -1387,6 +1194,7 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
1387 1194
1388 WARN_ON(ioctx->n_rdma <= 0); 1195 WARN_ON(ioctx->n_rdma <= 0);
1389 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); 1196 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1197 ioctx->n_rdma = 0;
1390 1198
1391 if (unlikely(wc->status != IB_WC_SUCCESS)) { 1199 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1392 pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n", 1200 pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
@@ -1403,23 +1211,6 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
1403 __LINE__, srpt_get_cmd_state(ioctx)); 1211 __LINE__, srpt_get_cmd_state(ioctx));
1404} 1212}
1405 1213
1406static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
1407{
1408 struct srpt_send_ioctx *ioctx =
1409 container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
1410
1411 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1412 /*
1413 * Note: if an RDMA write error completion is received that
1414 * means that a SEND also has been posted. Defer further
1415 * processing of the associated command until the send error
1416 * completion has been received.
1417 */
1418 pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
1419 ioctx, wc->status);
1420 }
1421}
1422
1423/** 1214/**
1424 * srpt_build_cmd_rsp() - Build an SRP_RSP response. 1215 * srpt_build_cmd_rsp() - Build an SRP_RSP response.
1425 * @ch: RDMA channel through which the request has been received. 1216 * @ch: RDMA channel through which the request has been received.
@@ -1537,6 +1328,8 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
1537{ 1328{
1538 struct se_cmd *cmd; 1329 struct se_cmd *cmd;
1539 struct srp_cmd *srp_cmd; 1330 struct srp_cmd *srp_cmd;
1331 struct scatterlist *sg = NULL;
1332 unsigned sg_cnt = 0;
1540 u64 data_len; 1333 u64 data_len;
1541 enum dma_data_direction dir; 1334 enum dma_data_direction dir;
1542 int rc; 1335 int rc;
@@ -1563,16 +1356,21 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
1563 break; 1356 break;
1564 } 1357 }
1565 1358
1566 if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) { 1359 rc = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &sg, &sg_cnt,
1567 pr_err("0x%llx: parsing SRP descriptor table failed.\n", 1360 &data_len);
1568 srp_cmd->tag); 1361 if (rc) {
1362 if (rc != -EAGAIN) {
1363 pr_err("0x%llx: parsing SRP descriptor table failed.\n",
1364 srp_cmd->tag);
1365 }
1569 goto release_ioctx; 1366 goto release_ioctx;
1570 } 1367 }
1571 1368
1572 rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb, 1369 rc = target_submit_cmd_map_sgls(cmd, ch->sess, srp_cmd->cdb,
1573 &send_ioctx->sense_data[0], 1370 &send_ioctx->sense_data[0],
1574 scsilun_to_int(&srp_cmd->lun), data_len, 1371 scsilun_to_int(&srp_cmd->lun), data_len,
1575 TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); 1372 TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF,
1373 sg, sg_cnt, NULL, 0, NULL, 0);
1576 if (rc != 0) { 1374 if (rc != 0) {
1577 pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc, 1375 pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc,
1578 srp_cmd->tag); 1376 srp_cmd->tag);
@@ -1664,23 +1462,21 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1664 recv_ioctx->ioctx.dma, srp_max_req_size, 1462 recv_ioctx->ioctx.dma, srp_max_req_size,
1665 DMA_FROM_DEVICE); 1463 DMA_FROM_DEVICE);
1666 1464
1667 if (unlikely(ch->state == CH_CONNECTING)) { 1465 if (unlikely(ch->state == CH_CONNECTING))
1668 list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); 1466 goto out_wait;
1669 goto out;
1670 }
1671 1467
1672 if (unlikely(ch->state != CH_LIVE)) 1468 if (unlikely(ch->state != CH_LIVE))
1673 goto out; 1469 return;
1674 1470
1675 srp_cmd = recv_ioctx->ioctx.buf; 1471 srp_cmd = recv_ioctx->ioctx.buf;
1676 if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) { 1472 if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) {
1677 if (!send_ioctx) 1473 if (!send_ioctx) {
1474 if (!list_empty(&ch->cmd_wait_list))
1475 goto out_wait;
1678 send_ioctx = srpt_get_send_ioctx(ch); 1476 send_ioctx = srpt_get_send_ioctx(ch);
1679 if (unlikely(!send_ioctx)) {
1680 list_add_tail(&recv_ioctx->wait_list,
1681 &ch->cmd_wait_list);
1682 goto out;
1683 } 1477 }
1478 if (unlikely(!send_ioctx))
1479 goto out_wait;
1684 } 1480 }
1685 1481
1686 switch (srp_cmd->opcode) { 1482 switch (srp_cmd->opcode) {
@@ -1709,8 +1505,10 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1709 } 1505 }
1710 1506
1711 srpt_post_recv(ch->sport->sdev, recv_ioctx); 1507 srpt_post_recv(ch->sport->sdev, recv_ioctx);
1712out:
1713 return; 1508 return;
1509
1510out_wait:
1511 list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
1714} 1512}
1715 1513
1716static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) 1514static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1779,14 +1577,13 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
1779 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT && 1577 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
1780 state != SRPT_STATE_MGMT_RSP_SENT); 1578 state != SRPT_STATE_MGMT_RSP_SENT);
1781 1579
1782 atomic_inc(&ch->sq_wr_avail); 1580 atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
1783 1581
1784 if (wc->status != IB_WC_SUCCESS) 1582 if (wc->status != IB_WC_SUCCESS)
1785 pr_info("sending response for ioctx 0x%p failed" 1583 pr_info("sending response for ioctx 0x%p failed"
1786 " with status %d\n", ioctx, wc->status); 1584 " with status %d\n", ioctx, wc->status);
1787 1585
1788 if (state != SRPT_STATE_DONE) { 1586 if (state != SRPT_STATE_DONE) {
1789 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1790 transport_generic_free_cmd(&ioctx->cmd, 0); 1587 transport_generic_free_cmd(&ioctx->cmd, 0);
1791 } else { 1588 } else {
1792 pr_err("IB completion has been received too late for" 1589 pr_err("IB completion has been received too late for"
@@ -1832,8 +1629,18 @@ retry:
1832 qp_init->srq = sdev->srq; 1629 qp_init->srq = sdev->srq;
1833 qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; 1630 qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1834 qp_init->qp_type = IB_QPT_RC; 1631 qp_init->qp_type = IB_QPT_RC;
1835 qp_init->cap.max_send_wr = srp_sq_size; 1632 /*
1836 qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; 1633 * We divide up our send queue size into half SEND WRs to send the
1634 * completions, and half R/W contexts to actually do the RDMA
1635 * READ/WRITE transfers. Note that we need to allocate CQ slots for
1636 * both both, as RDMA contexts will also post completions for the
1637 * RDMA READ case.
1638 */
1639 qp_init->cap.max_send_wr = srp_sq_size / 2;
1640 qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
1641 qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd,
1642 sdev->device->attrs.max_sge);
1643 qp_init->port_num = ch->sport->port;
1837 1644
1838 ch->qp = ib_create_qp(sdev->pd, qp_init); 1645 ch->qp = ib_create_qp(sdev->pd, qp_init);
1839 if (IS_ERR(ch->qp)) { 1646 if (IS_ERR(ch->qp)) {
@@ -2386,95 +2193,6 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2386 return ret; 2193 return ret;
2387} 2194}
2388 2195
2389/**
2390 * srpt_perform_rdmas() - Perform IB RDMA.
2391 *
2392 * Returns zero upon success or a negative number upon failure.
2393 */
2394static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2395 struct srpt_send_ioctx *ioctx)
2396{
2397 struct ib_send_wr *bad_wr;
2398 int sq_wr_avail, ret, i;
2399 enum dma_data_direction dir;
2400 const int n_rdma = ioctx->n_rdma;
2401
2402 dir = ioctx->cmd.data_direction;
2403 if (dir == DMA_TO_DEVICE) {
2404 /* write */
2405 ret = -ENOMEM;
2406 sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
2407 if (sq_wr_avail < 0) {
2408 pr_warn("IB send queue full (needed %d)\n",
2409 n_rdma);
2410 goto out;
2411 }
2412 }
2413
2414 for (i = 0; i < n_rdma; i++) {
2415 struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
2416
2417 wr->opcode = (dir == DMA_FROM_DEVICE) ?
2418 IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2419
2420 if (i == n_rdma - 1) {
2421 /* only get completion event for the last rdma read */
2422 if (dir == DMA_TO_DEVICE) {
2423 wr->send_flags = IB_SEND_SIGNALED;
2424 ioctx->rdma_cqe.done = srpt_rdma_read_done;
2425 } else {
2426 ioctx->rdma_cqe.done = srpt_rdma_write_done;
2427 }
2428 wr->wr_cqe = &ioctx->rdma_cqe;
2429 wr->next = NULL;
2430 } else {
2431 wr->wr_cqe = NULL;
2432 wr->next = &ioctx->rdma_wrs[i + 1].wr;
2433 }
2434 }
2435
2436 ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
2437 if (ret)
2438 pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
2439 __func__, __LINE__, ret, i, n_rdma);
2440out:
2441 if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
2442 atomic_add(n_rdma, &ch->sq_wr_avail);
2443 return ret;
2444}
2445
2446/**
2447 * srpt_xfer_data() - Start data transfer from initiator to target.
2448 */
2449static int srpt_xfer_data(struct srpt_rdma_ch *ch,
2450 struct srpt_send_ioctx *ioctx)
2451{
2452 int ret;
2453
2454 ret = srpt_map_sg_to_ib_sge(ch, ioctx);
2455 if (ret) {
2456 pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
2457 goto out;
2458 }
2459
2460 ret = srpt_perform_rdmas(ch, ioctx);
2461 if (ret) {
2462 if (ret == -EAGAIN || ret == -ENOMEM)
2463 pr_info("%s[%d] queue full -- ret=%d\n",
2464 __func__, __LINE__, ret);
2465 else
2466 pr_err("%s[%d] fatal error -- ret=%d\n",
2467 __func__, __LINE__, ret);
2468 goto out_unmap;
2469 }
2470
2471out:
2472 return ret;
2473out_unmap:
2474 srpt_unmap_sg_to_ib_sge(ch, ioctx);
2475 goto out;
2476}
2477
2478static int srpt_write_pending_status(struct se_cmd *se_cmd) 2196static int srpt_write_pending_status(struct se_cmd *se_cmd)
2479{ 2197{
2480 struct srpt_send_ioctx *ioctx; 2198 struct srpt_send_ioctx *ioctx;
@@ -2491,11 +2209,42 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
2491 struct srpt_send_ioctx *ioctx = 2209 struct srpt_send_ioctx *ioctx =
2492 container_of(se_cmd, struct srpt_send_ioctx, cmd); 2210 container_of(se_cmd, struct srpt_send_ioctx, cmd);
2493 struct srpt_rdma_ch *ch = ioctx->ch; 2211 struct srpt_rdma_ch *ch = ioctx->ch;
2212 struct ib_send_wr *first_wr = NULL, *bad_wr;
2213 struct ib_cqe *cqe = &ioctx->rdma_cqe;
2494 enum srpt_command_state new_state; 2214 enum srpt_command_state new_state;
2215 int ret, i;
2495 2216
2496 new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); 2217 new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
2497 WARN_ON(new_state == SRPT_STATE_DONE); 2218 WARN_ON(new_state == SRPT_STATE_DONE);
2498 return srpt_xfer_data(ch, ioctx); 2219
2220 if (atomic_sub_return(ioctx->n_rdma, &ch->sq_wr_avail) < 0) {
2221 pr_warn("%s: IB send queue full (needed %d)\n",
2222 __func__, ioctx->n_rdma);
2223 ret = -ENOMEM;
2224 goto out_undo;
2225 }
2226
2227 cqe->done = srpt_rdma_read_done;
2228 for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) {
2229 struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
2230
2231 first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, ch->sport->port,
2232 cqe, first_wr);
2233 cqe = NULL;
2234 }
2235
2236 ret = ib_post_send(ch->qp, first_wr, &bad_wr);
2237 if (ret) {
2238 pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n",
2239 __func__, ret, ioctx->n_rdma,
2240 atomic_read(&ch->sq_wr_avail));
2241 goto out_undo;
2242 }
2243
2244 return 0;
2245out_undo:
2246 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
2247 return ret;
2499} 2248}
2500 2249
2501static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) 2250static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
@@ -2517,17 +2266,17 @@ static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
2517 */ 2266 */
2518static void srpt_queue_response(struct se_cmd *cmd) 2267static void srpt_queue_response(struct se_cmd *cmd)
2519{ 2268{
2520 struct srpt_rdma_ch *ch; 2269 struct srpt_send_ioctx *ioctx =
2521 struct srpt_send_ioctx *ioctx; 2270 container_of(cmd, struct srpt_send_ioctx, cmd);
2271 struct srpt_rdma_ch *ch = ioctx->ch;
2272 struct srpt_device *sdev = ch->sport->sdev;
2273 struct ib_send_wr send_wr, *first_wr = NULL, *bad_wr;
2274 struct ib_sge sge;
2522 enum srpt_command_state state; 2275 enum srpt_command_state state;
2523 unsigned long flags; 2276 unsigned long flags;
2524 int ret; 2277 int resp_len, ret, i;
2525 enum dma_data_direction dir;
2526 int resp_len;
2527 u8 srp_tm_status; 2278 u8 srp_tm_status;
2528 2279
2529 ioctx = container_of(cmd, struct srpt_send_ioctx, cmd);
2530 ch = ioctx->ch;
2531 BUG_ON(!ch); 2280 BUG_ON(!ch);
2532 2281
2533 spin_lock_irqsave(&ioctx->spinlock, flags); 2282 spin_lock_irqsave(&ioctx->spinlock, flags);
@@ -2554,17 +2303,19 @@ static void srpt_queue_response(struct se_cmd *cmd)
2554 return; 2303 return;
2555 } 2304 }
2556 2305
2557 dir = ioctx->cmd.data_direction;
2558
2559 /* For read commands, transfer the data to the initiator. */ 2306 /* For read commands, transfer the data to the initiator. */
2560 if (dir == DMA_FROM_DEVICE && ioctx->cmd.data_length && 2307 if (ioctx->cmd.data_direction == DMA_FROM_DEVICE &&
2308 ioctx->cmd.data_length &&
2561 !ioctx->queue_status_only) { 2309 !ioctx->queue_status_only) {
2562 ret = srpt_xfer_data(ch, ioctx); 2310 for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) {
2563 if (ret) { 2311 struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
2564 pr_err("xfer_data failed for tag %llu\n", 2312
2565 ioctx->cmd.tag); 2313 first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp,
2566 return; 2314 ch->sport->port, NULL,
2315 first_wr ? first_wr : &send_wr);
2567 } 2316 }
2317 } else {
2318 first_wr = &send_wr;
2568 } 2319 }
2569 2320
2570 if (state != SRPT_STATE_MGMT) 2321 if (state != SRPT_STATE_MGMT)
@@ -2576,14 +2327,46 @@ static void srpt_queue_response(struct se_cmd *cmd)
2576 resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status, 2327 resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status,
2577 ioctx->cmd.tag); 2328 ioctx->cmd.tag);
2578 } 2329 }
2579 ret = srpt_post_send(ch, ioctx, resp_len); 2330
2580 if (ret) { 2331 atomic_inc(&ch->req_lim);
2581 pr_err("sending cmd response failed for tag %llu\n", 2332
2582 ioctx->cmd.tag); 2333 if (unlikely(atomic_sub_return(1 + ioctx->n_rdma,
2583 srpt_unmap_sg_to_ib_sge(ch, ioctx); 2334 &ch->sq_wr_avail) < 0)) {
2584 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); 2335 pr_warn("%s: IB send queue full (needed %d)\n",
2585 target_put_sess_cmd(&ioctx->cmd); 2336 __func__, ioctx->n_rdma);
2337 ret = -ENOMEM;
2338 goto out;
2339 }
2340
2341 ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, resp_len,
2342 DMA_TO_DEVICE);
2343
2344 sge.addr = ioctx->ioctx.dma;
2345 sge.length = resp_len;
2346 sge.lkey = sdev->pd->local_dma_lkey;
2347
2348 ioctx->ioctx.cqe.done = srpt_send_done;
2349 send_wr.next = NULL;
2350 send_wr.wr_cqe = &ioctx->ioctx.cqe;
2351 send_wr.sg_list = &sge;
2352 send_wr.num_sge = 1;
2353 send_wr.opcode = IB_WR_SEND;
2354 send_wr.send_flags = IB_SEND_SIGNALED;
2355
2356 ret = ib_post_send(ch->qp, first_wr, &bad_wr);
2357 if (ret < 0) {
2358 pr_err("%s: sending cmd response failed for tag %llu (%d)\n",
2359 __func__, ioctx->cmd.tag, ret);
2360 goto out;
2586 } 2361 }
2362
2363 return;
2364
2365out:
2366 atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
2367 atomic_dec(&ch->req_lim);
2368 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
2369 target_put_sess_cmd(&ioctx->cmd);
2587} 2370}
2588 2371
2589static int srpt_queue_data_in(struct se_cmd *cmd) 2372static int srpt_queue_data_in(struct se_cmd *cmd)
@@ -2599,10 +2382,6 @@ static void srpt_queue_tm_rsp(struct se_cmd *cmd)
2599 2382
2600static void srpt_aborted_task(struct se_cmd *cmd) 2383static void srpt_aborted_task(struct se_cmd *cmd)
2601{ 2384{
2602 struct srpt_send_ioctx *ioctx = container_of(cmd,
2603 struct srpt_send_ioctx, cmd);
2604
2605 srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
2606} 2385}
2607 2386
2608static int srpt_queue_status(struct se_cmd *cmd) 2387static int srpt_queue_status(struct se_cmd *cmd)
@@ -2903,12 +2682,10 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
2903 unsigned long flags; 2682 unsigned long flags;
2904 2683
2905 WARN_ON(ioctx->state != SRPT_STATE_DONE); 2684 WARN_ON(ioctx->state != SRPT_STATE_DONE);
2906 WARN_ON(ioctx->mapped_sg_count != 0);
2907 2685
2908 if (ioctx->n_rbuf > 1) { 2686 if (ioctx->n_rw_ctx) {
2909 kfree(ioctx->rbufs); 2687 srpt_free_rw_ctxs(ch, ioctx);
2910 ioctx->rbufs = NULL; 2688 ioctx->n_rw_ctx = 0;
2911 ioctx->n_rbuf = 0;
2912 } 2689 }
2913 2690
2914 spin_lock_irqsave(&ch->spinlock, flags); 2691 spin_lock_irqsave(&ch->spinlock, flags);
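The ib_srpt.c changes above replace the driver's hand-rolled SGE mapping with the generic RDMA READ/WRITE API from <rdma/rw.h>. As a minimal sketch of the lifecycle those hunks rely on — the helper name and locals are illustrative, while rdma_rw_ctx_init(), rdma_rw_ctx_wrs(), rdma_rw_ctx_destroy() and ib_post_send() are the calls used above:

static int srpt_example_rdma_xfer(struct ib_qp *qp, u8 port_num,
				  struct scatterlist *sg, u32 sg_cnt,
				  u64 remote_addr, u32 rkey,
				  enum dma_data_direction dir,
				  struct ib_cqe *cqe, struct rdma_rw_ctx *ctx)
{
	struct ib_send_wr *first_wr, *bad_wr;
	int ret;

	/* Map the SG list and build the RDMA READ/WRITE work requests. */
	ret = rdma_rw_ctx_init(ctx, qp, port_num, sg, sg_cnt,
			       0 /* sg_offset */, remote_addr, rkey, dir);
	if (ret < 0)
		return ret;	/* >= 0 is the WR count used for SQ accounting */

	/* Chain the WRs; the last one signals its completion through @cqe. */
	first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, NULL);

	ret = ib_post_send(qp, first_wr, &bad_wr);
	if (ret)
		rdma_rw_ctx_destroy(ctx, qp, port_num, sg, sg_cnt, dir);
	return ret;
}

Once the signalled completion has fired, the caller unmaps and releases the context with rdma_rw_ctx_destroy(), which is what srpt_free_rw_ctxs() above does for each per-descriptor context.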
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index af9b8b527340..fee6bfd7ca21 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -42,6 +42,7 @@
42#include <rdma/ib_verbs.h> 42#include <rdma/ib_verbs.h>
43#include <rdma/ib_sa.h> 43#include <rdma/ib_sa.h>
44#include <rdma/ib_cm.h> 44#include <rdma/ib_cm.h>
45#include <rdma/rw.h>
45 46
46#include <scsi/srp.h> 47#include <scsi/srp.h>
47 48
@@ -105,7 +106,6 @@ enum {
105 SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, 106 SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
106 107
107 SRPT_DEF_SG_TABLESIZE = 128, 108 SRPT_DEF_SG_TABLESIZE = 128,
108 SRPT_DEF_SG_PER_WQE = 16,
109 109
110 MIN_SRPT_SQ_SIZE = 16, 110 MIN_SRPT_SQ_SIZE = 16,
111 DEF_SRPT_SQ_SIZE = 4096, 111 DEF_SRPT_SQ_SIZE = 4096,
@@ -174,21 +174,17 @@ struct srpt_recv_ioctx {
174 struct srpt_ioctx ioctx; 174 struct srpt_ioctx ioctx;
175 struct list_head wait_list; 175 struct list_head wait_list;
176}; 176};
177
178struct srpt_rw_ctx {
179 struct rdma_rw_ctx rw;
180 struct scatterlist *sg;
181 unsigned int nents;
182};
177 183
178/** 184/**
179 * struct srpt_send_ioctx - SRPT send I/O context. 185 * struct srpt_send_ioctx - SRPT send I/O context.
180 * @ioctx: See above. 186 * @ioctx: See above.
181 * @ch: Channel pointer. 187 * @ch: Channel pointer.
182 * @free_list: Node in srpt_rdma_ch.free_list.
183 * @n_rbuf: Number of data buffers in the received SRP command.
184 * @rbufs: Pointer to SRP data buffer array.
185 * @single_rbuf: SRP data buffer if the command has only a single buffer.
186 * @sg: Pointer to sg-list associated with this I/O context.
187 * @sg_cnt: SG-list size.
188 * @mapped_sg_count: ib_dma_map_sg() return value.
189 * @n_rdma_wrs: Number of elements in the rdma_wrs array.
190 * @rdma_wrs: Array with information about the RDMA mapping.
191 * @tag: Tag of the received SRP information unit.
192 * @spinlock: Protects 'state'. 188 * @spinlock: Protects 'state'.
193 * @state: I/O context state. 189 * @state: I/O context state.
194 * @cmd: Target core command data structure. 190 * @cmd: Target core command data structure.
@@ -197,21 +193,18 @@ struct srpt_recv_ioctx {
197struct srpt_send_ioctx { 193struct srpt_send_ioctx {
198 struct srpt_ioctx ioctx; 194 struct srpt_ioctx ioctx;
199 struct srpt_rdma_ch *ch; 195 struct srpt_rdma_ch *ch;
200 struct ib_rdma_wr *rdma_wrs; 196
197 struct srpt_rw_ctx s_rw_ctx;
198 struct srpt_rw_ctx *rw_ctxs;
199
201 struct ib_cqe rdma_cqe; 200 struct ib_cqe rdma_cqe;
202 struct srp_direct_buf *rbufs;
203 struct srp_direct_buf single_rbuf;
204 struct scatterlist *sg;
205 struct list_head free_list; 201 struct list_head free_list;
206 spinlock_t spinlock; 202 spinlock_t spinlock;
207 enum srpt_command_state state; 203 enum srpt_command_state state;
208 struct se_cmd cmd; 204 struct se_cmd cmd;
209 struct completion tx_done; 205 struct completion tx_done;
210 int sg_cnt;
211 int mapped_sg_count;
212 u16 n_rdma_wrs;
213 u8 n_rdma; 206 u8 n_rdma;
214 u8 n_rbuf; 207 u8 n_rw_ctx;
215 bool queue_status_only; 208 bool queue_status_only;
216 u8 sense_data[TRANSPORT_SENSE_BUFFER]; 209 u8 sense_data[TRANSPORT_SENSE_BUFFER];
217}; 210};
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 80417fc564d4..4705e2dea423 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -1392,6 +1392,10 @@ struct ulp_mem_io {
1392#define T5_ULP_MEMIO_ORDER_V(x) ((x) << T5_ULP_MEMIO_ORDER_S) 1392#define T5_ULP_MEMIO_ORDER_V(x) ((x) << T5_ULP_MEMIO_ORDER_S)
1393#define T5_ULP_MEMIO_ORDER_F T5_ULP_MEMIO_ORDER_V(1U) 1393#define T5_ULP_MEMIO_ORDER_F T5_ULP_MEMIO_ORDER_V(1U)
1394 1394
1395#define T5_ULP_MEMIO_FID_S 4
1396#define T5_ULP_MEMIO_FID_M 0x7ff
1397#define T5_ULP_MEMIO_FID_V(x) ((x) << T5_ULP_MEMIO_FID_S)
1398
1395/* ulp_mem_io.lock_addr fields */ 1399/* ulp_mem_io.lock_addr fields */
1396#define ULP_MEMIO_ADDR_S 0 1400#define ULP_MEMIO_ADDR_S 0
1397#define ULP_MEMIO_ADDR_V(x) ((x) << ULP_MEMIO_ADDR_S) 1401#define ULP_MEMIO_ADDR_V(x) ((x) << ULP_MEMIO_ADDR_S)
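The new T5_ULP_MEMIO_FID_{S,M,V} definitions follow the usual cxgb4 shift/mask/value pattern. A hedged sketch of how such a triple is typically consumed — the helper name and the idea of patching an existing command word are assumptions for illustration, not something this hunk adds:

static inline u32 t5_ulp_memio_set_fid(u32 word, u32 fid)
{
	/* Clear any previously programmed FID bits ... */
	word &= ~T5_ULP_MEMIO_FID_V(T5_ULP_MEMIO_FID_M);
	/* ... then insert the new value, masked to the field width. */
	word |= T5_ULP_MEMIO_FID_V(fid & T5_ULP_MEMIO_FID_M);
	return word;
}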
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index b51e42d6fbec..873a631ad155 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -39,6 +39,53 @@
39#include <linux/mlx5/cq.h> 39#include <linux/mlx5/cq.h>
40#include "mlx5_core.h" 40#include "mlx5_core.h"
41 41
42#define TASKLET_MAX_TIME 2
43#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
44
45void mlx5_cq_tasklet_cb(unsigned long data)
46{
47 unsigned long flags;
48 unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
49 struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
50 struct mlx5_core_cq *mcq;
51 struct mlx5_core_cq *temp;
52
53 spin_lock_irqsave(&ctx->lock, flags);
54 list_splice_tail_init(&ctx->list, &ctx->process_list);
55 spin_unlock_irqrestore(&ctx->lock, flags);
56
57 list_for_each_entry_safe(mcq, temp, &ctx->process_list,
58 tasklet_ctx.list) {
59 list_del_init(&mcq->tasklet_ctx.list);
60 mcq->tasklet_ctx.comp(mcq);
61 if (atomic_dec_and_test(&mcq->refcount))
62 complete(&mcq->free);
63 if (time_after(jiffies, end))
64 break;
65 }
66
67 if (!list_empty(&ctx->process_list))
68 tasklet_schedule(&ctx->task);
69}
70
71static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
72{
73 unsigned long flags;
74 struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
75
76 spin_lock_irqsave(&tasklet_ctx->lock, flags);
 77 /* When migration of CQs between EQs is implemented, please note
 78 * that this point will need to be synchronized: it is possible
 79 * that, while a CQ is being migrated, completions could still
 80 * arrive on the old EQ.
81 */
82 if (list_empty_careful(&cq->tasklet_ctx.list)) {
83 atomic_inc(&cq->refcount);
84 list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
85 }
86 spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
87}
88
42void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) 89void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
43{ 90{
44 struct mlx5_core_cq *cq; 91 struct mlx5_core_cq *cq;
@@ -96,6 +143,13 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
96 struct mlx5_create_cq_mbox_out out; 143 struct mlx5_create_cq_mbox_out out;
97 struct mlx5_destroy_cq_mbox_in din; 144 struct mlx5_destroy_cq_mbox_in din;
98 struct mlx5_destroy_cq_mbox_out dout; 145 struct mlx5_destroy_cq_mbox_out dout;
146 int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
147 c_eqn);
148 struct mlx5_eq *eq;
149
150 eq = mlx5_eqn2eq(dev, eqn);
151 if (IS_ERR(eq))
152 return PTR_ERR(eq);
99 153
100 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); 154 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
101 memset(&out, 0, sizeof(out)); 155 memset(&out, 0, sizeof(out));
@@ -111,6 +165,11 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
111 cq->arm_sn = 0; 165 cq->arm_sn = 0;
112 atomic_set(&cq->refcount, 1); 166 atomic_set(&cq->refcount, 1);
113 init_completion(&cq->free); 167 init_completion(&cq->free);
168 if (!cq->comp)
169 cq->comp = mlx5_add_cq_to_tasklet;
170 /* assuming CQ will be deleted before the EQ */
171 cq->tasklet_ctx.priv = &eq->tasklet_ctx;
172 INIT_LIST_HEAD(&cq->tasklet_ctx.list);
114 173
115 spin_lock_irq(&table->lock); 174 spin_lock_irq(&table->lock);
116 err = radix_tree_insert(&table->tree, cq->cqn, cq); 175 err = radix_tree_insert(&table->tree, cq->cqn, cq);
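With the hunk above, mlx5_core_create_cq() installs mlx5_add_cq_to_tasklet() whenever the caller leaves cq->comp unset, so the caller's real handler is taken from tasklet_ctx.comp and now runs from mlx5_cq_tasklet_cb() in tasklet (softirq) context rather than directly from the EQ interrupt. A rough sketch of the assumed consumer-side wiring — the handler and wrapper names are hypothetical and the exact caller sequence is not part of this patch:

/* Hypothetical completion handler, invoked from mlx5_cq_tasklet_cb(). */
static void my_cq_comp(struct mlx5_core_cq *mcq)
{
	/* Runs in softirq context, time-bounded by TASKLET_MAX_TIME. */
}

static int my_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
			struct mlx5_create_cq_mbox_in *in, int inlen)
{
	cq->tasklet_ctx.comp = my_cq_comp;	/* consumed by the tasklet */
	/* cq->comp left NULL, so mlx5_add_cq_to_tasklet() is installed. */
	return mlx5_core_create_cq(dev, cq, in, inlen);
}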
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 18fccec72c5d..0e30602ef76d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -202,7 +202,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
202 struct mlx5_eqe *eqe; 202 struct mlx5_eqe *eqe;
203 int eqes_found = 0; 203 int eqes_found = 0;
204 int set_ci = 0; 204 int set_ci = 0;
205 u32 cqn; 205 u32 cqn = -1;
206 u32 rsn; 206 u32 rsn;
207 u8 port; 207 u8 port;
208 208
@@ -320,6 +320,9 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
320 320
321 eq_update_ci(eq, 1); 321 eq_update_ci(eq, 1);
322 322
323 if (cqn != -1)
324 tasklet_schedule(&eq->tasklet_ctx.task);
325
323 return eqes_found; 326 return eqes_found;
324} 327}
325 328
@@ -403,6 +406,12 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
403 if (err) 406 if (err)
404 goto err_irq; 407 goto err_irq;
405 408
409 INIT_LIST_HEAD(&eq->tasklet_ctx.list);
410 INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
411 spin_lock_init(&eq->tasklet_ctx.lock);
412 tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
413 (unsigned long)&eq->tasklet_ctx);
414
406 /* EQs are created in ARMED state 415 /* EQs are created in ARMED state
407 */ 416 */
408 eq_update_ci(eq, 1); 417 eq_update_ci(eq, 1);
@@ -436,6 +445,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
436 mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", 445 mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
437 eq->eqn); 446 eq->eqn);
438 synchronize_irq(eq->irqn); 447 synchronize_irq(eq->irqn);
448 tasklet_disable(&eq->tasklet_ctx.task);
439 mlx5_buf_free(dev, &eq->buf); 449 mlx5_buf_free(dev, &eq->buf);
440 450
441 return err; 451 return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6feef7fb9d6a..a19b59348dd6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -663,6 +663,23 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
663} 663}
664EXPORT_SYMBOL(mlx5_vector2eqn); 664EXPORT_SYMBOL(mlx5_vector2eqn);
665 665
666struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
667{
668 struct mlx5_eq_table *table = &dev->priv.eq_table;
669 struct mlx5_eq *eq;
670
671 spin_lock(&table->lock);
672 list_for_each_entry(eq, &table->comp_eqs_list, list)
673 if (eq->eqn == eqn) {
674 spin_unlock(&table->lock);
675 return eq;
676 }
677
678 spin_unlock(&table->lock);
679
680 return ERR_PTR(-ENOENT);
681}
682
666static void free_comp_eqs(struct mlx5_core_dev *dev) 683static void free_comp_eqs(struct mlx5_core_dev *dev)
667{ 684{
668 struct mlx5_eq_table *table = &dev->priv.eq_table; 685 struct mlx5_eq_table *table = &dev->priv.eq_table;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 482604bd051c..2f86ec6fcf25 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -102,6 +102,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
102int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); 102int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
103cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); 103cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
104u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); 104u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
105struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
106void mlx5_cq_tasklet_cb(unsigned long data);
105 107
106void mlx5e_init(void); 108void mlx5e_init(void);
107void mlx5e_cleanup(void); 109void mlx5e_cleanup(void);
diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/staging/rdma/hfi1/affinity.c
index 2cb8ca77f876..6e7050ab9e16 100644
--- a/drivers/staging/rdma/hfi1/affinity.c
+++ b/drivers/staging/rdma/hfi1/affinity.c
@@ -53,20 +53,6 @@
53#include "sdma.h" 53#include "sdma.h"
54#include "trace.h" 54#include "trace.h"
55 55
56struct cpu_mask_set {
57 struct cpumask mask;
58 struct cpumask used;
59 uint gen;
60};
61
62struct hfi1_affinity {
63 struct cpu_mask_set def_intr;
64 struct cpu_mask_set rcv_intr;
65 struct cpu_mask_set proc;
66 /* spin lock to protect affinity struct */
67 spinlock_t lock;
68};
69
70/* Name of IRQ types, indexed by enum irq_type */ 56/* Name of IRQ types, indexed by enum irq_type */
71static const char * const irq_type_names[] = { 57static const char * const irq_type_names[] = {
72 "SDMA", 58 "SDMA",
@@ -82,6 +68,48 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
82 set->gen = 0; 68 set->gen = 0;
83} 69}
84 70
71/* Initialize non-HT cpu cores mask */
72int init_real_cpu_mask(struct hfi1_devdata *dd)
73{
74 struct hfi1_affinity *info;
75 int possible, curr_cpu, i, ht;
76
77 info = kzalloc(sizeof(*info), GFP_KERNEL);
78 if (!info)
79 return -ENOMEM;
80
81 cpumask_clear(&info->real_cpu_mask);
82
83 /* Start with cpu online mask as the real cpu mask */
84 cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
85
86 /*
87 * Remove HT cores from the real cpu mask. Do this in two steps below.
88 */
89 possible = cpumask_weight(&info->real_cpu_mask);
90 ht = cpumask_weight(topology_sibling_cpumask(
91 cpumask_first(&info->real_cpu_mask)));
92 /*
93 * Step 1. Skip over the first N HT siblings and use them as the
94 * "real" cores. Assumes that HT cores are not enumerated in
95 * succession (except in the single core case).
96 */
97 curr_cpu = cpumask_first(&info->real_cpu_mask);
98 for (i = 0; i < possible / ht; i++)
99 curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
100 /*
101 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
102 * skip any gaps.
103 */
104 for (; i < possible; i++) {
105 cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
106 curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
107 }
108
109 dd->affinity = info;
110 return 0;
111}
112
85/* 113/*
86 * Interrupt affinity. 114 * Interrupt affinity.
87 * 115 *
@@ -93,20 +121,17 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
93 * to the node relative 1 as necessary. 121 * to the node relative 1 as necessary.
94 * 122 *
95 */ 123 */
96int hfi1_dev_affinity_init(struct hfi1_devdata *dd) 124void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
97{ 125{
98 int node = pcibus_to_node(dd->pcidev->bus); 126 int node = pcibus_to_node(dd->pcidev->bus);
99 struct hfi1_affinity *info; 127 struct hfi1_affinity *info = dd->affinity;
100 const struct cpumask *local_mask; 128 const struct cpumask *local_mask;
101 int curr_cpu, possible, i, ht; 129 int curr_cpu, possible, i;
102 130
103 if (node < 0) 131 if (node < 0)
104 node = numa_node_id(); 132 node = numa_node_id();
105 dd->node = node; 133 dd->node = node;
106 134
107 info = kzalloc(sizeof(*info), GFP_KERNEL);
108 if (!info)
109 return -ENOMEM;
110 spin_lock_init(&info->lock); 135 spin_lock_init(&info->lock);
111 136
112 init_cpu_mask_set(&info->def_intr); 137 init_cpu_mask_set(&info->def_intr);
@@ -116,30 +141,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
116 local_mask = cpumask_of_node(dd->node); 141 local_mask = cpumask_of_node(dd->node);
117 if (cpumask_first(local_mask) >= nr_cpu_ids) 142 if (cpumask_first(local_mask) >= nr_cpu_ids)
118 local_mask = topology_core_cpumask(0); 143 local_mask = topology_core_cpumask(0);
119 /* use local mask as default */ 144 /* Use the "real" cpu mask of this node as the default */
120 cpumask_copy(&info->def_intr.mask, local_mask); 145 cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
121 /*
122 * Remove HT cores from the default mask. Do this in two steps below.
123 */
124 possible = cpumask_weight(&info->def_intr.mask);
125 ht = cpumask_weight(topology_sibling_cpumask(
126 cpumask_first(&info->def_intr.mask)));
127 /*
128 * Step 1. Skip over the first N HT siblings and use them as the
129 * "real" cores. Assumes that HT cores are not enumerated in
130 * succession (except in the single core case).
131 */
132 curr_cpu = cpumask_first(&info->def_intr.mask);
133 for (i = 0; i < possible / ht; i++)
134 curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
135 /*
136 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
137 * skip any gaps.
138 */
139 for (; i < possible; i++) {
140 cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
141 curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
142 }
143 146
144 /* fill in the receive list */ 147 /* fill in the receive list */
145 possible = cpumask_weight(&info->def_intr.mask); 148 possible = cpumask_weight(&info->def_intr.mask);
@@ -167,8 +170,6 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
167 } 170 }
168 171
169 cpumask_copy(&info->proc.mask, cpu_online_mask); 172 cpumask_copy(&info->proc.mask, cpu_online_mask);
170 dd->affinity = info;
171 return 0;
172} 173}
173 174
174void hfi1_dev_affinity_free(struct hfi1_devdata *dd) 175void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/staging/rdma/hfi1/affinity.h
index b287e4963024..20f52fe74091 100644
--- a/drivers/staging/rdma/hfi1/affinity.h
+++ b/drivers/staging/rdma/hfi1/affinity.h
@@ -64,10 +64,27 @@ enum affinity_flags {
64 AFF_IRQ_LOCAL 64 AFF_IRQ_LOCAL
65}; 65};
66 66
67struct cpu_mask_set {
68 struct cpumask mask;
69 struct cpumask used;
70 uint gen;
71};
72
73struct hfi1_affinity {
74 struct cpu_mask_set def_intr;
75 struct cpu_mask_set rcv_intr;
76 struct cpu_mask_set proc;
77 struct cpumask real_cpu_mask;
78 /* spin lock to protect affinity struct */
79 spinlock_t lock;
80};
81
67struct hfi1_msix_entry; 82struct hfi1_msix_entry;
68 83
84/* Initialize non-HT cpu cores mask */
85int init_real_cpu_mask(struct hfi1_devdata *);
69/* Initialize driver affinity data */ 86/* Initialize driver affinity data */
70int hfi1_dev_affinity_init(struct hfi1_devdata *); 87void hfi1_dev_affinity_init(struct hfi1_devdata *);
71/* Free driver affinity data */ 88/* Free driver affinity data */
72void hfi1_dev_affinity_free(struct hfi1_devdata *); 89void hfi1_dev_affinity_free(struct hfi1_devdata *);
73/* 90/*
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c
index 16eb653903e0..dcae8e723f98 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/staging/rdma/hfi1/chip.c
@@ -123,6 +123,8 @@ struct flag_table {
123 123
124#define MIN_KERNEL_KCTXTS 2 124#define MIN_KERNEL_KCTXTS 2
125#define FIRST_KERNEL_KCTXT 1 125#define FIRST_KERNEL_KCTXT 1
126/* sizes for both the QP and RSM map tables */
127#define NUM_MAP_ENTRIES 256
126#define NUM_MAP_REGS 32 128#define NUM_MAP_REGS 32
127 129
128/* Bit offset into the GUID which carries HFI id information */ 130/* Bit offset into the GUID which carries HFI id information */
@@ -1029,9 +1031,12 @@ static int thermal_init(struct hfi1_devdata *dd);
1029static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, 1031static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1030 int msecs); 1032 int msecs);
1031static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); 1033static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
1034static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
1032static void handle_temp_err(struct hfi1_devdata *); 1035static void handle_temp_err(struct hfi1_devdata *);
1033static void dc_shutdown(struct hfi1_devdata *); 1036static void dc_shutdown(struct hfi1_devdata *);
1034static void dc_start(struct hfi1_devdata *); 1037static void dc_start(struct hfi1_devdata *);
1038static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
1039 unsigned int *np);
1035 1040
1036/* 1041/*
1037 * Error interrupt table entry. This is used as input to the interrupt 1042 * Error interrupt table entry. This is used as input to the interrupt
@@ -5661,7 +5666,7 @@ static int sc_to_vl(struct hfi1_devdata *dd, int sw_index)
5661 sci = &dd->send_contexts[sw_index]; 5666 sci = &dd->send_contexts[sw_index];
5662 5667
5663 /* there is no information for user (PSM) and ack contexts */ 5668 /* there is no information for user (PSM) and ack contexts */
5664 if (sci->type != SC_KERNEL) 5669 if ((sci->type != SC_KERNEL) && (sci->type != SC_VL15))
5665 return -1; 5670 return -1;
5666 5671
5667 sc = sci->sc; 5672 sc = sci->sc;
@@ -6199,18 +6204,13 @@ static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
6199 6204
6200/* 6205/*
6201 * Handle host requests from the 8051. 6206 * Handle host requests from the 8051.
6202 *
6203 * This is a work-queue function outside of the interrupt.
6204 */ 6207 */
6205void handle_8051_request(struct work_struct *work) 6208static void handle_8051_request(struct hfi1_pportdata *ppd)
6206{ 6209{
6207 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
6208 dc_host_req_work);
6209 struct hfi1_devdata *dd = ppd->dd; 6210 struct hfi1_devdata *dd = ppd->dd;
6210 u64 reg; 6211 u64 reg;
6211 u16 data = 0; 6212 u16 data = 0;
6212 u8 type, i, lanes, *cache = ppd->qsfp_info.cache; 6213 u8 type;
6213 u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
6214 6214
6215 reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1); 6215 reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
6216 if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0) 6216 if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
@@ -6231,46 +6231,11 @@ void handle_8051_request(struct work_struct *work)
6231 case HREQ_READ_CONFIG: 6231 case HREQ_READ_CONFIG:
6232 case HREQ_SET_TX_EQ_ABS: 6232 case HREQ_SET_TX_EQ_ABS:
6233 case HREQ_SET_TX_EQ_REL: 6233 case HREQ_SET_TX_EQ_REL:
6234 case HREQ_ENABLE:
6234 dd_dev_info(dd, "8051 request: request 0x%x not supported\n", 6235 dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
6235 type); 6236 type);
6236 hreq_response(dd, HREQ_NOT_SUPPORTED, 0); 6237 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
6237 break; 6238 break;
6238
6239 case HREQ_ENABLE:
6240 lanes = data & 0xF;
6241 for (i = 0; lanes; lanes >>= 1, i++) {
6242 if (!(lanes & 1))
6243 continue;
6244 if (data & 0x200) {
6245 /* enable TX CDR */
6246 if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
6247 cache[QSFP_CDR_INFO_OFFS] & 0x80)
6248 cdr_ctrl_byte |= (1 << (i + 4));
6249 } else {
6250 /* disable TX CDR */
6251 if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
6252 cache[QSFP_CDR_INFO_OFFS] & 0x80)
6253 cdr_ctrl_byte &= ~(1 << (i + 4));
6254 }
6255
6256 if (data & 0x800) {
6257 /* enable RX CDR */
6258 if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
6259 cache[QSFP_CDR_INFO_OFFS] & 0x40)
6260 cdr_ctrl_byte |= (1 << i);
6261 } else {
6262 /* disable RX CDR */
6263 if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
6264 cache[QSFP_CDR_INFO_OFFS] & 0x40)
6265 cdr_ctrl_byte &= ~(1 << i);
6266 }
6267 }
6268 one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
6269 &cdr_ctrl_byte, 1);
6270 hreq_response(dd, HREQ_SUCCESS, data);
6271 refresh_qsfp_cache(ppd, &ppd->qsfp_info);
6272 break;
6273
6274 case HREQ_CONFIG_DONE: 6239 case HREQ_CONFIG_DONE:
6275 hreq_response(dd, HREQ_SUCCESS, 0); 6240 hreq_response(dd, HREQ_SUCCESS, 0);
6276 break; 6241 break;
@@ -6278,7 +6243,6 @@ void handle_8051_request(struct work_struct *work)
6278 case HREQ_INTERFACE_TEST: 6243 case HREQ_INTERFACE_TEST:
6279 hreq_response(dd, HREQ_SUCCESS, data); 6244 hreq_response(dd, HREQ_SUCCESS, data);
6280 break; 6245 break;
6281
6282 default: 6246 default:
6283 dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type); 6247 dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
6284 hreq_response(dd, HREQ_NOT_SUPPORTED, 0); 6248 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
@@ -6849,6 +6813,75 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd)
6849 ppd->neighbor_fm_security = 0; 6813 ppd->neighbor_fm_security = 0;
6850} 6814}
6851 6815
6816static const char * const link_down_reason_strs[] = {
6817 [OPA_LINKDOWN_REASON_NONE] = "None",
 6818	[OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0",
6819 [OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length",
6820 [OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long",
6821 [OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short",
6822 [OPA_LINKDOWN_REASON_BAD_SLID] = "Bad SLID",
6823 [OPA_LINKDOWN_REASON_BAD_DLID] = "Bad DLID",
6824 [OPA_LINKDOWN_REASON_BAD_L2] = "Bad L2",
6825 [OPA_LINKDOWN_REASON_BAD_SC] = "Bad SC",
6826 [OPA_LINKDOWN_REASON_RCV_ERROR_8] = "Receive error 8",
6827 [OPA_LINKDOWN_REASON_BAD_MID_TAIL] = "Bad mid tail",
6828 [OPA_LINKDOWN_REASON_RCV_ERROR_10] = "Receive error 10",
6829 [OPA_LINKDOWN_REASON_PREEMPT_ERROR] = "Preempt error",
6830 [OPA_LINKDOWN_REASON_PREEMPT_VL15] = "Preempt vl15",
6831 [OPA_LINKDOWN_REASON_BAD_VL_MARKER] = "Bad VL marker",
6832 [OPA_LINKDOWN_REASON_RCV_ERROR_14] = "Receive error 14",
6833 [OPA_LINKDOWN_REASON_RCV_ERROR_15] = "Receive error 15",
6834 [OPA_LINKDOWN_REASON_BAD_HEAD_DIST] = "Bad head distance",
6835 [OPA_LINKDOWN_REASON_BAD_TAIL_DIST] = "Bad tail distance",
6836 [OPA_LINKDOWN_REASON_BAD_CTRL_DIST] = "Bad control distance",
6837 [OPA_LINKDOWN_REASON_BAD_CREDIT_ACK] = "Bad credit ack",
6838 [OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER] = "Unsupported VL marker",
6839 [OPA_LINKDOWN_REASON_BAD_PREEMPT] = "Bad preempt",
6840 [OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT] = "Bad control flit",
6841 [OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT] = "Exceed multicast limit",
6842 [OPA_LINKDOWN_REASON_RCV_ERROR_24] = "Receive error 24",
6843 [OPA_LINKDOWN_REASON_RCV_ERROR_25] = "Receive error 25",
6844 [OPA_LINKDOWN_REASON_RCV_ERROR_26] = "Receive error 26",
6845 [OPA_LINKDOWN_REASON_RCV_ERROR_27] = "Receive error 27",
6846 [OPA_LINKDOWN_REASON_RCV_ERROR_28] = "Receive error 28",
6847 [OPA_LINKDOWN_REASON_RCV_ERROR_29] = "Receive error 29",
6848 [OPA_LINKDOWN_REASON_RCV_ERROR_30] = "Receive error 30",
6849 [OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN] =
6850 "Excessive buffer overrun",
6851 [OPA_LINKDOWN_REASON_UNKNOWN] = "Unknown",
6852 [OPA_LINKDOWN_REASON_REBOOT] = "Reboot",
6853 [OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN] = "Neighbor unknown",
6854 [OPA_LINKDOWN_REASON_FM_BOUNCE] = "FM bounce",
6855 [OPA_LINKDOWN_REASON_SPEED_POLICY] = "Speed policy",
6856 [OPA_LINKDOWN_REASON_WIDTH_POLICY] = "Width policy",
6857 [OPA_LINKDOWN_REASON_DISCONNECTED] = "Disconnected",
6858 [OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED] =
6859 "Local media not installed",
6860 [OPA_LINKDOWN_REASON_NOT_INSTALLED] = "Not installed",
6861 [OPA_LINKDOWN_REASON_CHASSIS_CONFIG] = "Chassis config",
6862 [OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED] =
6863 "End to end not installed",
6864 [OPA_LINKDOWN_REASON_POWER_POLICY] = "Power policy",
6865 [OPA_LINKDOWN_REASON_LINKSPEED_POLICY] = "Link speed policy",
6866 [OPA_LINKDOWN_REASON_LINKWIDTH_POLICY] = "Link width policy",
6867 [OPA_LINKDOWN_REASON_SWITCH_MGMT] = "Switch management",
6868 [OPA_LINKDOWN_REASON_SMA_DISABLED] = "SMA disabled",
6869 [OPA_LINKDOWN_REASON_TRANSIENT] = "Transient"
6870};
6871
6872/* return the neighbor link down reason string */
6873static const char *link_down_reason_str(u8 reason)
6874{
6875 const char *str = NULL;
6876
6877 if (reason < ARRAY_SIZE(link_down_reason_strs))
6878 str = link_down_reason_strs[reason];
6879 if (!str)
6880 str = "(invalid)";
6881
6882 return str;
6883}
6884
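The helper above guards against both out-of-range reason codes and holes left by the sparse designated-initializer table. A minimal userspace sketch of the same lookup pattern, with a deliberately reduced, illustrative table (not the driver's):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* sparse table: unnamed indices are implicitly NULL */
static const char * const reason_strs[] = {
	[0x00] = "None",
	[0x01] = "Receive error 0",
	[0x0c] = "Preempt error",
};

static const char *reason_str(unsigned char reason)
{
	const char *str = NULL;

	if (reason < ARRAY_SIZE(reason_strs))
		str = reason_strs[reason];	/* may still hit a NULL hole */
	return str ? str : "(invalid)";
}

int main(void)
{
	printf("%s\n", reason_str(0x01));	/* Receive error 0 */
	printf("%s\n", reason_str(0x05));	/* (invalid): hole in table */
	printf("%s\n", reason_str(0xff));	/* (invalid): out of range */
	return 0;
}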
6852/* 6885/*
6853 * Handle a link down interrupt from the 8051. 6886 * Handle a link down interrupt from the 8051.
6854 * 6887 *
@@ -6857,8 +6890,11 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd)
6857void handle_link_down(struct work_struct *work) 6890void handle_link_down(struct work_struct *work)
6858{ 6891{
6859 u8 lcl_reason, neigh_reason = 0; 6892 u8 lcl_reason, neigh_reason = 0;
6893 u8 link_down_reason;
6860 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, 6894 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
6861 link_down_work); 6895 link_down_work);
6896 int was_up;
6897 static const char ldr_str[] = "Link down reason: ";
6862 6898
6863 if ((ppd->host_link_state & 6899 if ((ppd->host_link_state &
6864 (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) && 6900 (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) &&
@@ -6867,20 +6903,63 @@ void handle_link_down(struct work_struct *work)
6867 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED); 6903 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED);
6868 6904
6869 /* Go offline first, then deal with reading/writing through 8051 */ 6905 /* Go offline first, then deal with reading/writing through 8051 */
6906 was_up = !!(ppd->host_link_state & HLS_UP);
6870 set_link_state(ppd, HLS_DN_OFFLINE); 6907 set_link_state(ppd, HLS_DN_OFFLINE);
6871 6908
6872 lcl_reason = 0; 6909 if (was_up) {
6873 read_planned_down_reason_code(ppd->dd, &neigh_reason); 6910 lcl_reason = 0;
6911 /* link down reason is only valid if the link was up */
6912 read_link_down_reason(ppd->dd, &link_down_reason);
6913 switch (link_down_reason) {
6914 case LDR_LINK_TRANSFER_ACTIVE_LOW:
6915 /* the link went down, no idle message reason */
6916 dd_dev_info(ppd->dd, "%sUnexpected link down\n",
6917 ldr_str);
6918 break;
6919 case LDR_RECEIVED_LINKDOWN_IDLE_MSG:
6920 /*
6921 * The neighbor reason is only valid if an idle message
6922 * was received for it.
6923 */
6924 read_planned_down_reason_code(ppd->dd, &neigh_reason);
6925 dd_dev_info(ppd->dd,
6926 "%sNeighbor link down message %d, %s\n",
6927 ldr_str, neigh_reason,
6928 link_down_reason_str(neigh_reason));
6929 break;
6930 case LDR_RECEIVED_HOST_OFFLINE_REQ:
6931 dd_dev_info(ppd->dd,
6932 "%sHost requested link to go offline\n",
6933 ldr_str);
6934 break;
6935 default:
6936 dd_dev_info(ppd->dd, "%sUnknown reason 0x%x\n",
6937 ldr_str, link_down_reason);
6938 break;
6939 }
6874 6940
6875 /* 6941 /*
6876 * If no reason, assume peer-initiated but missed 6942 * If no reason, assume peer-initiated but missed
6877 * LinkGoingDown idle flits. 6943 * LinkGoingDown idle flits.
6878 */ 6944 */
6879 if (neigh_reason == 0) 6945 if (neigh_reason == 0)
6880 lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN; 6946 lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
6947 } else {
6948 /* went down while polling or going up */
6949 lcl_reason = OPA_LINKDOWN_REASON_TRANSIENT;
6950 }
6881 6951
6882 set_link_down_reason(ppd, lcl_reason, neigh_reason, 0); 6952 set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
6883 6953
6954 /* inform the SMA when the link transitions from up to down */
6955 if (was_up && ppd->local_link_down_reason.sma == 0 &&
6956 ppd->neigh_link_down_reason.sma == 0) {
6957 ppd->local_link_down_reason.sma =
6958 ppd->local_link_down_reason.latest;
6959 ppd->neigh_link_down_reason.sma =
6960 ppd->neigh_link_down_reason.latest;
6961 }
6962
6884 reset_neighbor_info(ppd); 6963 reset_neighbor_info(ppd);
6885 6964
6886 /* disable the port */ 6965 /* disable the port */
@@ -6890,7 +6969,7 @@ void handle_link_down(struct work_struct *work)
6890 * If there is no cable attached, turn the DC off. Otherwise, 6969 * If there is no cable attached, turn the DC off. Otherwise,
6891 * start the link bring up. 6970 * start the link bring up.
6892 */ 6971 */
6893 if (!qsfp_mod_present(ppd)) { 6972 if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) {
6894 dc_shutdown(ppd->dd); 6973 dc_shutdown(ppd->dd);
6895 } else { 6974 } else {
6896 tune_serdes(ppd); 6975 tune_serdes(ppd);
@@ -7373,7 +7452,11 @@ retry:
7373 ppd->link_width_downgrade_rx_active = rx; 7452 ppd->link_width_downgrade_rx_active = rx;
7374 } 7453 }
7375 7454
7376 if (lwde == 0) { 7455 if (ppd->link_width_downgrade_tx_active == 0 ||
7456 ppd->link_width_downgrade_rx_active == 0) {
7457 /* the 8051 reported a dead link as a downgrade */
7458 dd_dev_err(ppd->dd, "Link downgrade is really a link down, ignoring\n");
7459 } else if (lwde == 0) {
7377 /* downgrade is disabled */ 7460 /* downgrade is disabled */
7378 7461
7379 /* bounce if not at starting active width */ 7462 /* bounce if not at starting active width */
@@ -7534,7 +7617,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
7534 host_msg &= ~(u64)LINKUP_ACHIEVED; 7617 host_msg &= ~(u64)LINKUP_ACHIEVED;
7535 } 7618 }
7536 if (host_msg & EXT_DEVICE_CFG_REQ) { 7619 if (host_msg & EXT_DEVICE_CFG_REQ) {
7537 queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work); 7620 handle_8051_request(ppd);
7538 host_msg &= ~(u64)EXT_DEVICE_CFG_REQ; 7621 host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
7539 } 7622 }
7540 if (host_msg & VERIFY_CAP_FRAME) { 7623 if (host_msg & VERIFY_CAP_FRAME) {
@@ -8660,6 +8743,14 @@ static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
8660 *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK; 8743 *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
8661} 8744}
8662 8745
8746static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr)
8747{
8748 u32 frame;
8749
8750 read_8051_config(dd, LINK_DOWN_REASON, GENERAL_CONFIG, &frame);
8751 *ldr = (frame & 0xff);
8752}
8753
8663static int read_tx_settings(struct hfi1_devdata *dd, 8754static int read_tx_settings(struct hfi1_devdata *dd,
8664 u8 *enable_lane_tx, 8755 u8 *enable_lane_tx,
8665 u8 *tx_polarity_inversion, 8756 u8 *tx_polarity_inversion,
@@ -9049,9 +9140,9 @@ set_local_link_attributes_fail:
9049} 9140}
9050 9141
9051/* 9142/*
9052 * Call this to start the link. Schedule a retry if the cable is not 9143 * Call this to start the link.
9053 * present or if unable to start polling. Do not do anything if the 9144 * Do not do anything if the link is disabled.
9054 * link is disabled. Returns 0 if link is disabled or moved to polling 9145 * Returns 0 if link is disabled, moved to polling, or the driver is not ready.
9055 */ 9146 */
9056int start_link(struct hfi1_pportdata *ppd) 9147int start_link(struct hfi1_pportdata *ppd)
9057{ 9148{
@@ -9068,15 +9159,7 @@ int start_link(struct hfi1_pportdata *ppd)
9068 return 0; 9159 return 0;
9069 } 9160 }
9070 9161
9071 if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES || 9162 return set_link_state(ppd, HLS_DN_POLL);
9072 loopback == LOOPBACK_LCB ||
9073 ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
9074 return set_link_state(ppd, HLS_DN_POLL);
9075
9076 dd_dev_info(ppd->dd,
9077 "%s: stopping link start because no cable is present\n",
9078 __func__);
9079 return -EAGAIN;
9080} 9163}
9081 9164
9082static void wait_for_qsfp_init(struct hfi1_pportdata *ppd) 9165static void wait_for_qsfp_init(struct hfi1_pportdata *ppd)
@@ -9247,7 +9330,7 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
9247 return 0; 9330 return 0;
9248} 9331}
9249 9332
 9250/* This routine will only be scheduled if the QSFP module is present */ 9333/* This routine will only be scheduled if the QSFP module-present signal is asserted */
9251void qsfp_event(struct work_struct *work) 9334void qsfp_event(struct work_struct *work)
9252{ 9335{
9253 struct qsfp_data *qd; 9336 struct qsfp_data *qd;
@@ -9676,6 +9759,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
9676 & SEND_LEN_CHECK1_LEN_VL15_MASK) << 9759 & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
9677 SEND_LEN_CHECK1_LEN_VL15_SHIFT; 9760 SEND_LEN_CHECK1_LEN_VL15_SHIFT;
9678 int i; 9761 int i;
9762 u32 thres;
9679 9763
9680 for (i = 0; i < ppd->vls_supported; i++) { 9764 for (i = 0; i < ppd->vls_supported; i++) {
9681 if (dd->vld[i].mtu > maxvlmtu) 9765 if (dd->vld[i].mtu > maxvlmtu)
@@ -9694,16 +9778,17 @@ static void set_send_length(struct hfi1_pportdata *ppd)
9694 /* adjust kernel credit return thresholds based on new MTUs */ 9778 /* adjust kernel credit return thresholds based on new MTUs */
9695 /* all kernel receive contexts have the same hdrqentsize */ 9779 /* all kernel receive contexts have the same hdrqentsize */
9696 for (i = 0; i < ppd->vls_supported; i++) { 9780 for (i = 0; i < ppd->vls_supported; i++) {
9697 sc_set_cr_threshold(dd->vld[i].sc, 9781 thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
9698 sc_mtu_to_threshold(dd->vld[i].sc, 9782 sc_mtu_to_threshold(dd->vld[i].sc,
9699 dd->vld[i].mtu, 9783 dd->vld[i].mtu,
9700 dd->rcd[0]->
9701 rcvhdrqentsize));
9702 }
9703 sc_set_cr_threshold(dd->vld[15].sc,
9704 sc_mtu_to_threshold(dd->vld[15].sc,
9705 dd->vld[15].mtu,
9706 dd->rcd[0]->rcvhdrqentsize)); 9784 dd->rcd[0]->rcvhdrqentsize));
9785 sc_set_cr_threshold(dd->vld[i].sc, thres);
9786 }
9787 thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50),
9788 sc_mtu_to_threshold(dd->vld[15].sc,
9789 dd->vld[15].mtu,
9790 dd->rcd[0]->rcvhdrqentsize));
9791 sc_set_cr_threshold(dd->vld[15].sc, thres);
9707 9792
9708 /* Adjust maximum MTU for the port in DC */ 9793 /* Adjust maximum MTU for the port in DC */
9709 dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 : 9794 dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
@@ -10030,7 +10115,6 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
10030 struct hfi1_devdata *dd = ppd->dd; 10115 struct hfi1_devdata *dd = ppd->dd;
10031 struct ib_event event = {.device = NULL}; 10116 struct ib_event event = {.device = NULL};
10032 int ret1, ret = 0; 10117 int ret1, ret = 0;
10033 int was_up, is_down;
10034 int orig_new_state, poll_bounce; 10118 int orig_new_state, poll_bounce;
10035 10119
10036 mutex_lock(&ppd->hls_lock); 10120 mutex_lock(&ppd->hls_lock);
@@ -10049,8 +10133,6 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
10049 poll_bounce ? "(bounce) " : "", 10133 poll_bounce ? "(bounce) " : "",
10050 link_state_reason_name(ppd, state)); 10134 link_state_reason_name(ppd, state));
10051 10135
10052 was_up = !!(ppd->host_link_state & HLS_UP);
10053
10054 /* 10136 /*
10055 * If we're going to a (HLS_*) link state that implies the logical 10137 * If we're going to a (HLS_*) link state that implies the logical
10056 * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then 10138 * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
@@ -10261,17 +10343,6 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
10261 break; 10343 break;
10262 } 10344 }
10263 10345
10264 is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
10265 HLS_DN_DISABLE | HLS_DN_OFFLINE));
10266
10267 if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
10268 ppd->neigh_link_down_reason.sma == 0) {
10269 ppd->local_link_down_reason.sma =
10270 ppd->local_link_down_reason.latest;
10271 ppd->neigh_link_down_reason.sma =
10272 ppd->neigh_link_down_reason.latest;
10273 }
10274
10275 goto done; 10346 goto done;
10276 10347
10277unexpected: 10348unexpected:
@@ -12673,22 +12744,24 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
12673 int total_contexts; 12744 int total_contexts;
12674 int ret; 12745 int ret;
12675 unsigned ngroups; 12746 unsigned ngroups;
12747 int qos_rmt_count;
12748 int user_rmt_reduced;
12676 12749
12677 /* 12750 /*
12678 * Kernel contexts: (to be fixed later): 12751 * Kernel receive contexts:
12679 * - min or 2 or 1 context/numa 12752 * - min of 2 or 1 context/numa (excluding control context)
12680 * - Context 0 - control context (VL15/multicast/error) 12753 * - Context 0 - control context (VL15/multicast/error)
12681 * - Context 1 - default context 12754 * - Context 1 - first kernel context
12755 * - Context 2 - second kernel context
12756 * ...
12682 */ 12757 */
12683 if (n_krcvqs) 12758 if (n_krcvqs)
12684 /* 12759 /*
12685 * Don't count context 0 in n_krcvqs since 12760 * n_krcvqs is the sum of module parameter kernel receive
12686	 * it isn't used for normal verbs traffic. 12761	 * contexts, krcvqs[]. It does not include the control
12687 * 12762 * context, so add that.
12688 * krcvqs will reflect number of kernel
12689 * receive contexts above 0.
12690 */ 12763 */
12691 num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1; 12764 num_kernel_contexts = n_krcvqs + 1;
12692 else 12765 else
12693 num_kernel_contexts = num_online_nodes() + 1; 12766 num_kernel_contexts = num_online_nodes() + 1;
12694 num_kernel_contexts = 12767 num_kernel_contexts =
@@ -12705,12 +12778,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
12705 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; 12778 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
12706 } 12779 }
12707 /* 12780 /*
12708 * User contexts: (to be fixed later) 12781 * User contexts:
12709 * - default to 1 user context per CPU if num_user_contexts is 12782 * - default to 1 user context per real (non-HT) CPU core if
12710 * negative 12783 * num_user_contexts is negative
12711 */ 12784 */
12712 if (num_user_contexts < 0) 12785 if (num_user_contexts < 0)
12713 num_user_contexts = num_online_cpus(); 12786 num_user_contexts =
12787 cpumask_weight(&dd->affinity->real_cpu_mask);
12714 12788
12715 total_contexts = num_kernel_contexts + num_user_contexts; 12789 total_contexts = num_kernel_contexts + num_user_contexts;
12716 12790
@@ -12727,6 +12801,19 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
12727 total_contexts = num_kernel_contexts + num_user_contexts; 12801 total_contexts = num_kernel_contexts + num_user_contexts;
12728 } 12802 }
12729 12803
12804 /* each user context requires an entry in the RMT */
12805 qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
12806 if (qos_rmt_count + num_user_contexts > NUM_MAP_ENTRIES) {
12807 user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
12808 dd_dev_err(dd,
12809 "RMT size is reducing the number of user receive contexts from %d to %d\n",
12810 (int)num_user_contexts,
12811 user_rmt_reduced);
12812 /* recalculate */
12813 num_user_contexts = user_rmt_reduced;
12814 total_contexts = num_kernel_contexts + num_user_contexts;
12815 }
12816
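The clamp added above is plain arithmetic: the QoS entries and one RSM map entry per user context must both fit in the map table. A hedged standalone sketch of the recalculation (NUM_MAP_ENTRIES and the example counts are illustrative inputs, not taken from hardware):

#include <stdio.h>

#define NUM_MAP_ENTRIES 256

/* Reduce user contexts so QoS entries + one entry per user context fit. */
static int clamp_user_contexts(int qos_rmt_count, int num_user_contexts)
{
	if (qos_rmt_count + num_user_contexts > NUM_MAP_ENTRIES)
		return NUM_MAP_ENTRIES - qos_rmt_count;
	return num_user_contexts;
}

int main(void)
{
	/* e.g. 32 QoS entries leave room for at most 224 user contexts */
	printf("%d\n", clamp_user_contexts(32, 240));	/* 224 */
	printf("%d\n", clamp_user_contexts(32, 100));	/* 100, unchanged */
	return 0;
}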
12730 /* the first N are kernel contexts, the rest are user contexts */ 12817 /* the first N are kernel contexts, the rest are user contexts */
12731 dd->num_rcv_contexts = total_contexts; 12818 dd->num_rcv_contexts = total_contexts;
12732 dd->n_krcv_queues = num_kernel_contexts; 12819 dd->n_krcv_queues = num_kernel_contexts;
@@ -12776,12 +12863,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
12776 dd->num_send_contexts = ret; 12863 dd->num_send_contexts = ret;
12777 dd_dev_info( 12864 dd_dev_info(
12778 dd, 12865 dd,
12779 "send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n", 12866 "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n",
12780 dd->chip_send_contexts, 12867 dd->chip_send_contexts,
12781 dd->num_send_contexts, 12868 dd->num_send_contexts,
12782 dd->sc_sizes[SC_KERNEL].count, 12869 dd->sc_sizes[SC_KERNEL].count,
12783 dd->sc_sizes[SC_ACK].count, 12870 dd->sc_sizes[SC_ACK].count,
12784 dd->sc_sizes[SC_USER].count); 12871 dd->sc_sizes[SC_USER].count,
12872 dd->sc_sizes[SC_VL15].count);
12785 ret = 0; /* success */ 12873 ret = 0; /* success */
12786 } 12874 }
12787 12875
@@ -13451,122 +13539,224 @@ static void init_qpmap_table(struct hfi1_devdata *dd,
13451 int i; 13539 int i;
13452 u64 ctxt = first_ctxt; 13540 u64 ctxt = first_ctxt;
13453 13541
13454 for (i = 0; i < 256;) { 13542 for (i = 0; i < 256; i++) {
13455 reg |= ctxt << (8 * (i % 8)); 13543 reg |= ctxt << (8 * (i % 8));
13456 i++;
13457 ctxt++; 13544 ctxt++;
13458 if (ctxt > last_ctxt) 13545 if (ctxt > last_ctxt)
13459 ctxt = first_ctxt; 13546 ctxt = first_ctxt;
13460 if (i % 8 == 0) { 13547 if (i % 8 == 7) {
13461 write_csr(dd, regno, reg); 13548 write_csr(dd, regno, reg);
13462 reg = 0; 13549 reg = 0;
13463 regno += 8; 13550 regno += 8;
13464 } 13551 }
13465 } 13552 }
13466 if (i % 8)
13467 write_csr(dd, regno, reg);
13468 13553
13469 add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK 13554 add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
13470 | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK); 13555 | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
13471} 13556}
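The rewritten loop packs 256 one-byte context values into 32 consecutive 64-bit registers, flushing each register once its eight byte lanes are full (i % 8 == 7) instead of relying on the old post-increment check plus trailing write. A userspace sketch of the packing, with write_csr replaced by a print; the register offset and context range are illustrative:

#include <stdio.h>
#include <stdint.h>

static void write_csr(uint64_t regno, uint64_t val)
{
	printf("reg 0x%04llx = 0x%016llx\n",
	       (unsigned long long)regno, (unsigned long long)val);
}

/* Map all 256 QPN table slots round-robin onto [first_ctxt, last_ctxt]. */
static void init_qpmap_table(uint64_t regno, uint64_t first_ctxt,
			     uint64_t last_ctxt)
{
	uint64_t reg = 0;
	uint64_t ctxt = first_ctxt;
	int i;

	for (i = 0; i < 256; i++) {
		reg |= ctxt << (8 * (i % 8));	/* fill one byte lane */
		ctxt++;
		if (ctxt > last_ctxt)
			ctxt = first_ctxt;
		if (i % 8 == 7) {		/* register full: flush it */
			write_csr(regno, reg);
			reg = 0;
			regno += 8;
		}
	}
}

int main(void)
{
	init_qpmap_table(0x900, 1, 3);	/* three kernel contexts, slots 1..3 */
	return 0;
}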
13472 13557
13473/** 13558struct rsm_map_table {
13474 * init_qos - init RX qos 13559 u64 map[NUM_MAP_REGS];
13475 * @dd - device data 13560 unsigned int used;
13476 * @first_context 13561};
13477 * 13562
13478 * This routine initializes Rule 0 and the 13563struct rsm_rule_data {
13479 * RSM map table to implement qos. 13564 u8 offset;
13480 * 13565 u8 pkt_type;
13481 * If all of the limit tests succeed, 13566 u32 field1_off;
13482 * qos is applied based on the array 13567 u32 field2_off;
13483 * interpretation of krcvqs where 13568 u32 index1_off;
13484 * entry 0 is VL0. 13569 u32 index1_width;
13485 * 13570 u32 index2_off;
13486 * The number of vl bits (n) and the number of qpn 13571 u32 index2_width;
13487 * bits (m) are computed to feed both the RSM map table 13572 u32 mask1;
13488 * and the single rule. 13573 u32 value1;
13489 * 13574 u32 mask2;
13575 u32 value2;
13576};
13577
13578/*
13579 * Return an initialized RMT map table for users to fill in. OK if it
13580 * returns NULL, indicating no table.
13490 */ 13581 */
13491static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt) 13582static struct rsm_map_table *alloc_rsm_map_table(struct hfi1_devdata *dd)
13492{ 13583{
13584 struct rsm_map_table *rmt;
13585 u8 rxcontext = is_ax(dd) ? 0 : 0xff; /* 0 is default if a0 ver. */
13586
13587 rmt = kmalloc(sizeof(*rmt), GFP_KERNEL);
13588 if (rmt) {
13589 memset(rmt->map, rxcontext, sizeof(rmt->map));
13590 rmt->used = 0;
13591 }
13592
13593 return rmt;
13594}
13595
13596/*
13597 * Write the final RMT map table to the chip and free the table. OK if
13598 * table is NULL.
13599 */
13600static void complete_rsm_map_table(struct hfi1_devdata *dd,
13601 struct rsm_map_table *rmt)
13602{
13603 int i;
13604
13605 if (rmt) {
13606 /* write table to chip */
13607 for (i = 0; i < NUM_MAP_REGS; i++)
13608 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rmt->map[i]);
13609
13610 /* enable RSM */
13611 add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
13612 }
13613}
13614
13615/*
13616 * Add a receive side mapping rule.
13617 */
13618static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index,
13619 struct rsm_rule_data *rrd)
13620{
13621 write_csr(dd, RCV_RSM_CFG + (8 * rule_index),
13622 (u64)rrd->offset << RCV_RSM_CFG_OFFSET_SHIFT |
13623 1ull << rule_index | /* enable bit */
13624 (u64)rrd->pkt_type << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
13625 write_csr(dd, RCV_RSM_SELECT + (8 * rule_index),
13626 (u64)rrd->field1_off << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
13627 (u64)rrd->field2_off << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
13628 (u64)rrd->index1_off << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
13629 (u64)rrd->index1_width << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
13630 (u64)rrd->index2_off << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
13631 (u64)rrd->index2_width << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
13632 write_csr(dd, RCV_RSM_MATCH + (8 * rule_index),
13633 (u64)rrd->mask1 << RCV_RSM_MATCH_MASK1_SHIFT |
13634 (u64)rrd->value1 << RCV_RSM_MATCH_VALUE1_SHIFT |
13635 (u64)rrd->mask2 << RCV_RSM_MATCH_MASK2_SHIFT |
13636 (u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT);
13637}
13638
13639/* return the number of RSM map table entries that will be used for QOS */
13640static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
13641 unsigned int *np)
13642{
13643 int i;
13644 unsigned int m, n;
13493 u8 max_by_vl = 0; 13645 u8 max_by_vl = 0;
13494 unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
13495 u64 *rsmmap;
13496 u64 reg;
13497 u8 rxcontext = is_ax(dd) ? 0 : 0xff; /* 0 is default if a0 ver. */
13498 13646
13499 /* validate */ 13647 /* is QOS active at all? */
13500 if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS || 13648 if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
13501 num_vls == 1 || 13649 num_vls == 1 ||
13502 krcvqsset <= 1) 13650 krcvqsset <= 1)
13503 goto bail; 13651 goto no_qos;
13504 for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++) 13652
13653 /* determine bits for qpn */
13654 for (i = 0; i < min_t(unsigned int, num_vls, krcvqsset); i++)
13505 if (krcvqs[i] > max_by_vl) 13655 if (krcvqs[i] > max_by_vl)
13506 max_by_vl = krcvqs[i]; 13656 max_by_vl = krcvqs[i];
13507 if (max_by_vl > 32) 13657 if (max_by_vl > 32)
13508 goto bail; 13658 goto no_qos;
13509 qpns_per_vl = __roundup_pow_of_two(max_by_vl); 13659 m = ilog2(__roundup_pow_of_two(max_by_vl));
13510 /* determine bits vl */ 13660
13511 n = ilog2(num_vls); 13661 /* determine bits for vl */
13512 /* determine bits for qpn */ 13662 n = ilog2(__roundup_pow_of_two(num_vls));
13513 m = ilog2(qpns_per_vl); 13663
13664 /* reject if too much is used */
13514 if ((m + n) > 7) 13665 if ((m + n) > 7)
13666 goto no_qos;
13667
13668 if (mp)
13669 *mp = m;
13670 if (np)
13671 *np = n;
13672
13673 return 1 << (m + n);
13674
13675no_qos:
13676 if (mp)
13677 *mp = 0;
13678 if (np)
13679 *np = 0;
13680 return 0;
13681}
13682
13683/**
13684 * init_qos - init RX qos
13685 * @dd - device data
13686 * @rmt - RSM map table
13687 *
13688 * This routine initializes Rule 0 and the RSM map table to implement
13689 * quality of service (qos).
13690 *
13691 * If all of the limit tests succeed, qos is applied based on the array
13692 * interpretation of krcvqs where entry 0 is VL0.
13693 *
13694 * The number of vl bits (n) and the number of qpn bits (m) are computed to
13695 * feed both the RSM map table and the single rule.
13696 */
13697static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
13698{
13699 struct rsm_rule_data rrd;
13700 unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
13701 unsigned int rmt_entries;
13702 u64 reg;
13703
13704 if (!rmt)
13515 goto bail; 13705 goto bail;
13516 if (num_vls * qpns_per_vl > dd->chip_rcv_contexts) 13706 rmt_entries = qos_rmt_entries(dd, &m, &n);
13707 if (rmt_entries == 0)
13517 goto bail; 13708 goto bail;
13518 rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL); 13709 qpns_per_vl = 1 << m;
13519 if (!rsmmap) 13710
13711 /* enough room in the map table? */
13712 rmt_entries = 1 << (m + n);
13713 if (rmt->used + rmt_entries >= NUM_MAP_ENTRIES)
13520 goto bail; 13714 goto bail;
13521 memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64)); 13715
13522	/* init the local copy of the table */ 13716	/* add qos entries to the RSM map table */
13523 for (i = 0, ctxt = first_ctxt; i < num_vls; i++) { 13717 for (i = 0, ctxt = FIRST_KERNEL_KCTXT; i < num_vls; i++) {
13524 unsigned tctxt; 13718 unsigned tctxt;
13525 13719
13526 for (qpn = 0, tctxt = ctxt; 13720 for (qpn = 0, tctxt = ctxt;
13527 krcvqs[i] && qpn < qpns_per_vl; qpn++) { 13721 krcvqs[i] && qpn < qpns_per_vl; qpn++) {
13528 unsigned idx, regoff, regidx; 13722 unsigned idx, regoff, regidx;
13529 13723
13530 /* generate index <= 128 */ 13724 /* generate the index the hardware will produce */
13531 idx = (qpn << n) ^ i; 13725 idx = rmt->used + ((qpn << n) ^ i);
13532 regoff = (idx % 8) * 8; 13726 regoff = (idx % 8) * 8;
13533 regidx = idx / 8; 13727 regidx = idx / 8;
13534 reg = rsmmap[regidx]; 13728 /* replace default with context number */
13535 /* replace 0xff with context number */ 13729 reg = rmt->map[regidx];
13536 reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK 13730 reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
13537 << regoff); 13731 << regoff);
13538 reg |= (u64)(tctxt++) << regoff; 13732 reg |= (u64)(tctxt++) << regoff;
13539 rsmmap[regidx] = reg; 13733 rmt->map[regidx] = reg;
13540 if (tctxt == ctxt + krcvqs[i]) 13734 if (tctxt == ctxt + krcvqs[i])
13541 tctxt = ctxt; 13735 tctxt = ctxt;
13542 } 13736 }
13543 ctxt += krcvqs[i]; 13737 ctxt += krcvqs[i];
13544 } 13738 }
13545 /* flush cached copies to chip */ 13739
13546 for (i = 0; i < NUM_MAP_REGS; i++) 13740 rrd.offset = rmt->used;
13547 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]); 13741 rrd.pkt_type = 2;
13548 /* add rule0 */ 13742 rrd.field1_off = LRH_BTH_MATCH_OFFSET;
13549 write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */, 13743 rrd.field2_off = LRH_SC_MATCH_OFFSET;
13550 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK << 13744 rrd.index1_off = LRH_SC_SELECT_OFFSET;
13551 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT | 13745 rrd.index1_width = n;
13552 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT); 13746 rrd.index2_off = QPN_SELECT_OFFSET;
13553 write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */, 13747 rrd.index2_width = m + n;
13554 LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT | 13748 rrd.mask1 = LRH_BTH_MASK;
13555 LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT | 13749 rrd.value1 = LRH_BTH_VALUE;
13556 LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT | 13750 rrd.mask2 = LRH_SC_MASK;
13557 ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT | 13751 rrd.value2 = LRH_SC_VALUE;
13558 QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT | 13752
13559 ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT); 13753 /* add rule 0 */
13560 write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */, 13754 add_rsm_rule(dd, 0, &rrd);
13561 LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT | 13755
13562 LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT | 13756 /* mark RSM map entries as used */
13563 LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT | 13757 rmt->used += rmt_entries;
13564 LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT); 13758 /* map everything else to the mcast/err/vl15 context */
13565 /* Enable RSM */ 13759 init_qpmap_table(dd, HFI1_CTRL_CTXT, HFI1_CTRL_CTXT);
13566 add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
13567 kfree(rsmmap);
13568 /* map everything else to first context */
13569 init_qpmap_table(dd, FIRST_KERNEL_KCTXT, MIN_KERNEL_KCTXTS - 1);
13570 dd->qos_shift = n + 1; 13760 dd->qos_shift = n + 1;
13571 return; 13761 return;
13572bail: 13762bail:
@@ -13574,13 +13764,86 @@ bail:
13574 init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1); 13764 init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
13575} 13765}
13576 13766
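Inside the QoS loop, each map slot index is rmt->used + ((qpn << n) ^ vl): the low n bits pick the VL, the upper bits pick a receive context within that VL's krcvqs block, and the whole pattern sits past any entries already in use. A small sketch that only prints the resulting slot-to-context assignment; the context numbering and krcvqs values are illustrative:

#include <stdio.h>

int main(void)
{
	/* example: 2 VLs (n = 1), up to 2 QPN slots per VL (m = 1) */
	unsigned int krcvqs[] = { 2, 2 };
	unsigned int num_vls = 2, n = 1, m = 1;
	unsigned int qpns_per_vl = 1u << m;
	unsigned int used = 0;		/* RMT entries already consumed */
	unsigned int first_kctxt = 1;	/* context 0 is the control context */
	unsigned int vl, qpn, ctxt = first_kctxt;

	for (vl = 0; vl < num_vls; vl++) {
		unsigned int tctxt = ctxt;

		for (qpn = 0; krcvqs[vl] && qpn < qpns_per_vl; qpn++) {
			unsigned int idx = used + ((qpn << n) ^ vl);

			printf("map[%u] -> context %u\n", idx, tctxt++);
			if (tctxt == ctxt + krcvqs[vl])
				tctxt = ctxt;	/* wrap within this VL */
		}
		ctxt += krcvqs[vl];
	}
	return 0;
}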
13767static void init_user_fecn_handling(struct hfi1_devdata *dd,
13768 struct rsm_map_table *rmt)
13769{
13770 struct rsm_rule_data rrd;
13771 u64 reg;
13772 int i, idx, regoff, regidx;
13773 u8 offset;
13774
13775 /* there needs to be enough room in the map table */
13776 if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) {
13777 dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
13778 return;
13779 }
13780
13781 /*
13782 * RSM will extract the destination context as an index into the
13783 * map table. The destination contexts are a sequential block
13784 * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive).
13785 * Map entries are accessed as offset + extracted value. Adjust
13786 * the added offset so this sequence can be placed anywhere in
13787 * the table - as long as the entries themselves do not wrap.
13788 * There are only enough bits in offset for the table size, so
13789 * start with that to allow for a "negative" offset.
13790 */
13791 offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
13792 (int)dd->first_user_ctxt);
13793
13794 for (i = dd->first_user_ctxt, idx = rmt->used;
13795 i < dd->num_rcv_contexts; i++, idx++) {
13796 /* replace with identity mapping */
13797 regoff = (idx % 8) * 8;
13798 regidx = idx / 8;
13799 reg = rmt->map[regidx];
13800 reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK << regoff);
13801 reg |= (u64)i << regoff;
13802 rmt->map[regidx] = reg;
13803 }
13804
13805 /*
13806 * For RSM intercept of Expected FECN packets:
13807 * o packet type 0 - expected
13808 * o match on F (bit 95), using select/match 1, and
13809 * o match on SH (bit 133), using select/match 2.
13810 *
13811 * Use index 1 to extract the 8-bit receive context from DestQP
13812 * (start at bit 64). Use that as the RSM map table index.
13813 */
13814 rrd.offset = offset;
13815 rrd.pkt_type = 0;
13816 rrd.field1_off = 95;
13817 rrd.field2_off = 133;
13818 rrd.index1_off = 64;
13819 rrd.index1_width = 8;
13820 rrd.index2_off = 0;
13821 rrd.index2_width = 0;
13822 rrd.mask1 = 1;
13823 rrd.value1 = 1;
13824 rrd.mask2 = 1;
13825 rrd.value2 = 1;
13826
13827 /* add rule 1 */
13828 add_rsm_rule(dd, 1, &rrd);
13829
13830 rmt->used += dd->num_user_contexts;
13831}
13832
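The 8-bit rule offset is deliberately allowed to wrap: offset = NUM_MAP_ENTRIES + rmt->used - first_user_ctxt, so when the hardware adds the extracted destination context to the offset modulo 256, any context in [first_user_ctxt, num_rcv_contexts) lands exactly on the identity entries written from rmt->used onward. A quick arithmetic check of that claim; the context numbers are made up:

#include <stdio.h>

#define NUM_MAP_ENTRIES 256

int main(void)
{
	unsigned int used = 20;		/* RMT entries already consumed */
	unsigned int first_user_ctxt = 9;
	unsigned int num_rcv_contexts = 17;
	/* the driver stores this in a u8, so it wraps mod 256 on its own */
	unsigned char offset =
		(unsigned char)(NUM_MAP_ENTRIES + used - first_user_ctxt);
	unsigned int ctxt;

	for (ctxt = first_user_ctxt; ctxt < num_rcv_contexts; ctxt++) {
		unsigned int idx = (offset + ctxt) % NUM_MAP_ENTRIES;

		/* idx walks used, used+1, ... exactly where the identity
		 * entries for these contexts were placed */
		printf("ctxt %u -> map index %u\n", ctxt, idx);
	}
	return 0;
}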
13577static void init_rxe(struct hfi1_devdata *dd) 13833static void init_rxe(struct hfi1_devdata *dd)
13578{ 13834{
13835 struct rsm_map_table *rmt;
13836
13579 /* enable all receive errors */ 13837 /* enable all receive errors */
13580 write_csr(dd, RCV_ERR_MASK, ~0ull); 13838 write_csr(dd, RCV_ERR_MASK, ~0ull);
13581 /* setup QPN map table - start where VL15 context leaves off */ 13839
13582 init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? 13840 rmt = alloc_rsm_map_table(dd);
13583 MIN_KERNEL_KCTXTS : 0); 13841 /* set up QOS, including the QPN map table */
13842 init_qos(dd, rmt);
13843 init_user_fecn_handling(dd, rmt);
13844 complete_rsm_map_table(dd, rmt);
13845 kfree(rmt);
13846
13584 /* 13847 /*
13585 * make sure RcvCtrl.RcvWcb <= PCIe Device Control 13848 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
13586 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config 13849 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
@@ -13762,6 +14025,7 @@ int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
13762 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); 14025 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
13763 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); 14026 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
13764 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; 14027 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
14028 reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK;
13765 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); 14029 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
13766done: 14030done:
13767 return ret; 14031 return ret;
@@ -14148,6 +14412,19 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
14148 (dd->revision >> CCE_REVISION_SW_SHIFT) 14412 (dd->revision >> CCE_REVISION_SW_SHIFT)
14149 & CCE_REVISION_SW_MASK); 14413 & CCE_REVISION_SW_MASK);
14150 14414
14415 /*
14416 * The real cpu mask is part of the affinity struct but has to be
14417 * initialized earlier than the rest of the affinity struct because it
14418 * is needed to calculate the number of user contexts in
14419 * set_up_context_variables(). However, hfi1_dev_affinity_init(),
14420 * which initializes the rest of the affinity struct members,
14421 * depends on set_up_context_variables() for the number of kernel
14422 * contexts, so it cannot be called before set_up_context_variables().
14423 */
14424 ret = init_real_cpu_mask(dd);
14425 if (ret)
14426 goto bail_cleanup;
14427
14151 ret = set_up_context_variables(dd); 14428 ret = set_up_context_variables(dd);
14152 if (ret) 14429 if (ret)
14153 goto bail_cleanup; 14430 goto bail_cleanup;
@@ -14161,9 +14438,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
14161 /* set up KDETH QP prefix in both RX and TX CSRs */ 14438 /* set up KDETH QP prefix in both RX and TX CSRs */
14162 init_kdeth_qp(dd); 14439 init_kdeth_qp(dd);
14163 14440
14164 ret = hfi1_dev_affinity_init(dd); 14441 hfi1_dev_affinity_init(dd);
14165 if (ret)
14166 goto bail_cleanup;
14167 14442
14168 /* send contexts must be set up before receive contexts */ 14443 /* send contexts must be set up before receive contexts */
14169 ret = init_send_contexts(dd); 14444 ret = init_send_contexts(dd);
diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h
index 4f3b878e43eb..1948706fff1a 100644
--- a/drivers/staging/rdma/hfi1/chip.h
+++ b/drivers/staging/rdma/hfi1/chip.h
@@ -389,6 +389,7 @@
389#define LAST_REMOTE_STATE_COMPLETE 0x13 389#define LAST_REMOTE_STATE_COMPLETE 0x13
390#define LINK_QUALITY_INFO 0x14 390#define LINK_QUALITY_INFO 0x14
391#define REMOTE_DEVICE_ID 0x15 391#define REMOTE_DEVICE_ID 0x15
392#define LINK_DOWN_REASON 0x16
392 393
393/* 8051 lane specific register field IDs */ 394/* 8051 lane specific register field IDs */
394#define TX_EQ_SETTINGS 0x00 395#define TX_EQ_SETTINGS 0x00
@@ -497,6 +498,11 @@
497#define PWRM_BER_CONTROL 0x1 498#define PWRM_BER_CONTROL 0x1
498#define PWRM_BANDWIDTH_CONTROL 0x2 499#define PWRM_BANDWIDTH_CONTROL 0x2
499 500
501/* 8051 link down reasons */
502#define LDR_LINK_TRANSFER_ACTIVE_LOW 0xa
503#define LDR_RECEIVED_LINKDOWN_IDLE_MSG 0xb
504#define LDR_RECEIVED_HOST_OFFLINE_REQ 0xc
505
500/* verify capability fabric CRC size bits */ 506/* verify capability fabric CRC size bits */
501enum { 507enum {
502 CAP_CRC_14B = (1 << 0), /* 14b CRC */ 508 CAP_CRC_14B = (1 << 0), /* 14b CRC */
@@ -691,7 +697,6 @@ void handle_verify_cap(struct work_struct *work);
691void handle_freeze(struct work_struct *work); 697void handle_freeze(struct work_struct *work);
692void handle_link_up(struct work_struct *work); 698void handle_link_up(struct work_struct *work);
693void handle_link_down(struct work_struct *work); 699void handle_link_down(struct work_struct *work);
694void handle_8051_request(struct work_struct *work);
695void handle_link_downgrade(struct work_struct *work); 700void handle_link_downgrade(struct work_struct *work);
696void handle_link_bounce(struct work_struct *work); 701void handle_link_bounce(struct work_struct *work);
697void handle_sma_message(struct work_struct *work); 702void handle_sma_message(struct work_struct *work);
diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h
index 770f05c9b8de..8744de6667c2 100644
--- a/drivers/staging/rdma/hfi1/chip_registers.h
+++ b/drivers/staging/rdma/hfi1/chip_registers.h
@@ -771,6 +771,7 @@
771#define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK 0x1ull 771#define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK 0x1ull
772#define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT 0 772#define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT 0
773#define RCV_RSM_CFG_PACKET_TYPE_SHIFT 60 773#define RCV_RSM_CFG_PACKET_TYPE_SHIFT 60
774#define RCV_RSM_CFG_OFFSET_SHIFT 32
774#define RCV_RSM_MAP_TABLE (RXE + 0x000000000900) 775#define RCV_RSM_MAP_TABLE (RXE + 0x000000000900)
775#define RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK 0xFFull 776#define RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK 0xFFull
776#define RCV_RSM_MATCH (RXE + 0x000000000800) 777#define RCV_RSM_MATCH (RXE + 0x000000000800)
diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c
index c5b520bf610e..bb2409ad891a 100644
--- a/drivers/staging/rdma/hfi1/diag.c
+++ b/drivers/staging/rdma/hfi1/diag.c
@@ -413,7 +413,8 @@ static ssize_t diagpkt_send(struct diag_pkt *dp)
413 goto bail; 413 goto bail;
414 } 414 }
415 /* can only use kernel contexts */ 415 /* can only use kernel contexts */
416 if (dd->send_contexts[dp->sw_index].type != SC_KERNEL) { 416 if (dd->send_contexts[dp->sw_index].type != SC_KERNEL &&
417 dd->send_contexts[dp->sw_index].type != SC_VL15) {
417 ret = -EINVAL; 418 ret = -EINVAL;
418 goto bail; 419 goto bail;
419 } 420 }
diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c
index 34511e5df1d5..700c6fa3a633 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/staging/rdma/hfi1/driver.c
@@ -75,7 +75,8 @@ DEFINE_MUTEX(hfi1_mutex); /* general driver use */
75 75
76unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; 76unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
77module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO); 77module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
78MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is 8192"); 78MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify(
79 HFI1_DEFAULT_MAX_MTU));
79 80
80unsigned int hfi1_cu = 1; 81unsigned int hfi1_cu = 1;
81module_param_named(cu, hfi1_cu, uint, S_IRUGO); 82module_param_named(cu, hfi1_cu, uint, S_IRUGO);
diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c
index 3040162cb326..ed680fda611d 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/staging/rdma/hfi1/firmware.c
@@ -1413,8 +1413,15 @@ static int __acquire_chip_resource(struct hfi1_devdata *dd, u32 resource)
1413 1413
1414 if (resource & CR_DYN_MASK) { 1414 if (resource & CR_DYN_MASK) {
1415 /* a dynamic resource is in use if either HFI has set the bit */ 1415 /* a dynamic resource is in use if either HFI has set the bit */
1416 all_bits = resource_mask(0, resource) | 1416 if (dd->pcidev->device == PCI_DEVICE_ID_INTEL0 &&
1417 (resource & (CR_I2C1 | CR_I2C2))) {
1418 /* discrete devices must serialize across both chains */
1419 all_bits = resource_mask(0, CR_I2C1 | CR_I2C2) |
1420 resource_mask(1, CR_I2C1 | CR_I2C2);
1421 } else {
1422 all_bits = resource_mask(0, resource) |
1417 resource_mask(1, resource); 1423 resource_mask(1, resource);
1424 }
1418 my_bit = resource_mask(dd->hfi1_id, resource); 1425 my_bit = resource_mask(dd->hfi1_id, resource);
1419 } else { 1426 } else {
1420 /* non-dynamic resources are not split between HFIs */ 1427 /* non-dynamic resources are not split between HFIs */
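For the discrete (PCI_DEVICE_ID_INTEL0) parts, an I2C request now treats both chains on both HFIs as a single resource, so the busy check ORs together the CR_I2C1 and CR_I2C2 masks for both HFIs instead of only the requested bit. A hedged sketch of that mask construction; the bit layout and the resource_mask() stand-in are illustrative, not the driver's:

#include <stdio.h>
#include <stdint.h>

#define CR_I2C1 0x1u
#define CR_I2C2 0x2u

/* per-HFI bit position for a dynamic resource (illustrative layout) */
static uint32_t resource_mask(unsigned int hfi_id, uint32_t resource)
{
	return resource << (hfi_id ? 16 : 0);
}

int main(void)
{
	uint32_t resource = CR_I2C1;	/* what this HFI wants */
	int discrete = 1;		/* e.g. PCI_DEVICE_ID_INTEL0 */
	uint32_t all_bits;

	if (discrete && (resource & (CR_I2C1 | CR_I2C2)))
		/* discrete parts: serialize across both chains, both HFIs */
		all_bits = resource_mask(0, CR_I2C1 | CR_I2C2) |
			   resource_mask(1, CR_I2C1 | CR_I2C2);
	else
		all_bits = resource_mask(0, resource) |
			   resource_mask(1, resource);

	printf("busy-check mask: 0x%08x\n", all_bits);
	return 0;
}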
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h
index 16cbdc4073e0..7b78d56de7f5 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/staging/rdma/hfi1/hfi.h
@@ -455,9 +455,9 @@ struct rvt_sge_state;
455#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) 455#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
456 456
457/* use this MTU size if none other is given */ 457/* use this MTU size if none other is given */
458#define HFI1_DEFAULT_ACTIVE_MTU 8192 458#define HFI1_DEFAULT_ACTIVE_MTU 10240
459/* use this MTU size as the default maximum */ 459/* use this MTU size as the default maximum */
460#define HFI1_DEFAULT_MAX_MTU 8192 460#define HFI1_DEFAULT_MAX_MTU 10240
461/* default partition key */ 461/* default partition key */
462#define DEFAULT_PKEY 0xffff 462#define DEFAULT_PKEY 0xffff
463 463
@@ -606,7 +606,6 @@ struct hfi1_pportdata {
606 struct work_struct link_vc_work; 606 struct work_struct link_vc_work;
607 struct work_struct link_up_work; 607 struct work_struct link_up_work;
608 struct work_struct link_down_work; 608 struct work_struct link_down_work;
609 struct work_struct dc_host_req_work;
610 struct work_struct sma_message_work; 609 struct work_struct sma_message_work;
611 struct work_struct freeze_work; 610 struct work_struct freeze_work;
612 struct work_struct link_downgrade_work; 611 struct work_struct link_downgrade_work;
@@ -1258,7 +1257,7 @@ void receive_interrupt_work(struct work_struct *work);
1258static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) 1257static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
1259{ 1258{
1260 return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | 1259 return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
1261 ((!!(rhf & RHF_DC_INFO_MASK)) << 4); 1260 ((!!(rhf & RHF_DC_INFO_SMASK)) << 4);
1262} 1261}
1263 1262
1264static inline u16 generate_jkey(kuid_t uid) 1263static inline u16 generate_jkey(kuid_t uid)
@@ -1333,6 +1332,9 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
1333void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, 1332void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
1334 u32 pkey, u32 slid, u32 dlid, u8 sc5, 1333 u32 pkey, u32 slid, u32 dlid, u8 sc5,
1335 const struct ib_grh *old_grh); 1334 const struct ib_grh *old_grh);
1335#define PKEY_CHECK_INVALID -1
1336int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth,
1337 u8 sc5, int8_t s_pkey_index);
1336 1338
1337#define PACKET_EGRESS_TIMEOUT 350 1339#define PACKET_EGRESS_TIMEOUT 350
1338static inline void pause_for_credit_return(struct hfi1_devdata *dd) 1340static inline void pause_for_credit_return(struct hfi1_devdata *dd)
@@ -1776,6 +1778,7 @@ extern struct mutex hfi1_mutex;
1776 1778
1777#define HFI1_PKT_USER_SC_INTEGRITY \ 1779#define HFI1_PKT_USER_SC_INTEGRITY \
1778 (SEND_CTXT_CHECK_ENABLE_DISALLOW_NON_KDETH_PACKETS_SMASK \ 1780 (SEND_CTXT_CHECK_ENABLE_DISALLOW_NON_KDETH_PACKETS_SMASK \
1781 | SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK \
1779 | SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_SMASK \ 1782 | SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_SMASK \
1780 | SEND_CTXT_CHECK_ENABLE_DISALLOW_GRH_SMASK) 1783 | SEND_CTXT_CHECK_ENABLE_DISALLOW_GRH_SMASK)
1781 1784
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c
index cfcdc16b41c3..502b7cf4647d 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c
@@ -422,9 +422,10 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
422 struct cca_timer *cca_timer; 422 struct cca_timer *cca_timer;
423 struct hfi1_pportdata *ppd; 423 struct hfi1_pportdata *ppd;
424 int sl; 424 int sl;
425 u16 ccti, ccti_timer, ccti_min; 425 u16 ccti_timer, ccti_min;
426 struct cc_state *cc_state; 426 struct cc_state *cc_state;
427 unsigned long flags; 427 unsigned long flags;
428 enum hrtimer_restart ret = HRTIMER_NORESTART;
428 429
429 cca_timer = container_of(t, struct cca_timer, hrtimer); 430 cca_timer = container_of(t, struct cca_timer, hrtimer);
430 ppd = cca_timer->ppd; 431 ppd = cca_timer->ppd;
@@ -450,24 +451,21 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
450 451
451 spin_lock_irqsave(&ppd->cca_timer_lock, flags); 452 spin_lock_irqsave(&ppd->cca_timer_lock, flags);
452 453
453 ccti = cca_timer->ccti; 454 if (cca_timer->ccti > ccti_min) {
454
455 if (ccti > ccti_min) {
456 cca_timer->ccti--; 455 cca_timer->ccti--;
457 set_link_ipg(ppd); 456 set_link_ipg(ppd);
458 } 457 }
459 458
460 spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); 459 if (cca_timer->ccti > ccti_min) {
461
462 rcu_read_unlock();
463
464 if (ccti > ccti_min) {
465 unsigned long nsec = 1024 * ccti_timer; 460 unsigned long nsec = 1024 * ccti_timer;
466 /* ccti_timer is in units of 1.024 usec */ 461 /* ccti_timer is in units of 1.024 usec */
467 hrtimer_forward_now(t, ns_to_ktime(nsec)); 462 hrtimer_forward_now(t, ns_to_ktime(nsec));
468 return HRTIMER_RESTART; 463 ret = HRTIMER_RESTART;
469 } 464 }
470 return HRTIMER_NORESTART; 465
466 spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
467 rcu_read_unlock();
468 return ret;
471} 469}
472 470
473/* 471/*
@@ -496,7 +494,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
496 INIT_WORK(&ppd->link_vc_work, handle_verify_cap); 494 INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
497 INIT_WORK(&ppd->link_up_work, handle_link_up); 495 INIT_WORK(&ppd->link_up_work, handle_link_up);
498 INIT_WORK(&ppd->link_down_work, handle_link_down); 496 INIT_WORK(&ppd->link_down_work, handle_link_down);
499 INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
500 INIT_WORK(&ppd->freeze_work, handle_freeze); 497 INIT_WORK(&ppd->freeze_work, handle_freeze);
501 INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); 498 INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
502 INIT_WORK(&ppd->sma_message_work, handle_sma_message); 499 INIT_WORK(&ppd->sma_message_work, handle_sma_message);
@@ -1007,7 +1004,7 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
1007 free_percpu(dd->rcv_limit); 1004 free_percpu(dd->rcv_limit);
1008 hfi1_dev_affinity_free(dd); 1005 hfi1_dev_affinity_free(dd);
1009 free_percpu(dd->send_schedule); 1006 free_percpu(dd->send_schedule);
1010 ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); 1007 rvt_dealloc_device(&dd->verbs_dev.rdi);
1011} 1008}
1012 1009
1013/* 1010/*
@@ -1110,7 +1107,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
1110bail: 1107bail:
1111 if (!list_empty(&dd->list)) 1108 if (!list_empty(&dd->list))
1112 list_del_init(&dd->list); 1109 list_del_init(&dd->list);
1113 ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); 1110 rvt_dealloc_device(&dd->verbs_dev.rdi);
1114 return ERR_PTR(ret); 1111 return ERR_PTR(ret);
1115} 1112}
1116 1113
diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c
index d1e7f4d7cf6f..ed58cf21e790 100644
--- a/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/staging/rdma/hfi1/mad.c
@@ -999,7 +999,21 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
999 break; 999 break;
1000 } 1000 }
1001 1001
1002 set_link_state(ppd, link_state); 1002 if ((link_state == HLS_DN_POLL ||
1003 link_state == HLS_DN_DOWNDEF)) {
1004 /*
1005 * Going to poll. No matter what the current state,
1006 * always move offline first, then tune and start the
1007 * link. This correctly handles a FM link bounce and
1008 * a link enable. Going offline is a no-op if already
1009 * offline.
1010 */
1011 set_link_state(ppd, HLS_DN_OFFLINE);
1012 tune_serdes(ppd);
1013 start_link(ppd);
1014 } else {
1015 set_link_state(ppd, link_state);
1016 }
1003 if (link_state == HLS_DN_DISABLE && 1017 if (link_state == HLS_DN_DISABLE &&
1004 (ppd->offline_disabled_reason > 1018 (ppd->offline_disabled_reason >
1005 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) || 1019 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c
index b3f0682a36c9..2b0e91d3093d 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.c
+++ b/drivers/staging/rdma/hfi1/mmu_rb.c
@@ -91,7 +91,7 @@ static unsigned long mmu_node_start(struct mmu_rb_node *node)
91 91
92static unsigned long mmu_node_last(struct mmu_rb_node *node) 92static unsigned long mmu_node_last(struct mmu_rb_node *node)
93{ 93{
94 return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1; 94 return PAGE_ALIGN(node->addr + node->len) - 1;
95} 95}
96 96
97int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) 97int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
@@ -126,10 +126,15 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
126 if (!handler) 126 if (!handler)
127 return; 127 return;
128 128
129 /* Unregister first so we don't get any more notifications. */
130 if (current->mm)
131 mmu_notifier_unregister(&handler->mn, current->mm);
132
129 spin_lock_irqsave(&mmu_rb_lock, flags); 133 spin_lock_irqsave(&mmu_rb_lock, flags);
130 list_del(&handler->list); 134 list_del(&handler->list);
131 spin_unlock_irqrestore(&mmu_rb_lock, flags); 135 spin_unlock_irqrestore(&mmu_rb_lock, flags);
132 136
137 spin_lock_irqsave(&handler->lock, flags);
133 if (!RB_EMPTY_ROOT(root)) { 138 if (!RB_EMPTY_ROOT(root)) {
134 struct rb_node *node; 139 struct rb_node *node;
135 struct mmu_rb_node *rbnode; 140 struct mmu_rb_node *rbnode;
@@ -141,9 +146,8 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
141 handler->ops->remove(root, rbnode, NULL); 146 handler->ops->remove(root, rbnode, NULL);
142 } 147 }
143 } 148 }
149 spin_unlock_irqrestore(&handler->lock, flags);
144 150
145 if (current->mm)
146 mmu_notifier_unregister(&handler->mn, current->mm);
147 kfree(handler); 151 kfree(handler);
148} 152}
149 153
@@ -235,6 +239,25 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
235 return node; 239 return node;
236} 240}
237 241
242struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root,
243 unsigned long addr, unsigned long len)
244{
245 struct mmu_rb_handler *handler = find_mmu_handler(root);
246 struct mmu_rb_node *node;
247 unsigned long flags;
248
249 if (!handler)
250 return ERR_PTR(-EINVAL);
251
252 spin_lock_irqsave(&handler->lock, flags);
253 node = __mmu_rb_search(handler, addr, len);
254 if (node)
255 __mmu_int_rb_remove(node, handler->root);
256 spin_unlock_irqrestore(&handler->lock, flags);
257
258 return node;
259}
260
238void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) 261void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
239{ 262{
240 struct mmu_rb_handler *handler = find_mmu_handler(root); 263 struct mmu_rb_handler *handler = find_mmu_handler(root);
@@ -293,9 +316,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
293 hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", 316 hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
294 node->addr, node->len); 317 node->addr, node->len);
295 if (handler->ops->invalidate(root, node)) { 318 if (handler->ops->invalidate(root, node)) {
296 spin_unlock_irqrestore(&handler->lock, flags); 319 __mmu_int_rb_remove(node, root);
297 __mmu_rb_remove(handler, node, mm); 320 if (handler->ops->remove)
298 spin_lock_irqsave(&handler->lock, flags); 321 handler->ops->remove(root, node, mm);
299 } 322 }
300 } 323 }
301 spin_unlock_irqrestore(&handler->lock, flags); 324 spin_unlock_irqrestore(&handler->lock, flags);
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h
index 19a306e83c7d..7a57b9c49d27 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.h
+++ b/drivers/staging/rdma/hfi1/mmu_rb.h
@@ -70,5 +70,7 @@ int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
70void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *); 70void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
71struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long, 71struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
72 unsigned long); 72 unsigned long);
73struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long,
74 unsigned long);
73 75
74#endif /* _HFI1_MMU_RB_H */ 76#endif /* _HFI1_MMU_RB_H */
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c
index c6849ce9e5eb..c67b9ad3fcf4 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/staging/rdma/hfi1/pio.c
@@ -139,23 +139,30 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
139/* Send Context Size (SCS) wildcards */ 139/* Send Context Size (SCS) wildcards */
140#define SCS_POOL_0 -1 140#define SCS_POOL_0 -1
141#define SCS_POOL_1 -2 141#define SCS_POOL_1 -2
142
142/* Send Context Count (SCC) wildcards */ 143/* Send Context Count (SCC) wildcards */
143#define SCC_PER_VL -1 144#define SCC_PER_VL -1
144#define SCC_PER_CPU -2 145#define SCC_PER_CPU -2
145
146#define SCC_PER_KRCVQ -3 146#define SCC_PER_KRCVQ -3
147#define SCC_ACK_CREDITS 32 147
148/* Send Context Size (SCS) constants */
149#define SCS_ACK_CREDITS 32
150#define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */
151
152#define PIO_THRESHOLD_CEILING 4096
148 153
149#define PIO_WAIT_BATCH_SIZE 5 154#define PIO_WAIT_BATCH_SIZE 5
150 155
151/* default send context sizes */ 156/* default send context sizes */
152static struct sc_config_sizes sc_config_sizes[SC_MAX] = { 157static struct sc_config_sizes sc_config_sizes[SC_MAX] = {
153 [SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 158 [SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */
154 .count = SCC_PER_VL },/* one per NUMA */ 159 .count = SCC_PER_VL }, /* one per NUMA */
155 [SC_ACK] = { .size = SCC_ACK_CREDITS, 160 [SC_ACK] = { .size = SCS_ACK_CREDITS,
156 .count = SCC_PER_KRCVQ }, 161 .count = SCC_PER_KRCVQ },
157 [SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 162 [SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */
158 .count = SCC_PER_CPU }, /* one per CPU */ 163 .count = SCC_PER_CPU }, /* one per CPU */
164 [SC_VL15] = { .size = SCS_VL15_CREDITS,
165 .count = 1 },
159 166
160}; 167};
161 168
@@ -202,7 +209,8 @@ static int wildcard_to_pool(int wc)
202static const char *sc_type_names[SC_MAX] = { 209static const char *sc_type_names[SC_MAX] = {
203 "kernel", 210 "kernel",
204 "ack", 211 "ack",
205 "user" 212 "user",
213 "vl15"
206}; 214};
207 215
208static const char *sc_type_name(int index) 216static const char *sc_type_name(int index)
@@ -231,6 +239,22 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
231 int i; 239 int i;
232 240
233 /* 241 /*
242 * When SDMA is enabled, kernel context pio packet size is capped by
243 * "piothreshold". Reduce pio buffer allocation for kernel context by
244 * setting it to a fixed size. The allocation allows 3-deep buffering
245 * of the largest pio packets plus up to 128 bytes header, sufficient
246 * to maintain verbs performance.
247 *
248 * When SDMA is disabled, keep the default pooling allocation.
249 */
250 if (HFI1_CAP_IS_KSET(SDMA)) {
251 u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ?
252 piothreshold : PIO_THRESHOLD_CEILING;
253 sc_config_sizes[SC_KERNEL].size =
254 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;
255 }
256
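With SDMA enabled, kernel PIO packets are capped at piothreshold bytes, so the kernel send context only needs room for three such packets plus up to a 128-byte header, expressed in PIO blocks. A small sketch of that sizing formula; the 64-byte PIO_BLOCK_SIZE and the threshold value are assumptions for illustration:

#include <stdio.h>

#define PIO_BLOCK_SIZE		64	/* assumed block size in bytes */
#define PIO_THRESHOLD_CEILING	4096

int main(void)
{
	unsigned int piothreshold = 256;	/* module-parameter style cap */
	unsigned int max_pkt_size = piothreshold < PIO_THRESHOLD_CEILING ?
				    piothreshold : PIO_THRESHOLD_CEILING;
	/* 3-deep buffering of the largest PIO packet + up to 128B header */
	unsigned int blocks = 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;

	printf("kernel send context size: %u blocks\n", blocks);	/* 18 */
	return 0;
}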
257 /*
234 * Step 0: 258 * Step 0:
235 * - copy the centipercents/absolute sizes from the pool config 259 * - copy the centipercents/absolute sizes from the pool config
236 * - sanity check these values 260 * - sanity check these values
@@ -311,7 +335,7 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
311 if (i == SC_ACK) { 335 if (i == SC_ACK) {
312 count = dd->n_krcv_queues; 336 count = dd->n_krcv_queues;
313 } else if (i == SC_KERNEL) { 337 } else if (i == SC_KERNEL) {
314 count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */; 338 count = INIT_SC_PER_VL * num_vls;
315 } else if (count == SCC_PER_CPU) { 339 } else if (count == SCC_PER_CPU) {
316 count = dd->num_rcv_contexts - dd->n_krcv_queues; 340 count = dd->num_rcv_contexts - dd->n_krcv_queues;
317 } else if (count < 0) { 341 } else if (count < 0) {
@@ -596,7 +620,7 @@ u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize)
596 * Return value is what to write into the CSR: trigger return when 620 * Return value is what to write into the CSR: trigger return when
597 * unreturned credits pass this count. 621 * unreturned credits pass this count.
598 */ 622 */
599static u32 sc_percent_to_threshold(struct send_context *sc, u32 percent) 623u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
600{ 624{
601 return (sc->credits * percent) / 100; 625 return (sc->credits * percent) / 100;
602} 626}
@@ -790,7 +814,10 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
790 * For Ack contexts, set a threshold for half the credits. 814 * For Ack contexts, set a threshold for half the credits.
791 * For User contexts use the given percentage. This has been 815 * For User contexts use the given percentage. This has been
792 * sanitized on driver start-up. 816 * sanitized on driver start-up.
793 * For Kernel contexts, use the default MTU plus a header. 817 * For Kernel contexts, use the default MTU plus a header
818 * or half the credits, whichever is smaller. This should
819 * work for both the 3-deep buffering allocation and the
820 * pooling allocation.
794 */ 821 */
795 if (type == SC_ACK) { 822 if (type == SC_ACK) {
796 thresh = sc_percent_to_threshold(sc, 50); 823 thresh = sc_percent_to_threshold(sc, 50);
@@ -798,7 +825,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
798 thresh = sc_percent_to_threshold(sc, 825 thresh = sc_percent_to_threshold(sc,
799 user_credit_return_threshold); 826 user_credit_return_threshold);
800 } else { /* kernel */ 827 } else { /* kernel */
801 thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize); 828 thresh = min(sc_percent_to_threshold(sc, 50),
829 sc_mtu_to_threshold(sc, hfi1_max_mtu,
830 hdrqentsize));
802 } 831 }
803 reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT); 832 reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
804 /* add in early return */ 833 /* add in early return */
@@ -1531,7 +1560,8 @@ static void sc_piobufavail(struct send_context *sc)
1531 unsigned long flags; 1560 unsigned long flags;
1532 unsigned i, n = 0; 1561 unsigned i, n = 0;
1533 1562
1534 if (dd->send_contexts[sc->sw_index].type != SC_KERNEL) 1563 if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
1564 dd->send_contexts[sc->sw_index].type != SC_VL15)
1535 return; 1565 return;
1536 list = &sc->piowait; 1566 list = &sc->piowait;
1537 /* 1567 /*
@@ -1900,7 +1930,7 @@ int init_pervl_scs(struct hfi1_devdata *dd)
1900 u32 ctxt; 1930 u32 ctxt;
1901 struct hfi1_pportdata *ppd = dd->pport; 1931 struct hfi1_pportdata *ppd = dd->pport;
1902 1932
1903 dd->vld[15].sc = sc_alloc(dd, SC_KERNEL, 1933 dd->vld[15].sc = sc_alloc(dd, SC_VL15,
1904 dd->rcd[0]->rcvhdrqentsize, dd->node); 1934 dd->rcd[0]->rcvhdrqentsize, dd->node);
1905 if (!dd->vld[15].sc) 1935 if (!dd->vld[15].sc)
1906 goto nomem; 1936 goto nomem;
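
The pio.c hunks above cap the kernel send-context size to three of the largest piothreshold-limited packets (plus a 128-byte header allowance) when SDMA is enabled, and clamp the kernel credit-return threshold to the smaller of 50% of the credits and the MTU-derived value. A minimal standalone sketch of that arithmetic, assuming a 64-byte PIO block size; the constants here are illustrative, not authoritative driver values.

/*
 * Standalone model of the send-context sizing in the hunk above.
 * PIO_BLOCK_SIZE and HDR_ALLOWANCE are assumptions mirrored from the diff.
 */
#include <stdio.h>

#define PIO_BLOCK_SIZE          64      /* assumed PIO block granularity */
#define PIO_THRESHOLD_CEILING   4096
#define HDR_ALLOWANCE           128

/* blocks needed for 3-deep buffering of the largest capped PIO packet */
static unsigned int kernel_sc_blocks(unsigned int piothreshold)
{
        unsigned int max_pkt = piothreshold < PIO_THRESHOLD_CEILING ?
                               piothreshold : PIO_THRESHOLD_CEILING;

        return 3 * (max_pkt + HDR_ALLOWANCE) / PIO_BLOCK_SIZE;
}

/* credit-return threshold: smaller of 50% of credits and the MTU-based value */
static unsigned int kernel_sc_threshold(unsigned int credits, unsigned int mtu_thresh)
{
        unsigned int half = credits * 50 / 100;

        return half < mtu_thresh ? half : mtu_thresh;
}

int main(void)
{
        printf("blocks for piothreshold=256:  %u\n", kernel_sc_blocks(256));
        printf("blocks for piothreshold=8192: %u\n", kernel_sc_blocks(8192));
        printf("threshold(credits=40, mtu_thresh=33): %u\n",
               kernel_sc_threshold(40, 33));
        return 0;
}
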
diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h
index 0026976ce4f6..53a08edb7f64 100644
--- a/drivers/staging/rdma/hfi1/pio.h
+++ b/drivers/staging/rdma/hfi1/pio.h
@@ -51,7 +51,8 @@
51#define SC_KERNEL 0 51#define SC_KERNEL 0
52#define SC_ACK 1 52#define SC_ACK 1
53#define SC_USER 2 53#define SC_USER 2
54#define SC_MAX 3 54#define SC_VL15 3
55#define SC_MAX 4
55 56
56/* invalid send context index */ 57/* invalid send context index */
57#define INVALID_SCI 0xff 58#define INVALID_SCI 0xff
@@ -293,6 +294,7 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
293void sc_add_credit_return_intr(struct send_context *sc); 294void sc_add_credit_return_intr(struct send_context *sc);
294void sc_del_credit_return_intr(struct send_context *sc); 295void sc_del_credit_return_intr(struct send_context *sc);
295void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold); 296void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold);
297u32 sc_percent_to_threshold(struct send_context *sc, u32 percent);
296u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize); 298u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize);
297void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint); 299void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint);
298void sc_wait(struct hfi1_devdata *dd); 300void sc_wait(struct hfi1_devdata *dd);
diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c
index 0a1d074583e4..8fe8a205b5bb 100644
--- a/drivers/staging/rdma/hfi1/platform.c
+++ b/drivers/staging/rdma/hfi1/platform.c
@@ -114,21 +114,11 @@ static int qual_power(struct hfi1_pportdata *ppd)
114 if (ret) 114 if (ret)
115 return ret; 115 return ret;
116 116
117 if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4) 117 cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
118 cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
119 else
120 cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
121 118
122 if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1)) 119 if (cable_power_class > power_class_max)
123 ppd->offline_disabled_reason =
124 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
125 else if (cable_power_class > 4 && cable_power_class > (power_class_max))
126 ppd->offline_disabled_reason = 120 ppd->offline_disabled_reason =
127 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY); 121 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
128 /*
129 * cable_power_class will never have value 4 as this simply
130 * means the high power settings are unused
131 */
132 122
133 if (ppd->offline_disabled_reason == 123 if (ppd->offline_disabled_reason ==
134 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) { 124 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
@@ -173,12 +163,9 @@ static int set_qsfp_high_power(struct hfi1_pportdata *ppd)
173 u8 *cache = ppd->qsfp_info.cache; 163 u8 *cache = ppd->qsfp_info.cache;
174 int ret; 164 int ret;
175 165
176 if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4) 166 cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
177 cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
178 else
179 cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
180 167
181 if (cable_power_class) { 168 if (cable_power_class > QSFP_POWER_CLASS_1) {
182 power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS]; 169 power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS];
183 170
184 power_ctrl_byte |= 1; 171 power_ctrl_byte |= 1;
@@ -190,8 +177,7 @@ static int set_qsfp_high_power(struct hfi1_pportdata *ppd)
190 if (ret != 1) 177 if (ret != 1)
191 return -EIO; 178 return -EIO;
192 179
193 if (cable_power_class > 3) { 180 if (cable_power_class > QSFP_POWER_CLASS_4) {
194 /* > power class 4*/
195 power_ctrl_byte |= (1 << 2); 181 power_ctrl_byte |= (1 << 2);
196 ret = qsfp_write(ppd, ppd->dd->hfi1_id, 182 ret = qsfp_write(ppd, ppd->dd->hfi1_id,
197 QSFP_PWR_CTRL_BYTE_OFFS, 183 QSFP_PWR_CTRL_BYTE_OFFS,
@@ -212,12 +198,21 @@ static void apply_rx_cdr(struct hfi1_pportdata *ppd,
212{ 198{
213 u32 rx_preset; 199 u32 rx_preset;
214 u8 *cache = ppd->qsfp_info.cache; 200 u8 *cache = ppd->qsfp_info.cache;
201 int cable_power_class;
215 202
216 if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) && 203 if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) &&
217 (cache[QSFP_CDR_INFO_OFFS] & 0x40))) 204 (cache[QSFP_CDR_INFO_OFFS] & 0x40)))
218 return; 205 return;
219 206
220 /* rx_preset preset to zero to catch error */ 207 /* RX CDR present, bypass supported */
208 cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
209
210 if (cable_power_class <= QSFP_POWER_CLASS_3) {
211 /* Power class <= 3, ignore config & turn RX CDR on */
212 *cdr_ctrl_byte |= 0xF;
213 return;
214 }
215
221 get_platform_config_field( 216 get_platform_config_field(
222 ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE, 217 ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
223 rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY, 218 rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
@@ -250,15 +245,25 @@ static void apply_rx_cdr(struct hfi1_pportdata *ppd,
250 245
251static void apply_tx_cdr(struct hfi1_pportdata *ppd, 246static void apply_tx_cdr(struct hfi1_pportdata *ppd,
252 u32 tx_preset_index, 247 u32 tx_preset_index,
253 u8 *ctr_ctrl_byte) 248 u8 *cdr_ctrl_byte)
254{ 249{
255 u32 tx_preset; 250 u32 tx_preset;
256 u8 *cache = ppd->qsfp_info.cache; 251 u8 *cache = ppd->qsfp_info.cache;
252 int cable_power_class;
257 253
258 if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) && 254 if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) &&
259 (cache[QSFP_CDR_INFO_OFFS] & 0x80))) 255 (cache[QSFP_CDR_INFO_OFFS] & 0x80)))
260 return; 256 return;
261 257
258 /* TX CDR present, bypass supported */
259 cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
260
261 if (cable_power_class <= QSFP_POWER_CLASS_3) {
262 /* Power class <= 3, ignore config & turn TX CDR on */
263 *cdr_ctrl_byte |= 0xF0;
264 return;
265 }
266
262 get_platform_config_field( 267 get_platform_config_field(
263 ppd->dd, 268 ppd->dd,
264 PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index, 269 PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
@@ -282,10 +287,10 @@ static void apply_tx_cdr(struct hfi1_pportdata *ppd,
282 (tx_preset << 2) | (tx_preset << 3)); 287 (tx_preset << 2) | (tx_preset << 3));
283 288
284 if (tx_preset) 289 if (tx_preset)
285 *ctr_ctrl_byte |= (tx_preset << 4); 290 *cdr_ctrl_byte |= (tx_preset << 4);
286 else 291 else
287 /* Preserve current/determined RX CDR status */ 292 /* Preserve current/determined RX CDR status */
288 *ctr_ctrl_byte &= ((tx_preset << 4) | 0xF); 293 *cdr_ctrl_byte &= ((tx_preset << 4) | 0xF);
289} 294}
290 295
291static void apply_cdr_settings( 296static void apply_cdr_settings(
@@ -598,6 +603,7 @@ static void apply_tunings(
598 "Applying TX settings"); 603 "Applying TX settings");
599} 604}
600 605
606/* Must be holding the QSFP i2c resource */
601static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, 607static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
602 u32 *ptr_rx_preset, u32 *ptr_total_atten) 608 u32 *ptr_rx_preset, u32 *ptr_total_atten)
603{ 609{
@@ -605,26 +611,19 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
605 u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled; 611 u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
606 u8 *cache = ppd->qsfp_info.cache; 612 u8 *cache = ppd->qsfp_info.cache;
607 613
608 ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
609 if (ret) {
610 dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
611 __func__, (int)ppd->dd->hfi1_id);
612 return ret;
613 }
614
615 ppd->qsfp_info.limiting_active = 1; 614 ppd->qsfp_info.limiting_active = 1;
616 615
617 ret = set_qsfp_tx(ppd, 0); 616 ret = set_qsfp_tx(ppd, 0);
618 if (ret) 617 if (ret)
619 goto bail_unlock; 618 return ret;
620 619
621 ret = qual_power(ppd); 620 ret = qual_power(ppd);
622 if (ret) 621 if (ret)
623 goto bail_unlock; 622 return ret;
624 623
625 ret = qual_bitrate(ppd); 624 ret = qual_bitrate(ppd);
626 if (ret) 625 if (ret)
627 goto bail_unlock; 626 return ret;
628 627
629 if (ppd->qsfp_info.reset_needed) { 628 if (ppd->qsfp_info.reset_needed) {
630 reset_qsfp(ppd); 629 reset_qsfp(ppd);
@@ -636,7 +635,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
636 635
637 ret = set_qsfp_high_power(ppd); 636 ret = set_qsfp_high_power(ppd);
638 if (ret) 637 if (ret)
639 goto bail_unlock; 638 return ret;
640 639
641 if (cache[QSFP_EQ_INFO_OFFS] & 0x4) { 640 if (cache[QSFP_EQ_INFO_OFFS] & 0x4) {
642 ret = get_platform_config_field( 641 ret = get_platform_config_field(
@@ -646,7 +645,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
646 ptr_tx_preset, 4); 645 ptr_tx_preset, 4);
647 if (ret) { 646 if (ret) {
648 *ptr_tx_preset = OPA_INVALID_INDEX; 647 *ptr_tx_preset = OPA_INVALID_INDEX;
649 goto bail_unlock; 648 return ret;
650 } 649 }
651 } else { 650 } else {
652 ret = get_platform_config_field( 651 ret = get_platform_config_field(
@@ -656,7 +655,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
656 ptr_tx_preset, 4); 655 ptr_tx_preset, 4);
657 if (ret) { 656 if (ret) {
658 *ptr_tx_preset = OPA_INVALID_INDEX; 657 *ptr_tx_preset = OPA_INVALID_INDEX;
659 goto bail_unlock; 658 return ret;
660 } 659 }
661 } 660 }
662 661
@@ -665,7 +664,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
665 PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4); 664 PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4);
666 if (ret) { 665 if (ret) {
667 *ptr_rx_preset = OPA_INVALID_INDEX; 666 *ptr_rx_preset = OPA_INVALID_INDEX;
668 goto bail_unlock; 667 return ret;
669 } 668 }
670 669
671 if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G)) 670 if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
@@ -685,8 +684,6 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
685 684
686 ret = set_qsfp_tx(ppd, 1); 685 ret = set_qsfp_tx(ppd, 1);
687 686
688bail_unlock:
689 release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
690 return ret; 687 return ret;
691} 688}
692 689
@@ -833,12 +830,22 @@ void tune_serdes(struct hfi1_pportdata *ppd)
833 total_atten = platform_atten + remote_atten; 830 total_atten = platform_atten + remote_atten;
834 831
835 tuning_method = OPA_PASSIVE_TUNING; 832 tuning_method = OPA_PASSIVE_TUNING;
836 } else 833 } else {
837 ppd->offline_disabled_reason = 834 ppd->offline_disabled_reason =
838 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG); 835 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG);
836 goto bail;
837 }
839 break; 838 break;
840 case PORT_TYPE_QSFP: 839 case PORT_TYPE_QSFP:
841 if (qsfp_mod_present(ppd)) { 840 if (qsfp_mod_present(ppd)) {
841 ret = acquire_chip_resource(ppd->dd,
842 qsfp_resource(ppd->dd),
843 QSFP_WAIT);
844 if (ret) {
845 dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
846 __func__, (int)ppd->dd->hfi1_id);
847 goto bail;
848 }
842 refresh_qsfp_cache(ppd, &ppd->qsfp_info); 849 refresh_qsfp_cache(ppd, &ppd->qsfp_info);
843 850
844 if (ppd->qsfp_info.cache_valid) { 851 if (ppd->qsfp_info.cache_valid) {
@@ -853,21 +860,23 @@ void tune_serdes(struct hfi1_pportdata *ppd)
853 * update the cache to reflect the changes 860 * update the cache to reflect the changes
854 */ 861 */
855 refresh_qsfp_cache(ppd, &ppd->qsfp_info); 862 refresh_qsfp_cache(ppd, &ppd->qsfp_info);
856 if (ret)
857 goto bail;
858
859 limiting_active = 863 limiting_active =
860 ppd->qsfp_info.limiting_active; 864 ppd->qsfp_info.limiting_active;
861 } else { 865 } else {
862 dd_dev_err(dd, 866 dd_dev_err(dd,
863 "%s: Reading QSFP memory failed\n", 867 "%s: Reading QSFP memory failed\n",
864 __func__); 868 __func__);
865 goto bail; 869 ret = -EINVAL; /* a fail indication */
866 } 870 }
867 } else 871 release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
872 if (ret)
873 goto bail;
874 } else {
868 ppd->offline_disabled_reason = 875 ppd->offline_disabled_reason =
869 HFI1_ODR_MASK( 876 HFI1_ODR_MASK(
870 OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED); 877 OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
878 goto bail;
879 }
871 break; 880 break;
872 default: 881 default:
873 dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); 882 dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
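
The platform.c changes move ownership of the QSFP i2c chain resource out of tune_active_qsfp() and refresh_qsfp_cache() and into the single caller, so the chain is acquired once around the cache refresh and tuning, and released on every exit path. A rough userspace sketch of that lock-at-the-caller pattern, with a pthread mutex standing in for acquire_chip_resource()/release_chip_resource(); all names below are stand-ins, not driver APIs.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t qsfp_chain = PTHREAD_MUTEX_INITIALIZER;

/* helpers: must be called with qsfp_chain held (see the added comments above) */
static int refresh_cache_locked(void)
{
        printf("reading QSFP pages under the chain lock\n");
        return 0;
}

static int tune_active_locked(void)
{
        printf("tuning with a stable cache, still under the chain lock\n");
        return 0;
}

static int tune_port(void)
{
        int ret;

        pthread_mutex_lock(&qsfp_chain);        /* acquire once, in the caller */
        ret = refresh_cache_locked();
        if (!ret)
                ret = tune_active_locked();
        pthread_mutex_unlock(&qsfp_chain);      /* single release on every path */
        return ret;
}

int main(void)
{
        return tune_port();
}
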
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index dc9119e1b458..91eb42316df9 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -167,8 +167,12 @@ static inline int opa_mtu_enum_to_int(int mtu)
167 */ 167 */
168static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) 168static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
169{ 169{
170 int val = opa_mtu_enum_to_int((int)mtu); 170 int val;
171 171
172 /* Constraining 10KB packets to 8KB packets */
173 if (mtu == (enum ib_mtu)OPA_MTU_10240)
174 mtu = OPA_MTU_8192;
175 val = opa_mtu_enum_to_int((int)mtu);
172 if (val > 0) 176 if (val > 0)
173 return val; 177 return val;
174 return ib_mtu_enum_to_int(mtu); 178 return ib_mtu_enum_to_int(mtu);
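
The qp.c hunk clamps the 10 KB OPA MTU to 8 KB before converting the enum to bytes. A small sketch of that conversion, assuming the usual IB-style enum ordering (256 B = 1, doubling per step); the enum names here are illustrative, not the OPA definitions.

#include <stdio.h>

enum mtu { MTU_256 = 1, MTU_512, MTU_1024, MTU_2048, MTU_4096,
           MTU_8192, MTU_10240 };

static int mtu_enum_to_int(enum mtu m)
{
        if (m == MTU_10240)             /* constrain 10KB packets to 8KB */
                m = MTU_8192;
        if (m < MTU_256 || m > MTU_8192)
                return -1;
        return 256 << (m - MTU_256);    /* 1 -> 256, 2 -> 512, ... */
}

int main(void)
{
        printf("MTU_10240 -> %d bytes\n", mtu_enum_to_int(MTU_10240)); /* 8192 */
        printf("MTU_4096  -> %d bytes\n", mtu_enum_to_int(MTU_4096));  /* 4096 */
        return 0;
}
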
diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c
index 9ed1963010fe..2441669f0817 100644
--- a/drivers/staging/rdma/hfi1/qsfp.c
+++ b/drivers/staging/rdma/hfi1/qsfp.c
@@ -96,7 +96,7 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
96{ 96{
97 int ret; 97 int ret;
98 98
99 if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) 99 if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
100 return -EACCES; 100 return -EACCES;
101 101
102 /* make sure the TWSI bus is in a sane state */ 102 /* make sure the TWSI bus is in a sane state */
@@ -162,7 +162,7 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
162{ 162{
163 int ret; 163 int ret;
164 164
165 if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) 165 if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
166 return -EACCES; 166 return -EACCES;
167 167
168 /* make sure the TWSI bus is in a sane state */ 168 /* make sure the TWSI bus is in a sane state */
@@ -192,7 +192,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
192 int ret; 192 int ret;
193 u8 page; 193 u8 page;
194 194
195 if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) 195 if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
196 return -EACCES; 196 return -EACCES;
197 197
198 /* make sure the TWSI bus is in a sane state */ 198 /* make sure the TWSI bus is in a sane state */
@@ -276,7 +276,7 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
276 int ret; 276 int ret;
277 u8 page; 277 u8 page;
278 278
279 if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) 279 if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
280 return -EACCES; 280 return -EACCES;
281 281
282 /* make sure the TWSI bus is in a sane state */ 282 /* make sure the TWSI bus is in a sane state */
@@ -355,6 +355,8 @@ int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
355 * The calls to qsfp_{read,write} in this function correctly handle the 355 * The calls to qsfp_{read,write} in this function correctly handle the
356 * address map difference between this mapping and the mapping implemented 356 * address map difference between this mapping and the mapping implemented
357 * by those functions 357 * by those functions
358 *
359 * The caller must be holding the QSFP i2c chain resource.
358 */ 360 */
359int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) 361int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
360{ 362{
@@ -371,13 +373,9 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
371 373
372 if (!qsfp_mod_present(ppd)) { 374 if (!qsfp_mod_present(ppd)) {
373 ret = -ENODEV; 375 ret = -ENODEV;
374 goto bail_no_release; 376 goto bail;
375 } 377 }
376 378
377 ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
378 if (ret)
379 goto bail_no_release;
380
381 ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE); 379 ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE);
382 if (ret != QSFP_PAGESIZE) { 380 if (ret != QSFP_PAGESIZE) {
383 dd_dev_info(ppd->dd, 381 dd_dev_info(ppd->dd,
@@ -440,8 +438,6 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
440 } 438 }
441 } 439 }
442 440
443 release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
444
445 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); 441 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
446 ppd->qsfp_info.cache_valid = 1; 442 ppd->qsfp_info.cache_valid = 1;
447 ppd->qsfp_info.cache_refresh_required = 0; 443 ppd->qsfp_info.cache_refresh_required = 0;
@@ -450,8 +446,6 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
450 return 0; 446 return 0;
451 447
452bail: 448bail:
453 release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
454bail_no_release:
455 memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128)); 449 memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
456 return ret; 450 return ret;
457} 451}
@@ -466,7 +460,28 @@ const char * const hfi1_qsfp_devtech[16] = {
466#define QSFP_DUMP_CHUNK 16 /* Holds longest string */ 460#define QSFP_DUMP_CHUNK 16 /* Holds longest string */
467#define QSFP_DEFAULT_HDR_CNT 224 461#define QSFP_DEFAULT_HDR_CNT 224
468 462
469static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; 463#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
464#define QSFP_HIGH_PWR(pbyte) ((pbyte) & 3)
465/* For use with QSFP_HIGH_PWR macro */
466#define QSFP_HIGH_PWR_UNUSED 0 /* Bits [1:0] = 00 implies low power module */
467
468/*
469 * Takes power class byte [Page 00 Byte 129] in SFF 8636
470 * Returns power class as integer (1 through 7, per SFF 8636 rev 2.4)
471 */
472int get_qsfp_power_class(u8 power_byte)
473{
474 if (QSFP_HIGH_PWR(power_byte) == QSFP_HIGH_PWR_UNUSED)
475 /* power classes count from 1, their bit encodings from 0 */
476 return (QSFP_PWR(power_byte) + 1);
477 /*
478 * 00 in the high power classes stands for unused, bringing
479 * balance to the off-by-1 offset above, we add 4 here to
480 * account for the difference between the low and high power
481 * groups
482 */
483 return (QSFP_HIGH_PWR(power_byte) + 4);
484}
470 485
471int qsfp_mod_present(struct hfi1_pportdata *ppd) 486int qsfp_mod_present(struct hfi1_pportdata *ppd)
472{ 487{
@@ -537,6 +552,16 @@ set_zeroes:
537 return ret; 552 return ret;
538} 553}
539 554
555static const char *pwr_codes[8] = {"N/AW",
556 "1.5W",
557 "2.0W",
558 "2.5W",
559 "3.5W",
560 "4.0W",
561 "4.5W",
562 "5.0W"
563 };
564
540int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) 565int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
541{ 566{
542 u8 *cache = &ppd->qsfp_info.cache[0]; 567 u8 *cache = &ppd->qsfp_info.cache[0];
@@ -546,6 +571,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
546 int bidx = 0; 571 int bidx = 0;
547 u8 *atten = &cache[QSFP_ATTEN_OFFS]; 572 u8 *atten = &cache[QSFP_ATTEN_OFFS];
548 u8 *vendor_oui = &cache[QSFP_VOUI_OFFS]; 573 u8 *vendor_oui = &cache[QSFP_VOUI_OFFS];
574 u8 power_byte = 0;
549 575
550 sofar = 0; 576 sofar = 0;
551 lenstr[0] = ' '; 577 lenstr[0] = ' ';
@@ -555,9 +581,9 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
555 if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS])) 581 if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
556 sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]); 582 sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
557 583
584 power_byte = cache[QSFP_MOD_PWR_OFFS];
558 sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", 585 sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n",
559 pwr_codes + 586 pwr_codes[get_qsfp_power_class(power_byte)]);
560 (QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]) * 4));
561 587
562 sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n", 588 sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n",
563 lenstr, 589 lenstr,
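
The new get_qsfp_power_class() decodes SFF-8636 byte 129: bits [7:6] encode power classes 1 through 4, and bits [1:0] encode classes 5 through 7, with 00 meaning the high-power group is unused. A self-contained copy of that decode with a few example byte values; the test values in main() are illustrative.

#include <stdio.h>

#define QSFP_PWR(pbyte)         (((pbyte) >> 6) & 3)   /* bits [7:6]: classes 1-4 */
#define QSFP_HIGH_PWR(pbyte)    ((pbyte) & 3)          /* bits [1:0]: classes 5-7 */
#define QSFP_HIGH_PWR_UNUSED    0                      /* 00 = high power unused  */

static int get_qsfp_power_class(unsigned char power_byte)
{
        if (QSFP_HIGH_PWR(power_byte) == QSFP_HIGH_PWR_UNUSED)
                /* power classes count from 1, their bit encodings from 0 */
                return QSFP_PWR(power_byte) + 1;
        /* high power group: encodings 1..3 map to classes 5..7 */
        return QSFP_HIGH_PWR(power_byte) + 4;
}

int main(void)
{
        printf("0x00 -> class %d\n", get_qsfp_power_class(0x00)); /* 1 */
        printf("0xC0 -> class %d\n", get_qsfp_power_class(0xC0)); /* 4 */
        printf("0xC1 -> class %d\n", get_qsfp_power_class(0xC1)); /* 5 */
        printf("0x03 -> class %d\n", get_qsfp_power_class(0x03)); /* 7 */
        return 0;
}
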
diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h
index 831fe4cf1345..dadc66c442b9 100644
--- a/drivers/staging/rdma/hfi1/qsfp.h
+++ b/drivers/staging/rdma/hfi1/qsfp.h
@@ -82,8 +82,9 @@
82/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */ 82/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
83#define QSFP_MOD_ID_OFFS 128 83#define QSFP_MOD_ID_OFFS 128
84/* 84/*
85 * Byte 129 is "Extended Identifier". We only care about D7,D6: Power class 85 * Byte 129 is "Extended Identifier".
86 * 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W 86 * For bits [7:6]: 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W
87 * For bits [1:0]: 0:Unused, 1:4W, 2:4.5W, 3:5W
87 */ 88 */
88#define QSFP_MOD_PWR_OFFS 129 89#define QSFP_MOD_PWR_OFFS 129
89/* Byte 130 is Connector type. Not Intel req'd */ 90/* Byte 130 is Connector type. Not Intel req'd */
@@ -190,6 +191,9 @@ extern const char *const hfi1_qsfp_devtech[16];
190#define QSFP_HIGH_BIAS_WARNING 0x22 191#define QSFP_HIGH_BIAS_WARNING 0x22
191#define QSFP_LOW_BIAS_WARNING 0x11 192#define QSFP_LOW_BIAS_WARNING 0x11
192 193
194#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
195#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
196
193/* 197/*
194 * struct qsfp_data encapsulates state of QSFP device for one port. 198 * struct qsfp_data encapsulates state of QSFP device for one port.
195 * it will be part of port-specific data if a board supports QSFP. 199 * it will be part of port-specific data if a board supports QSFP.
@@ -201,12 +205,6 @@ extern const char *const hfi1_qsfp_devtech[16];
201 * and let the qsfp_lock arbitrate access to common resources. 205 * and let the qsfp_lock arbitrate access to common resources.
202 * 206 *
203 */ 207 */
204
205#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
206#define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4)
207#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
208#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
209
210struct qsfp_data { 208struct qsfp_data {
211 /* Helps to find our way */ 209 /* Helps to find our way */
212 struct hfi1_pportdata *ppd; 210 struct hfi1_pportdata *ppd;
@@ -223,6 +221,7 @@ struct qsfp_data {
223 221
224int refresh_qsfp_cache(struct hfi1_pportdata *ppd, 222int refresh_qsfp_cache(struct hfi1_pportdata *ppd,
225 struct qsfp_data *cp); 223 struct qsfp_data *cp);
224int get_qsfp_power_class(u8 power_byte);
226int qsfp_mod_present(struct hfi1_pportdata *ppd); 225int qsfp_mod_present(struct hfi1_pportdata *ppd);
227int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, 226int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr,
228 u32 len, u8 *data); 227 u32 len, u8 *data);
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index 0d7e1017f3cb..792f15eb8efe 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c
@@ -1497,7 +1497,7 @@ reserved:
1497 /* Ignore reserved NAK codes. */ 1497 /* Ignore reserved NAK codes. */
1498 goto bail_stop; 1498 goto bail_stop;
1499 } 1499 }
1500 return ret; 1500 /* cannot be reached */
1501bail_stop: 1501bail_stop:
1502 hfi1_stop_rc_timers(qp); 1502 hfi1_stop_rc_timers(qp);
1503 return ret; 1503 return ret;
@@ -2021,8 +2021,6 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
2021 if (sl >= OPA_MAX_SLS) 2021 if (sl >= OPA_MAX_SLS)
2022 return; 2022 return;
2023 2023
2024 cca_timer = &ppd->cca_timer[sl];
2025
2026 cc_state = get_cc_state(ppd); 2024 cc_state = get_cc_state(ppd);
2027 2025
2028 if (!cc_state) 2026 if (!cc_state)
@@ -2041,6 +2039,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
2041 2039
2042 spin_lock_irqsave(&ppd->cca_timer_lock, flags); 2040 spin_lock_irqsave(&ppd->cca_timer_lock, flags);
2043 2041
2042 cca_timer = &ppd->cca_timer[sl];
2044 if (cca_timer->ccti < ccti_limit) { 2043 if (cca_timer->ccti < ccti_limit) {
2045 if (cca_timer->ccti + ccti_incr <= ccti_limit) 2044 if (cca_timer->ccti + ccti_incr <= ccti_limit)
2046 cca_timer->ccti += ccti_incr; 2045 cca_timer->ccti += ccti_incr;
@@ -2049,8 +2048,6 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
2049 set_link_ipg(ppd); 2048 set_link_ipg(ppd);
2050 } 2049 }
2051 2050
2052 spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
2053
2054 ccti = cca_timer->ccti; 2051 ccti = cca_timer->ccti;
2055 2052
2056 if (!hrtimer_active(&cca_timer->hrtimer)) { 2053 if (!hrtimer_active(&cca_timer->hrtimer)) {
@@ -2061,6 +2058,8 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
2061 HRTIMER_MODE_REL); 2058 HRTIMER_MODE_REL);
2062 } 2059 }
2063 2060
2061 spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
2062
2064 if ((trigger_threshold != 0) && (ccti >= trigger_threshold)) 2063 if ((trigger_threshold != 0) && (ccti >= trigger_threshold))
2065 log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type); 2064 log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
2066} 2065}
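
The rc.c hunk widens the cca_timer_lock critical section so the per-SL CCA state is dereferenced, updated, snapshotted, and the hrtimer armed all under the lock; only the logging decision afterwards uses the snapshot. A userspace sketch of that ordering, with a pthread mutex standing in for the spinlock and a flag standing in for hrtimer_start(); all names here are stand-ins.

#include <pthread.h>
#include <stdio.h>

struct cca_timer {
        pthread_mutex_t lock;
        unsigned int ccti;
        int timer_armed;
};

static void process_becn(struct cca_timer *t, unsigned int incr,
                         unsigned int limit, unsigned int trigger)
{
        unsigned int ccti;

        pthread_mutex_lock(&t->lock);
        if (t->ccti < limit)
                t->ccti = (t->ccti + incr <= limit) ? t->ccti + incr : limit;
        ccti = t->ccti;                 /* snapshot while still holding the lock */
        if (!t->timer_armed)
                t->timer_armed = 1;     /* stands in for hrtimer_start() */
        pthread_mutex_unlock(&t->lock);

        if (trigger && ccti >= trigger)
                printf("log CCA event at ccti=%u\n", ccti);
}

int main(void)
{
        struct cca_timer t = { .lock = PTHREAD_MUTEX_INITIALIZER,
                               .ccti = 0, .timer_armed = 0 };

        process_becn(&t, 4, 10, 8);     /* ccti=4, no log yet */
        process_becn(&t, 4, 10, 8);     /* ccti=8, logs */
        return 0;
}
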
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c
index 08813cdbd475..a659aec3c3c6 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/staging/rdma/hfi1/ruc.c
@@ -831,7 +831,6 @@ void hfi1_do_send(struct rvt_qp *qp)
831 struct hfi1_pkt_state ps; 831 struct hfi1_pkt_state ps;
832 struct hfi1_qp_priv *priv = qp->priv; 832 struct hfi1_qp_priv *priv = qp->priv;
833 int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps); 833 int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
834 unsigned long flags;
835 unsigned long timeout; 834 unsigned long timeout;
836 unsigned long timeout_int; 835 unsigned long timeout_int;
837 int cpu; 836 int cpu;
@@ -866,11 +865,11 @@ void hfi1_do_send(struct rvt_qp *qp)
866 timeout_int = SEND_RESCHED_TIMEOUT; 865 timeout_int = SEND_RESCHED_TIMEOUT;
867 } 866 }
868 867
869 spin_lock_irqsave(&qp->s_lock, flags); 868 spin_lock_irqsave(&qp->s_lock, ps.flags);
870 869
871 /* Return if we are already busy processing a work request. */ 870 /* Return if we are already busy processing a work request. */
872 if (!hfi1_send_ok(qp)) { 871 if (!hfi1_send_ok(qp)) {
873 spin_unlock_irqrestore(&qp->s_lock, flags); 872 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
874 return; 873 return;
875 } 874 }
876 875
@@ -884,7 +883,7 @@ void hfi1_do_send(struct rvt_qp *qp)
884 do { 883 do {
885 /* Check for a constructed packet to be sent. */ 884 /* Check for a constructed packet to be sent. */
886 if (qp->s_hdrwords != 0) { 885 if (qp->s_hdrwords != 0) {
887 spin_unlock_irqrestore(&qp->s_lock, flags); 886 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
888 /* 887 /*
889 * If the packet cannot be sent now, return and 888 * If the packet cannot be sent now, return and
890 * the send tasklet will be woken up later. 889 * the send tasklet will be woken up later.
@@ -897,11 +896,14 @@ void hfi1_do_send(struct rvt_qp *qp)
897 if (unlikely(time_after(jiffies, timeout))) { 896 if (unlikely(time_after(jiffies, timeout))) {
898 if (workqueue_congested(cpu, 897 if (workqueue_congested(cpu,
899 ps.ppd->hfi1_wq)) { 898 ps.ppd->hfi1_wq)) {
900 spin_lock_irqsave(&qp->s_lock, flags); 899 spin_lock_irqsave(
900 &qp->s_lock,
901 ps.flags);
901 qp->s_flags &= ~RVT_S_BUSY; 902 qp->s_flags &= ~RVT_S_BUSY;
902 hfi1_schedule_send(qp); 903 hfi1_schedule_send(qp);
903 spin_unlock_irqrestore(&qp->s_lock, 904 spin_unlock_irqrestore(
904 flags); 905 &qp->s_lock,
906 ps.flags);
905 this_cpu_inc( 907 this_cpu_inc(
906 *ps.ppd->dd->send_schedule); 908 *ps.ppd->dd->send_schedule);
907 return; 909 return;
@@ -913,11 +915,11 @@ void hfi1_do_send(struct rvt_qp *qp)
913 } 915 }
914 timeout = jiffies + (timeout_int) / 8; 916 timeout = jiffies + (timeout_int) / 8;
915 } 917 }
916 spin_lock_irqsave(&qp->s_lock, flags); 918 spin_lock_irqsave(&qp->s_lock, ps.flags);
917 } 919 }
918 } while (make_req(qp, &ps)); 920 } while (make_req(qp, &ps));
919 921
920 spin_unlock_irqrestore(&qp->s_lock, flags); 922 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
921} 923}
922 924
923/* 925/*
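
The ruc.c change, together with the ud.c hunk that follows, stops using a function-local flags variable and instead carries the saved irqsave state in the packet-state struct, so a callee can drop and retake the caller's s_lock with the same state. A loose userspace analogy, using a pthread mutex and an int in place of IRQ flags; none of the names below are driver APIs.

#include <pthread.h>
#include <stdio.h>

struct pkt_state {
        pthread_mutex_t *s_lock;
        int flags;                      /* stand-in for the saved irqsave flags */
};

static void loopback_path(struct pkt_state *ps)
{
        int tflags = ps->flags;         /* mirror of "tflags = ps->flags" in ud.c */

        pthread_mutex_unlock(ps->s_lock);
        printf("doing loopback work without the send lock\n");
        pthread_mutex_lock(ps->s_lock);
        ps->flags = tflags;             /* restore the saved state for the caller */
}

static void do_send(pthread_mutex_t *lock)
{
        struct pkt_state ps = { .s_lock = lock, .flags = 0 };

        pthread_mutex_lock(ps.s_lock);
        loopback_path(&ps);
        pthread_mutex_unlock(ps.s_lock);
}

int main(void)
{
        pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

        do_send(&lock);
        return 0;
}
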
diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c
index c7f1271190af..8cd6df8634ad 100644
--- a/drivers/staging/rdma/hfi1/sysfs.c
+++ b/drivers/staging/rdma/hfi1/sysfs.c
@@ -84,7 +84,7 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
84 rcu_read_unlock(); 84 rcu_read_unlock();
85 return -EINVAL; 85 return -EINVAL;
86 } 86 }
87 memcpy(buf, &cc_state->cct, count); 87 memcpy(buf, (void *)&cc_state->cct + pos, count);
88 rcu_read_unlock(); 88 rcu_read_unlock();
89 89
90 return count; 90 return count;
@@ -131,7 +131,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
131 rcu_read_unlock(); 131 rcu_read_unlock();
132 return -EINVAL; 132 return -EINVAL;
133 } 133 }
134 memcpy(buf, &cc_state->cong_setting, count); 134 memcpy(buf, (void *)&cc_state->cong_setting + pos, count);
135 rcu_read_unlock(); 135 rcu_read_unlock();
136 136
137 return count; 137 return count;
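
The sysfs.c fix makes the binary congestion-control reads honour the requested offset by copying from the start of the structure plus pos instead of always from the beginning. A minimal standalone model of an offset-aware binary read; the buffer and sizes are illustrative.

#include <stdio.h>
#include <string.h>

static long bin_read(const void *obj, size_t obj_size,
                     char *buf, size_t count, size_t pos)
{
        if (pos >= obj_size)
                return 0;                       /* EOF */
        if (count > obj_size - pos)
                count = obj_size - pos;         /* clamp to what is left */
        memcpy(buf, (const char *)obj + pos, count);    /* the "+ pos" is the fix */
        return (long)count;
}

int main(void)
{
        const char table[] = "0123456789";
        char out[4];
        long n = bin_read(table, sizeof(table) - 1, out, sizeof(out), 6);

        printf("read %ld bytes: %.*s\n", n, (int)n, out);       /* "6789" */
        return 0;
}
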
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index ae8a70f703eb..1e503ad0bebb 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c
@@ -322,7 +322,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
322 (lid == ppd->lid || 322 (lid == ppd->lid ||
323 (lid == be16_to_cpu(IB_LID_PERMISSIVE) && 323 (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
324 qp->ibqp.qp_type == IB_QPT_GSI)))) { 324 qp->ibqp.qp_type == IB_QPT_GSI)))) {
325 unsigned long flags; 325 unsigned long tflags = ps->flags;
326 /* 326 /*
327 * If DMAs are in progress, we can't generate 327 * If DMAs are in progress, we can't generate
328 * a completion for the loopback packet since 328 * a completion for the loopback packet since
@@ -335,10 +335,10 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
335 goto bail; 335 goto bail;
336 } 336 }
337 qp->s_cur = next_cur; 337 qp->s_cur = next_cur;
338 local_irq_save(flags); 338 spin_unlock_irqrestore(&qp->s_lock, tflags);
339 spin_unlock_irqrestore(&qp->s_lock, flags);
340 ud_loopback(qp, wqe); 339 ud_loopback(qp, wqe);
341 spin_lock_irqsave(&qp->s_lock, flags); 340 spin_lock_irqsave(&qp->s_lock, tflags);
341 ps->flags = tflags;
342 hfi1_send_complete(qp, wqe, IB_WC_SUCCESS); 342 hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
343 goto done_free_tx; 343 goto done_free_tx;
344 } 344 }
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
index 8bd56d5c783d..1b640a35b3fe 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.c
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c
@@ -399,8 +399,11 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
399 * pages, accept the amount pinned so far and program only that. 399 * pages, accept the amount pinned so far and program only that.
400 * User space knows how to deal with partially programmed buffers. 400 * User space knows how to deal with partially programmed buffers.
401 */ 401 */
402 if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) 402 if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) {
403 return -ENOMEM; 403 ret = -ENOMEM;
404 goto bail;
405 }
406
404 pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); 407 pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
405 if (pinned <= 0) { 408 if (pinned <= 0) {
406 ret = pinned; 409 ret = pinned;
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c
index d53a659548e0..0014c9c0e967 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@ -180,6 +180,8 @@ struct user_sdma_iovec {
180 u64 offset; 180 u64 offset;
181}; 181};
182 182
183#define SDMA_CACHE_NODE_EVICT BIT(0)
184
183struct sdma_mmu_node { 185struct sdma_mmu_node {
184 struct mmu_rb_node rb; 186 struct mmu_rb_node rb;
185 struct list_head list; 187 struct list_head list;
@@ -187,6 +189,7 @@ struct sdma_mmu_node {
187 atomic_t refcount; 189 atomic_t refcount;
188 struct page **pages; 190 struct page **pages;
189 unsigned npages; 191 unsigned npages;
192 unsigned long flags;
190}; 193};
191 194
192struct user_sdma_request { 195struct user_sdma_request {
@@ -597,6 +600,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
597 goto free_req; 600 goto free_req;
598 } 601 }
599 602
603 /* Checking P_KEY for requests from user-space */
604 if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc,
605 PKEY_CHECK_INVALID)) {
606 ret = -EINVAL;
607 goto free_req;
608 }
609
600 /* 610 /*
601 * Also should check the BTH.lnh. If it says the next header is GRH then 611 * Also should check the BTH.lnh. If it says the next header is GRH then
602 * the RXE parsing will be off and will land in the middle of the KDETH 612 * the RXE parsing will be off and will land in the middle of the KDETH
@@ -1030,27 +1040,29 @@ static inline int num_user_pages(const struct iovec *iov)
1030 return 1 + ((epage - spage) >> PAGE_SHIFT); 1040 return 1 + ((epage - spage) >> PAGE_SHIFT);
1031} 1041}
1032 1042
1033/* Caller must hold pq->evict_lock */
1034static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) 1043static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
1035{ 1044{
1036 u32 cleared = 0; 1045 u32 cleared = 0;
1037 struct sdma_mmu_node *node, *ptr; 1046 struct sdma_mmu_node *node, *ptr;
1047 struct list_head to_evict = LIST_HEAD_INIT(to_evict);
1038 1048
1049 spin_lock(&pq->evict_lock);
1039 list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) { 1050 list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
1040 /* Make sure that no one is still using the node. */ 1051 /* Make sure that no one is still using the node. */
1041 if (!atomic_read(&node->refcount)) { 1052 if (!atomic_read(&node->refcount)) {
1042 /* 1053 set_bit(SDMA_CACHE_NODE_EVICT, &node->flags);
1043 * Need to use the page count now as the remove callback 1054 list_del_init(&node->list);
1044 * will free the node. 1055 list_add(&node->list, &to_evict);
1045 */
1046 cleared += node->npages; 1056 cleared += node->npages;
1047 spin_unlock(&pq->evict_lock);
1048 hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
1049 spin_lock(&pq->evict_lock);
1050 if (cleared >= npages) 1057 if (cleared >= npages)
1051 break; 1058 break;
1052 } 1059 }
1053 } 1060 }
1061 spin_unlock(&pq->evict_lock);
1062
1063 list_for_each_entry_safe(node, ptr, &to_evict, list)
1064 hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
1065
1054 return cleared; 1066 return cleared;
1055} 1067}
1056 1068
@@ -1062,9 +1074,9 @@ static int pin_vector_pages(struct user_sdma_request *req,
1062 struct sdma_mmu_node *node = NULL; 1074 struct sdma_mmu_node *node = NULL;
1063 struct mmu_rb_node *rb_node; 1075 struct mmu_rb_node *rb_node;
1064 1076
1065 rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root, 1077 rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root,
1066 (unsigned long)iovec->iov.iov_base, 1078 (unsigned long)iovec->iov.iov_base,
1067 iovec->iov.iov_len); 1079 iovec->iov.iov_len);
1068 if (rb_node && !IS_ERR(rb_node)) 1080 if (rb_node && !IS_ERR(rb_node))
1069 node = container_of(rb_node, struct sdma_mmu_node, rb); 1081 node = container_of(rb_node, struct sdma_mmu_node, rb);
1070 else 1082 else
@@ -1076,7 +1088,6 @@ static int pin_vector_pages(struct user_sdma_request *req,
1076 return -ENOMEM; 1088 return -ENOMEM;
1077 1089
1078 node->rb.addr = (unsigned long)iovec->iov.iov_base; 1090 node->rb.addr = (unsigned long)iovec->iov.iov_base;
1079 node->rb.len = iovec->iov.iov_len;
1080 node->pq = pq; 1091 node->pq = pq;
1081 atomic_set(&node->refcount, 0); 1092 atomic_set(&node->refcount, 0);
1082 INIT_LIST_HEAD(&node->list); 1093 INIT_LIST_HEAD(&node->list);
@@ -1093,11 +1104,25 @@ static int pin_vector_pages(struct user_sdma_request *req,
1093 memcpy(pages, node->pages, node->npages * sizeof(*pages)); 1104 memcpy(pages, node->pages, node->npages * sizeof(*pages));
1094 1105
1095 npages -= node->npages; 1106 npages -= node->npages;
1107
1108 /*
1109 * If rb_node is NULL, it means that this is a brand new node
1110 * and is therefore not on the eviction list.
1111 * If, however, the rb_node is non-NULL, it means that the
1112 * node is already in RB tree and, therefore on the eviction
1113 * list (nodes are unconditionally inserted in the eviction
1114 * list). In that case, we have to remove the node prior to
1115 * calling the eviction function in order to prevent it from
1116 * freeing this node.
1117 */
1118 if (rb_node) {
1119 spin_lock(&pq->evict_lock);
1120 list_del_init(&node->list);
1121 spin_unlock(&pq->evict_lock);
1122 }
1096retry: 1123retry:
1097 if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) { 1124 if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
1098 spin_lock(&pq->evict_lock);
1099 cleared = sdma_cache_evict(pq, npages); 1125 cleared = sdma_cache_evict(pq, npages);
1100 spin_unlock(&pq->evict_lock);
1101 if (cleared >= npages) 1126 if (cleared >= npages)
1102 goto retry; 1127 goto retry;
1103 } 1128 }
@@ -1117,37 +1142,32 @@ retry:
1117 goto bail; 1142 goto bail;
1118 } 1143 }
1119 kfree(node->pages); 1144 kfree(node->pages);
1145 node->rb.len = iovec->iov.iov_len;
1120 node->pages = pages; 1146 node->pages = pages;
1121 node->npages += pinned; 1147 node->npages += pinned;
1122 npages = node->npages; 1148 npages = node->npages;
1123 spin_lock(&pq->evict_lock); 1149 spin_lock(&pq->evict_lock);
1124 if (!rb_node) 1150 list_add(&node->list, &pq->evict);
1125 list_add(&node->list, &pq->evict);
1126 else
1127 list_move(&node->list, &pq->evict);
1128 pq->n_locked += pinned; 1151 pq->n_locked += pinned;
1129 spin_unlock(&pq->evict_lock); 1152 spin_unlock(&pq->evict_lock);
1130 } 1153 }
1131 iovec->pages = node->pages; 1154 iovec->pages = node->pages;
1132 iovec->npages = npages; 1155 iovec->npages = npages;
1133 1156
1134 if (!rb_node) { 1157 ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
1135 ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); 1158 if (ret) {
1136 if (ret) { 1159 spin_lock(&pq->evict_lock);
1137 spin_lock(&pq->evict_lock); 1160 if (!list_empty(&node->list))
1138 list_del(&node->list); 1161 list_del(&node->list);
1139 pq->n_locked -= node->npages; 1162 pq->n_locked -= node->npages;
1140 spin_unlock(&pq->evict_lock); 1163 spin_unlock(&pq->evict_lock);
1141 ret = 0; 1164 goto bail;
1142 goto bail;
1143 }
1144 } else {
1145 atomic_inc(&node->refcount);
1146 } 1165 }
1147 return 0; 1166 return 0;
1148bail: 1167bail:
1149 if (!rb_node) 1168 if (rb_node)
1150 kfree(node); 1169 unpin_vector_pages(current->mm, node->pages, 0, node->npages);
1170 kfree(node);
1151 return ret; 1171 return ret;
1152} 1172}
1153 1173
@@ -1558,7 +1578,20 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
1558 container_of(mnode, struct sdma_mmu_node, rb); 1578 container_of(mnode, struct sdma_mmu_node, rb);
1559 1579
1560 spin_lock(&node->pq->evict_lock); 1580 spin_lock(&node->pq->evict_lock);
1561 list_del(&node->list); 1581 /*
1582 * We've been called by the MMU notifier but this node has been
1583 * scheduled for eviction. The eviction function will take care
1584 * of freeing this node.
1585 * We have to take the above lock first because we are racing
1586 * against the setting of the bit in the eviction function.
1587 */
1588 if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) {
1589 spin_unlock(&node->pq->evict_lock);
1590 return;
1591 }
1592
1593 if (!list_empty(&node->list))
1594 list_del(&node->list);
1562 node->pq->n_locked -= node->npages; 1595 node->pq->n_locked -= node->npages;
1563 spin_unlock(&node->pq->evict_lock); 1596 spin_unlock(&node->pq->evict_lock);
1564 1597
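
The user_sdma.c hunks close a race between cache eviction and the MMU-notifier remove callback: eviction flags idle nodes and moves them to a private list while holding evict_lock, tears them down only after dropping it, and the remove callback backs off when it sees the flag so each node is freed exactly once. A simplified userspace model of that flow, with a singly linked list and a mutex standing in for the driver's structures; all names are stand-ins.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NODE_EVICT 0x1

struct node {
        struct node *next;
        int refcount;
        unsigned int flags;
};

static pthread_mutex_t evict_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *evict_list;

/* analogue of the notifier remove callback: back off if eviction owns the node */
static void notifier_remove(struct node *n)
{
        struct node **pp;

        pthread_mutex_lock(&evict_lock);
        if (n->flags & NODE_EVICT) {
                pthread_mutex_unlock(&evict_lock);
                return;                         /* the eviction path will free it */
        }
        for (pp = &evict_list; *pp; pp = &(*pp)->next)
                if (*pp == n) {
                        *pp = n->next;          /* analogue of list_del() */
                        break;
                }
        pthread_mutex_unlock(&evict_lock);
        free(n);
}

/* analogue of sdma_cache_evict(): claim idle nodes under the lock, free later */
static unsigned int cache_evict(void)
{
        struct node **pp = &evict_list, *n, *to_evict = NULL;
        unsigned int cleared = 0;

        pthread_mutex_lock(&evict_lock);
        while ((n = *pp) != NULL) {
                if (n->refcount) {              /* still in use: keep it cached */
                        pp = &n->next;
                        continue;
                }
                n->flags |= NODE_EVICT;         /* claim before dropping the lock */
                *pp = n->next;
                n->next = to_evict;
                to_evict = n;
                cleared++;
        }
        pthread_mutex_unlock(&evict_lock);

        while ((n = to_evict) != NULL) {        /* tear down outside the lock */
                to_evict = n->next;
                free(n);
        }
        return cleared;
}

int main(void)
{
        struct node *busy = calloc(1, sizeof(*busy));
        struct node *idle = calloc(1, sizeof(*idle));

        busy->refcount = 1;
        busy->next = evict_list;
        evict_list = busy;
        idle->next = evict_list;
        evict_list = idle;

        printf("evicted %u node(s)\n", cache_evict());  /* frees the idle node only */

        busy->refcount = 0;
        notifier_remove(busy);          /* flag not set, so the notifier frees it */
        return 0;
}
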
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index 89f2aad45c1b..9cdc85fa366f 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -545,7 +545,7 @@ static inline int qp_ok(int opcode, struct hfi1_packet *packet)
545 545
546 if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) 546 if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
547 goto dropit; 547 goto dropit;
548 if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) || 548 if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
549 (opcode == IB_OPCODE_CNP)) 549 (opcode == IB_OPCODE_CNP))
550 return 1; 550 return 1;
551dropit: 551dropit:
@@ -1089,16 +1089,16 @@ bail:
1089 1089
1090/* 1090/*
1091 * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent 1091 * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent
1092 * being an entry from the ingress partition key table), return 0 1092 * being an entry from the partition key table), return 0
1093 * otherwise. Use the matching criteria for egress partition keys 1093 * otherwise. Use the matching criteria for egress partition keys
1094 * specified in the OPAv1 spec., section 9.11.7. 1094 * specified in the OPAv1 spec., section 9.11.7.
1095 */ 1095 */
1096static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) 1096static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
1097{ 1097{
1098 u16 mkey = pkey & PKEY_LOW_15_MASK; 1098 u16 mkey = pkey & PKEY_LOW_15_MASK;
1099 u16 ment = ent & PKEY_LOW_15_MASK; 1099 u16 mentry = ent & PKEY_LOW_15_MASK;
1100 1100
1101 if (mkey == ment) { 1101 if (mkey == mentry) {
1102 /* 1102 /*
1103 * If pkey[15] is set (full partition member), 1103 * If pkey[15] is set (full partition member),
1104 * is bit 15 in the corresponding table element 1104 * is bit 15 in the corresponding table element
@@ -1111,32 +1111,32 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
1111 return 0; 1111 return 0;
1112} 1112}
1113 1113
1114/* 1114/**
1115 * egress_pkey_check - return 0 if hdr's pkey matches according to the 1115 * egress_pkey_check - check P_KEY of a packet
1116 * criteria in the OPAv1 spec., section 9.11.7. 1116 * @ppd: Physical IB port data
1117 * @lrh: Local route header
1118 * @bth: Base transport header
1119 * @sc5: SC for packet
1120 * @s_pkey_index: It will be used for look up optimization for kernel contexts
1121 * only. If it is negative value, then it means user contexts is calling this
1122 * function.
1123 *
1124 * It checks if hdr's pkey is valid.
1125 *
1126 * Return: 0 on success, otherwise, 1
1117 */ 1127 */
1118static inline int egress_pkey_check(struct hfi1_pportdata *ppd, 1128int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth,
1119 struct hfi1_ib_header *hdr, 1129 u8 sc5, int8_t s_pkey_index)
1120 struct rvt_qp *qp)
1121{ 1130{
1122 struct hfi1_qp_priv *priv = qp->priv;
1123 struct hfi1_other_headers *ohdr;
1124 struct hfi1_devdata *dd; 1131 struct hfi1_devdata *dd;
1125 int i = 0; 1132 int i;
1126 u16 pkey; 1133 u16 pkey;
1127 u8 lnh, sc5 = priv->s_sc; 1134 int is_user_ctxt_mechanism = (s_pkey_index < 0);
1128 1135
1129 if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT)) 1136 if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
1130 return 0; 1137 return 0;
1131 1138
1132 /* locate the pkey within the headers */ 1139 pkey = (u16)be32_to_cpu(bth[0]);
1133 lnh = be16_to_cpu(hdr->lrh[0]) & 3;
1134 if (lnh == HFI1_LRH_GRH)
1135 ohdr = &hdr->u.l.oth;
1136 else
1137 ohdr = &hdr->u.oth;
1138
1139 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
1140 1140
1141 /* If SC15, pkey[0:14] must be 0x7fff */ 1141 /* If SC15, pkey[0:14] must be 0x7fff */
1142 if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) 1142 if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
@@ -1146,28 +1146,37 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
1146 if ((pkey & PKEY_LOW_15_MASK) == 0) 1146 if ((pkey & PKEY_LOW_15_MASK) == 0)
1147 goto bad; 1147 goto bad;
1148 1148
1149 /* The most likely matching pkey has index qp->s_pkey_index */ 1149 /*
1150 if (unlikely(!egress_pkey_matches_entry(pkey, 1150 * For the kernel contexts only, if a qp is passed into the function,
1151 ppd->pkeys 1151 * the most likely matching pkey has index qp->s_pkey_index
1152 [qp->s_pkey_index]))) { 1152 */
1153 /* no match - try the entire table */ 1153 if (!is_user_ctxt_mechanism &&
1154 for (; i < MAX_PKEY_VALUES; i++) { 1154 egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) {
1155 if (egress_pkey_matches_entry(pkey, ppd->pkeys[i])) 1155 return 0;
1156 break;
1157 }
1158 } 1156 }
1159 1157
1160 if (i < MAX_PKEY_VALUES) 1158 for (i = 0; i < MAX_PKEY_VALUES; i++) {
1161 return 0; 1159 if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
1160 return 0;
1161 }
1162bad: 1162bad:
1163 incr_cntr64(&ppd->port_xmit_constraint_errors); 1163 /*
1164 dd = ppd->dd; 1164 * For the user-context mechanism, the P_KEY check would only happen
1165 if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) { 1165 * once per SDMA request, not once per packet. Therefore, there's no
1166 u16 slid = be16_to_cpu(hdr->lrh[3]); 1166 * need to increment the counter for the user-context mechanism.
1167 1167 */
1168 dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK; 1168 if (!is_user_ctxt_mechanism) {
1169 dd->err_info_xmit_constraint.slid = slid; 1169 incr_cntr64(&ppd->port_xmit_constraint_errors);
1170 dd->err_info_xmit_constraint.pkey = pkey; 1170 dd = ppd->dd;
1171 if (!(dd->err_info_xmit_constraint.status &
1172 OPA_EI_STATUS_SMASK)) {
1173 u16 slid = be16_to_cpu(lrh[3]);
1174
1175 dd->err_info_xmit_constraint.status |=
1176 OPA_EI_STATUS_SMASK;
1177 dd->err_info_xmit_constraint.slid = slid;
1178 dd->err_info_xmit_constraint.pkey = pkey;
1179 }
1171 } 1180 }
1172 return 1; 1181 return 1;
1173} 1182}
@@ -1227,11 +1236,26 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
1227{ 1236{
1228 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); 1237 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1229 struct hfi1_qp_priv *priv = qp->priv; 1238 struct hfi1_qp_priv *priv = qp->priv;
1239 struct hfi1_other_headers *ohdr;
1240 struct hfi1_ib_header *hdr;
1230 send_routine sr; 1241 send_routine sr;
1231 int ret; 1242 int ret;
1243 u8 lnh;
1244
1245 hdr = &ps->s_txreq->phdr.hdr;
1246 /* locate the pkey within the headers */
1247 lnh = be16_to_cpu(hdr->lrh[0]) & 3;
1248 if (lnh == HFI1_LRH_GRH)
1249 ohdr = &hdr->u.l.oth;
1250 else
1251 ohdr = &hdr->u.oth;
1232 1252
1233 sr = get_send_routine(qp, ps->s_txreq); 1253 sr = get_send_routine(qp, ps->s_txreq);
1234 ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp); 1254 ret = egress_pkey_check(dd->pport,
1255 hdr->lrh,
1256 ohdr->bth,
1257 priv->s_sc,
1258 qp->s_pkey_index);
1235 if (unlikely(ret)) { 1259 if (unlikely(ret)) {
1236 /* 1260 /*
1237 * The value we are returning here does not get propagated to 1261 * The value we are returning here does not get propagated to
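
The verbs.c refactor lets egress_pkey_check() be called with raw LRH/BTH pointers, the SC, and an optional pkey-index hint, so the user SDMA path can reuse it by passing a negative hint. A standalone sketch of the matching rule as described by the comments in the hunk: the low 15 bits must be equal, a full-member packet key needs a full-member table entry, and a limited-member key matches either. Treat this as an approximation under those assumptions, not the driver's exact code.

#include <stdio.h>

#define PKEY_LOW_15_MASK 0x7fff
#define PKEY_MEMBER_MASK 0x8000

static int pkey_matches_entry(unsigned short pkey, unsigned short ent)
{
        if ((pkey & PKEY_LOW_15_MASK) != (ent & PKEY_LOW_15_MASK))
                return 0;
        if (pkey & PKEY_MEMBER_MASK)            /* full-member packet key ...   */
                return !!(ent & PKEY_MEMBER_MASK); /* ... needs a full-member entry */
        return 1;                               /* limited-member key matches either */
}

static int egress_pkey_check(unsigned short pkey, const unsigned short *table,
                             int n, int hint)
{
        int i;

        if ((pkey & PKEY_LOW_15_MASK) == 0)
                return 1;                       /* 0x0000/0x8000 are invalid */
        if (hint >= 0 && hint < n && pkey_matches_entry(pkey, table[hint]))
                return 0;                       /* kernel fast path via the hint */
        for (i = 0; i < n; i++)                 /* full scan: user caller or miss */
                if (pkey_matches_entry(pkey, table[i]))
                        return 0;
        return 1;                               /* fail: constraint error */
}

int main(void)
{
        unsigned short table[] = { 0xffff, 0x8001, 0x7fff };

        printf("%d\n", egress_pkey_check(0x8001, table, 3, -1)); /* 0: full/full   */
        printf("%d\n", egress_pkey_check(0x0001, table, 3, 1));  /* 0: limited key */
        printf("%d\n", egress_pkey_check(0x0002, table, 3, -1)); /* 1: no entry    */
        return 0;
}
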
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 6c4670fffdbb..3ee223983b20 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -215,6 +215,7 @@ struct hfi1_pkt_state {
215 struct hfi1_ibport *ibp; 215 struct hfi1_ibport *ibp;
216 struct hfi1_pportdata *ppd; 216 struct hfi1_pportdata *ppd;
217 struct verbs_txreq *s_txreq; 217 struct verbs_txreq *s_txreq;
218 unsigned long flags;
218}; 219};
219 220
220#define HFI1_PSN_CREDIT 16 221#define HFI1_PSN_CREDIT 16
@@ -334,9 +335,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
334#endif 335#endif
335#define PSN_MODIFY_MASK 0xFFFFFF 336#define PSN_MODIFY_MASK 0xFFFFFF
336 337
337/* Number of bits to pay attention to in the opcode for checking qp type */
338#define OPCODE_QP_MASK 0xE0
339
340/* 338/*
341 * Compare the lower 24 bits of the msn values. 339 * Compare the lower 24 bits of the msn values.
342 * Returns an integer <, ==, or > than zero. 340 * Returns an integer <, ==, or > than zero.
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ab2bf12975e1..590384a2bf8b 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2195,7 +2195,7 @@ queue_full:
2195 transport_handle_queue_full(cmd, cmd->se_dev); 2195 transport_handle_queue_full(cmd, cmd->se_dev);
2196} 2196}
2197 2197
2198static inline void transport_free_sgl(struct scatterlist *sgl, int nents) 2198void target_free_sgl(struct scatterlist *sgl, int nents)
2199{ 2199{
2200 struct scatterlist *sg; 2200 struct scatterlist *sg;
2201 int count; 2201 int count;
@@ -2205,6 +2205,7 @@ static inline void transport_free_sgl(struct scatterlist *sgl, int nents)
2205 2205
2206 kfree(sgl); 2206 kfree(sgl);
2207} 2207}
2208EXPORT_SYMBOL(target_free_sgl);
2208 2209
2209static inline void transport_reset_sgl_orig(struct se_cmd *cmd) 2210static inline void transport_reset_sgl_orig(struct se_cmd *cmd)
2210{ 2211{
@@ -2225,7 +2226,7 @@ static inline void transport_reset_sgl_orig(struct se_cmd *cmd)
2225static inline void transport_free_pages(struct se_cmd *cmd) 2226static inline void transport_free_pages(struct se_cmd *cmd)
2226{ 2227{
2227 if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) { 2228 if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) {
2228 transport_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents); 2229 target_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents);
2229 cmd->t_prot_sg = NULL; 2230 cmd->t_prot_sg = NULL;
2230 cmd->t_prot_nents = 0; 2231 cmd->t_prot_nents = 0;
2231 } 2232 }
@@ -2236,7 +2237,7 @@ static inline void transport_free_pages(struct se_cmd *cmd)
2236 * SG_TO_MEM_NOALLOC to function with COMPARE_AND_WRITE 2237 * SG_TO_MEM_NOALLOC to function with COMPARE_AND_WRITE
2237 */ 2238 */
2238 if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) { 2239 if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) {
2239 transport_free_sgl(cmd->t_bidi_data_sg, 2240 target_free_sgl(cmd->t_bidi_data_sg,
2240 cmd->t_bidi_data_nents); 2241 cmd->t_bidi_data_nents);
2241 cmd->t_bidi_data_sg = NULL; 2242 cmd->t_bidi_data_sg = NULL;
2242 cmd->t_bidi_data_nents = 0; 2243 cmd->t_bidi_data_nents = 0;
@@ -2246,11 +2247,11 @@ static inline void transport_free_pages(struct se_cmd *cmd)
2246 } 2247 }
2247 transport_reset_sgl_orig(cmd); 2248 transport_reset_sgl_orig(cmd);
2248 2249
2249 transport_free_sgl(cmd->t_data_sg, cmd->t_data_nents); 2250 target_free_sgl(cmd->t_data_sg, cmd->t_data_nents);
2250 cmd->t_data_sg = NULL; 2251 cmd->t_data_sg = NULL;
2251 cmd->t_data_nents = 0; 2252 cmd->t_data_nents = 0;
2252 2253
2253 transport_free_sgl(cmd->t_bidi_data_sg, cmd->t_bidi_data_nents); 2254 target_free_sgl(cmd->t_bidi_data_sg, cmd->t_bidi_data_nents);
2254 cmd->t_bidi_data_sg = NULL; 2255 cmd->t_bidi_data_sg = NULL;
2255 cmd->t_bidi_data_nents = 0; 2256 cmd->t_bidi_data_nents = 0;
2256} 2257}
@@ -2324,20 +2325,22 @@ EXPORT_SYMBOL(transport_kunmap_data_sg);
2324 2325
2325int 2326int
2326target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length, 2327target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length,
2327 bool zero_page) 2328 bool zero_page, bool chainable)
2328{ 2329{
2329 struct scatterlist *sg; 2330 struct scatterlist *sg;
2330 struct page *page; 2331 struct page *page;
2331 gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0; 2332 gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0;
2332 unsigned int nent; 2333 unsigned int nalloc, nent;
2333 int i = 0; 2334 int i = 0;
2334 2335
2335 nent = DIV_ROUND_UP(length, PAGE_SIZE); 2336 nalloc = nent = DIV_ROUND_UP(length, PAGE_SIZE);
2336 sg = kmalloc(sizeof(struct scatterlist) * nent, GFP_KERNEL); 2337 if (chainable)
2338 nalloc++;
2339 sg = kmalloc_array(nalloc, sizeof(struct scatterlist), GFP_KERNEL);
2337 if (!sg) 2340 if (!sg)
2338 return -ENOMEM; 2341 return -ENOMEM;
2339 2342
2340 sg_init_table(sg, nent); 2343 sg_init_table(sg, nalloc);
2341 2344
2342 while (length) { 2345 while (length) {
2343 u32 page_len = min_t(u32, length, PAGE_SIZE); 2346 u32 page_len = min_t(u32, length, PAGE_SIZE);
@@ -2361,6 +2364,7 @@ out:
2361 kfree(sg); 2364 kfree(sg);
2362 return -ENOMEM; 2365 return -ENOMEM;
2363} 2366}
2367EXPORT_SYMBOL(target_alloc_sgl);
2364 2368
2365/* 2369/*
2366 * Allocate any required resources to execute the command. For writes we 2370 * Allocate any required resources to execute the command. For writes we
@@ -2376,7 +2380,7 @@ transport_generic_new_cmd(struct se_cmd *cmd)
2376 if (cmd->prot_op != TARGET_PROT_NORMAL && 2380 if (cmd->prot_op != TARGET_PROT_NORMAL &&
2377 !(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) { 2381 !(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) {
2378 ret = target_alloc_sgl(&cmd->t_prot_sg, &cmd->t_prot_nents, 2382 ret = target_alloc_sgl(&cmd->t_prot_sg, &cmd->t_prot_nents,
2379 cmd->prot_length, true); 2383 cmd->prot_length, true, false);
2380 if (ret < 0) 2384 if (ret < 0)
2381 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 2385 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
2382 } 2386 }
@@ -2401,13 +2405,13 @@ transport_generic_new_cmd(struct se_cmd *cmd)
2401 2405
2402 ret = target_alloc_sgl(&cmd->t_bidi_data_sg, 2406 ret = target_alloc_sgl(&cmd->t_bidi_data_sg,
2403 &cmd->t_bidi_data_nents, 2407 &cmd->t_bidi_data_nents,
2404 bidi_length, zero_flag); 2408 bidi_length, zero_flag, false);
2405 if (ret < 0) 2409 if (ret < 0)
2406 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 2410 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
2407 } 2411 }
2408 2412
2409 ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, 2413 ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents,
2410 cmd->data_length, zero_flag); 2414 cmd->data_length, zero_flag, false);
2411 if (ret < 0) 2415 if (ret < 0)
2412 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 2416 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
2413 } else if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) && 2417 } else if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
@@ -2421,7 +2425,7 @@ transport_generic_new_cmd(struct se_cmd *cmd)
2421 2425
2422 ret = target_alloc_sgl(&cmd->t_bidi_data_sg, 2426 ret = target_alloc_sgl(&cmd->t_bidi_data_sg,
2423 &cmd->t_bidi_data_nents, 2427 &cmd->t_bidi_data_nents,
2424 caw_length, zero_flag); 2428 caw_length, zero_flag, false);
2425 if (ret < 0) 2429 if (ret < 0)
2426 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 2430 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
2427 } 2431 }
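As a minimal sketch of the widened helper outside transport_generic_new_cmd(): the new chainable flag only reserves one extra scatterlist slot so the table can later be chained. Everything below other than target_alloc_sgl()/target_free_sgl() and the se_cmd fields is illustrative, not part of this series.

#include <target/target_core_base.h>
#include <target/target_core_fabric.h>

static int my_alloc_data_buf(struct se_cmd *cmd, u32 len)
{
	/* Zeroed pages, plus one spare slot for a later sg_chain(). */
	return target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, len,
				true /* zero_page */, true /* chainable */);
}

static void my_free_data_buf(struct se_cmd *cmd)
{
	target_free_sgl(cmd->t_data_sg, cmd->t_data_nents);
	cmd->t_data_sg = NULL;
	cmd->t_data_nents = 0;
}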
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 47fe94ee10b8..75cd85426ae3 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -563,7 +563,7 @@ static int target_xcopy_setup_pt_cmd(
563 563
564 if (alloc_mem) { 564 if (alloc_mem) {
565 rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, 565 rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents,
566 cmd->data_length, false); 566 cmd->data_length, false, false);
567 if (rc < 0) { 567 if (rc < 0) {
568 ret = rc; 568 ret = rc;
569 goto out; 569 goto out;
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index b2c9fada8eac..2be976dd4966 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -53,6 +53,11 @@ struct mlx5_core_cq {
53 unsigned arm_sn; 53 unsigned arm_sn;
54 struct mlx5_rsc_debug *dbg; 54 struct mlx5_rsc_debug *dbg;
55 int pid; 55 int pid;
56 struct {
57 struct list_head list;
58 void (*comp)(struct mlx5_core_cq *);
59 void *priv;
60 } tasklet_ctx;
56}; 61};
57 62
58 63
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 07b504f7eb84..80776d0c52dc 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -42,6 +42,7 @@
42#include <linux/vmalloc.h> 42#include <linux/vmalloc.h>
43#include <linux/radix-tree.h> 43#include <linux/radix-tree.h>
44#include <linux/workqueue.h> 44#include <linux/workqueue.h>
45#include <linux/interrupt.h>
45 46
46#include <linux/mlx5/device.h> 47#include <linux/mlx5/device.h>
47#include <linux/mlx5/doorbell.h> 48#include <linux/mlx5/doorbell.h>
@@ -312,6 +313,14 @@ struct mlx5_buf {
312 u8 page_shift; 313 u8 page_shift;
313}; 314};
314 315
316struct mlx5_eq_tasklet {
317 struct list_head list;
318 struct list_head process_list;
319 struct tasklet_struct task;
320 /* lock on completion tasklet list */
321 spinlock_t lock;
322};
323
315struct mlx5_eq { 324struct mlx5_eq {
316 struct mlx5_core_dev *dev; 325 struct mlx5_core_dev *dev;
317 __be32 __iomem *doorbell; 326 __be32 __iomem *doorbell;
@@ -325,6 +334,7 @@ struct mlx5_eq {
325 struct list_head list; 334 struct list_head list;
326 int index; 335 int index;
327 struct mlx5_rsc_debug *dbg; 336 struct mlx5_rsc_debug *dbg;
337 struct mlx5_eq_tasklet tasklet_ctx;
328}; 338};
329 339
330struct mlx5_core_psv { 340struct mlx5_core_psv {
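To show what the two new structures are for, here is a rough sketch of the completion path they enable, not the driver's actual code: the EQ interrupt handler parks completed CQs on process_list and schedules the tasklet, which then runs every queued comp() callback in softirq context instead of hard-IRQ context.

#include <linux/interrupt.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>

static void my_cq_tasklet_cb(unsigned long data)
{
	struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
	struct mlx5_core_cq *cq, *tmp;
	unsigned long flags;
	LIST_HEAD(process);

	/* Grab everything the EQ interrupt handler queued since the last run. */
	spin_lock_irqsave(&ctx->lock, flags);
	list_splice_init(&ctx->process_list, &process);
	spin_unlock_irqrestore(&ctx->lock, flags);

	list_for_each_entry_safe(cq, tmp, &process, tasklet_ctx.list) {
		list_del_init(&cq->tasklet_ctx.list);
		cq->tasklet_ctx.comp(cq);
	}
}

At EQ creation time the driver would pair this with tasklet_init(&eq->tasklet_ctx.task, my_cq_tasklet_cb, (unsigned long)&eq->tasklet_ctx) and schedule it from the interrupt handler; reference counting of the queued CQs is omitted here.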
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index fb2cef4e9747..fc0320c004a3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -220,6 +220,7 @@ enum ib_device_cap_flags {
220 IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), 220 IB_DEVICE_ON_DEMAND_PAGING = (1 << 31),
221 IB_DEVICE_SG_GAPS_REG = (1ULL << 32), 221 IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
222 IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), 222 IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33),
223 IB_DEVICE_RAW_SCATTER_FCS = ((u64)1 << 34),
223}; 224};
224 225
225enum ib_signature_prot_cap { 226enum ib_signature_prot_cap {
@@ -931,6 +932,13 @@ struct ib_qp_cap {
931 u32 max_send_sge; 932 u32 max_send_sge;
932 u32 max_recv_sge; 933 u32 max_recv_sge;
933 u32 max_inline_data; 934 u32 max_inline_data;
935
936 /*
937 * Maximum number of rdma_rw_ctx structures in flight at a time.
 938 * ib_create_qp() will calculate the right number of WRs
939 * and MRs based on this.
940 */
941 u32 max_rdma_ctxs;
934}; 942};
935 943
936enum ib_sig_type { 944enum ib_sig_type {
@@ -981,6 +989,7 @@ enum ib_qp_create_flags {
981 IB_QP_CREATE_NETIF_QP = 1 << 5, 989 IB_QP_CREATE_NETIF_QP = 1 << 5,
982 IB_QP_CREATE_SIGNATURE_EN = 1 << 6, 990 IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
983 IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, 991 IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
992 IB_QP_CREATE_SCATTER_FCS = 1 << 8,
984 /* reserve bits 26-31 for low level drivers' internal use */ 993 /* reserve bits 26-31 for low level drivers' internal use */
985 IB_QP_CREATE_RESERVED_START = 1 << 26, 994 IB_QP_CREATE_RESERVED_START = 1 << 26,
986 IB_QP_CREATE_RESERVED_END = 1 << 31, 995 IB_QP_CREATE_RESERVED_END = 1 << 31,
@@ -1002,7 +1011,11 @@ struct ib_qp_init_attr {
1002 enum ib_sig_type sq_sig_type; 1011 enum ib_sig_type sq_sig_type;
1003 enum ib_qp_type qp_type; 1012 enum ib_qp_type qp_type;
1004 enum ib_qp_create_flags create_flags; 1013 enum ib_qp_create_flags create_flags;
1005 u8 port_num; /* special QP types only */ 1014
1015 /*
1016 * Only needed for special QP types, or when using the RW API.
1017 */
1018 u8 port_num;
1006}; 1019};
1007 1020
1008struct ib_qp_open_attr { 1021struct ib_qp_open_attr {
@@ -1421,9 +1434,14 @@ struct ib_qp {
1421 struct ib_pd *pd; 1434 struct ib_pd *pd;
1422 struct ib_cq *send_cq; 1435 struct ib_cq *send_cq;
1423 struct ib_cq *recv_cq; 1436 struct ib_cq *recv_cq;
1437 spinlock_t mr_lock;
1438 int mrs_used;
1439 struct list_head rdma_mrs;
1440 struct list_head sig_mrs;
1424 struct ib_srq *srq; 1441 struct ib_srq *srq;
1425 struct ib_xrcd *xrcd; /* XRC TGT QPs only */ 1442 struct ib_xrcd *xrcd; /* XRC TGT QPs only */
1426 struct list_head xrcd_list; 1443 struct list_head xrcd_list;
1444
1427 /* count times opened, mcast attaches, flow attaches */ 1445 /* count times opened, mcast attaches, flow attaches */
1428 atomic_t usecnt; 1446 atomic_t usecnt;
1429 struct list_head open_list; 1447 struct list_head open_list;
@@ -1438,12 +1456,16 @@ struct ib_qp {
1438struct ib_mr { 1456struct ib_mr {
1439 struct ib_device *device; 1457 struct ib_device *device;
1440 struct ib_pd *pd; 1458 struct ib_pd *pd;
1441 struct ib_uobject *uobject;
1442 u32 lkey; 1459 u32 lkey;
1443 u32 rkey; 1460 u32 rkey;
1444 u64 iova; 1461 u64 iova;
1445 u32 length; 1462 u32 length;
1446 unsigned int page_size; 1463 unsigned int page_size;
1464 bool need_inval;
1465 union {
1466 struct ib_uobject *uobject; /* user */
1467 struct list_head qp_entry; /* FR */
1468 };
1447}; 1469};
1448 1470
1449struct ib_mw { 1471struct ib_mw {
@@ -1827,7 +1849,8 @@ struct ib_device {
1827 u32 max_num_sg); 1849 u32 max_num_sg);
1828 int (*map_mr_sg)(struct ib_mr *mr, 1850 int (*map_mr_sg)(struct ib_mr *mr,
1829 struct scatterlist *sg, 1851 struct scatterlist *sg,
1830 int sg_nents); 1852 int sg_nents,
1853 unsigned int *sg_offset);
1831 struct ib_mw * (*alloc_mw)(struct ib_pd *pd, 1854 struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
1832 enum ib_mw_type type, 1855 enum ib_mw_type type,
1833 struct ib_udata *udata); 1856 struct ib_udata *udata);
@@ -2317,6 +2340,18 @@ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
2317 device->add_gid && device->del_gid; 2340 device->add_gid && device->del_gid;
2318} 2341}
2319 2342
2343/*
2344 * Check if the device supports READ W/ INVALIDATE.
2345 */
2346static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
2347{
2348 /*
2349 * iWarp drivers must support READ W/ INVALIDATE. No other protocol
2350 * has support for it yet.
2351 */
2352 return rdma_protocol_iwarp(dev, port_num);
2353}
2354
2320int ib_query_gid(struct ib_device *device, 2355int ib_query_gid(struct ib_device *device,
2321 u8 port_num, int index, union ib_gid *gid, 2356 u8 port_num, int index, union ib_gid *gid,
2322 struct ib_gid_attr *attr); 2357 struct ib_gid_attr *attr);
@@ -3111,29 +3146,23 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
3111 u16 pkey, const union ib_gid *gid, 3146 u16 pkey, const union ib_gid *gid,
3112 const struct sockaddr *addr); 3147 const struct sockaddr *addr);
3113 3148
3114int ib_map_mr_sg(struct ib_mr *mr, 3149int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
3115 struct scatterlist *sg, 3150 unsigned int *sg_offset, unsigned int page_size);
3116 int sg_nents,
3117 unsigned int page_size);
3118 3151
3119static inline int 3152static inline int
3120ib_map_mr_sg_zbva(struct ib_mr *mr, 3153ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
3121 struct scatterlist *sg, 3154 unsigned int *sg_offset, unsigned int page_size)
3122 int sg_nents,
3123 unsigned int page_size)
3124{ 3155{
3125 int n; 3156 int n;
3126 3157
3127 n = ib_map_mr_sg(mr, sg, sg_nents, page_size); 3158 n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size);
3128 mr->iova = 0; 3159 mr->iova = 0;
3129 3160
3130 return n; 3161 return n;
3131} 3162}
3132 3163
3133int ib_sg_to_pages(struct ib_mr *mr, 3164int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
3134 struct scatterlist *sgl, 3165 unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64));
3135 int sg_nents,
3136 int (*set_page)(struct ib_mr *, u64));
3137 3166
3138void ib_drain_rq(struct ib_qp *qp); 3167void ib_drain_rq(struct ib_qp *qp);
3139void ib_drain_sq(struct ib_qp *qp); 3168void ib_drain_sq(struct ib_qp *qp);
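The signature change above is mechanical for existing users, which simply pass NULL (see the rds and sunrpc call sites further down). A hedged sketch of the case the extra parameter exists for, registering a region that starts part-way into the first SG element; mr, sg, sg_nents and the offset value are assumed to come from the caller.

#include <rdma/ib_verbs.h>

static int my_map_mr(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
		     unsigned int offset)
{
	int n;

	/* offset is the byte offset into sg[0] at which the MR should start;
	 * callers that want the whole list pass a NULL pointer instead.
	 */
	n = ib_map_mr_sg(mr, sg, sg_nents, &offset, PAGE_SIZE);
	if (n != sg_nents)
		return n < 0 ? n : -EINVAL;
	return 0;
}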
diff --git a/include/rdma/mr_pool.h b/include/rdma/mr_pool.h
new file mode 100644
index 000000000000..986010b812eb
--- /dev/null
+++ b/include/rdma/mr_pool.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (c) 2016 HGST, a Western Digital Company.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef _RDMA_MR_POOL_H
14#define _RDMA_MR_POOL_H 1
15
16#include <rdma/ib_verbs.h>
17
18struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list);
19void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr);
20
21int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
22 enum ib_mr_type type, u32 max_num_sg);
23void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list);
24
25#endif /* _RDMA_MR_POOL_H */
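A rough usage sketch of the new pool helpers, assuming a QP whose rdma_mrs list has been initialised; the function name and the nr_mrs/max_sge sizes are made up for illustration.

#include <rdma/mr_pool.h>

static int my_try_mr_pool(struct ib_qp *qp, int nr_mrs, u32 max_sge)
{
	struct ib_mr *mr;
	int ret;

	ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
			      IB_MR_TYPE_MEM_REG, max_sge);
	if (ret)
		return ret;

	mr = ib_mr_pool_get(qp, &qp->rdma_mrs);	/* NULL once the pool is empty */
	if (mr) {
		/* ... build and post a registration WR using mr ... */
		ib_mr_pool_put(qp, &qp->rdma_mrs, mr);
	}

	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
	return 0;
}

In practice the new R/W code sets up qp->rdma_mrs itself when max_rdma_ctxs is requested at QP creation, so a typical ULP should not need to call these helpers directly.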
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index a8696551abb1..d57ceee90d26 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -467,6 +467,7 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
467} 467}
468 468
469struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); 469struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
470void rvt_dealloc_device(struct rvt_dev_info *rdi);
470int rvt_register_device(struct rvt_dev_info *rvd); 471int rvt_register_device(struct rvt_dev_info *rvd);
471void rvt_unregister_device(struct rvt_dev_info *rvd); 472void rvt_unregister_device(struct rvt_dev_info *rvd);
472int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); 473int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 497e59065c2c..0e1ff2abfe92 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -117,8 +117,9 @@
117/* 117/*
118 * Wait flags that would prevent any packet type from being sent. 118 * Wait flags that would prevent any packet type from being sent.
119 */ 119 */
120#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ 120#define RVT_S_ANY_WAIT_IO \
121 RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) 121 (RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \
122 RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
122 123
123/* 124/*
124 * Wait flags that would prevent send work requests from making progress. 125 * Wait flags that would prevent send work requests from making progress.
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
new file mode 100644
index 000000000000..377d865e506d
--- /dev/null
+++ b/include/rdma/rw.h
@@ -0,0 +1,88 @@
1/*
2 * Copyright (c) 2016 HGST, a Western Digital Company.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#ifndef _RDMA_RW_H
14#define _RDMA_RW_H
15
16#include <linux/dma-mapping.h>
17#include <linux/scatterlist.h>
18#include <rdma/ib_verbs.h>
19#include <rdma/rdma_cm.h>
20#include <rdma/mr_pool.h>
21
22struct rdma_rw_ctx {
23 /* number of RDMA READ/WRITE WRs (not counting MR WRs) */
24 u32 nr_ops;
25
26 /* tag for the union below: */
27 u8 type;
28
29 union {
30 /* for mapping a single SGE: */
31 struct {
32 struct ib_sge sge;
33 struct ib_rdma_wr wr;
34 } single;
35
36 /* for mapping of multiple SGEs: */
37 struct {
38 struct ib_sge *sges;
39 struct ib_rdma_wr *wrs;
40 } map;
41
42 /* for registering multiple WRs: */
43 struct rdma_rw_reg_ctx {
44 struct ib_sge sge;
45 struct ib_rdma_wr wr;
46 struct ib_reg_wr reg_wr;
47 struct ib_send_wr inv_wr;
48 struct ib_mr *mr;
49 } *reg;
50
51 struct {
52 struct rdma_rw_reg_ctx data;
53 struct rdma_rw_reg_ctx prot;
54 struct ib_send_wr sig_inv_wr;
55 struct ib_mr *sig_mr;
56 struct ib_sge sig_sge;
57 struct ib_sig_handover_wr sig_wr;
58 } *sig;
59 };
60};
61
62int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
63 struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
64 u64 remote_addr, u32 rkey, enum dma_data_direction dir);
65void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
66 struct scatterlist *sg, u32 sg_cnt,
67 enum dma_data_direction dir);
68
69int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
70 u8 port_num, struct scatterlist *sg, u32 sg_cnt,
71 struct scatterlist *prot_sg, u32 prot_sg_cnt,
72 struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey,
73 enum dma_data_direction dir);
74void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
75 u8 port_num, struct scatterlist *sg, u32 sg_cnt,
76 struct scatterlist *prot_sg, u32 prot_sg_cnt,
77 enum dma_data_direction dir);
78
79struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
80 u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
81int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
82 struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
83
84void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
85int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
86void rdma_rw_cleanup_mrs(struct ib_qp *qp);
87
88#endif /* _RDMA_RW_H */
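A hedged sketch of how a target-side ULP might drive the new R/W API for one transfer. Every name other than the rdma_rw_* calls is illustrative; the context is assumed to live in per-I/O state, and the completion handler behind cqe (not shown) is where rdma_rw_ctx_destroy() would be called. DMA_TO_DEVICE produces RDMA WRITE work requests toward the peer, DMA_FROM_DEVICE produces RDMA READs.

#include <rdma/rw.h>

static int my_start_rdma(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
			 u8 port_num, struct ib_cqe *cqe,
			 struct scatterlist *sg, u32 sg_cnt,
			 u64 remote_addr, u32 rkey,
			 enum dma_data_direction dir)
{
	int ret;

	/* Map the local SG list and build the READ/WRITE (and any MR) WRs;
	 * a negative return is an errno, anything else means success.
	 */
	ret = rdma_rw_ctx_init(ctx, qp, port_num, sg, sg_cnt, 0,
			       remote_addr, rkey, dir);
	if (ret < 0)
		return ret;

	/* Post the whole chain; cqe->done() fires on the final completion,
	 * and that handler is where the context gets torn down.
	 */
	return rdma_rw_ctx_post(ctx, qp, port_num, cqe, NULL);
}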
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 28ee5c2e6bcd..d8ab5101fad5 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -85,7 +85,6 @@ extern struct configfs_attribute *passthrough_attrib_attrs[];
85void *transport_kmap_data_sg(struct se_cmd *); 85void *transport_kmap_data_sg(struct se_cmd *);
86void transport_kunmap_data_sg(struct se_cmd *); 86void transport_kunmap_data_sg(struct se_cmd *);
87/* core helpers also used by xcopy during internal command setup */ 87/* core helpers also used by xcopy during internal command setup */
88int target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool);
89sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, 88sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *,
90 struct scatterlist *, u32, struct scatterlist *, u32); 89 struct scatterlist *, u32, struct scatterlist *, u32);
91 90
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 8ff6d40a294f..78d88f03b296 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -185,6 +185,10 @@ int core_tpg_set_initiator_node_tag(struct se_portal_group *,
185int core_tpg_register(struct se_wwn *, struct se_portal_group *, int); 185int core_tpg_register(struct se_wwn *, struct se_portal_group *, int);
186int core_tpg_deregister(struct se_portal_group *); 186int core_tpg_deregister(struct se_portal_group *);
187 187
188int target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents,
189 u32 length, bool zero_page, bool chainable);
190void target_free_sgl(struct scatterlist *sgl, int nents);
191
188/* 192/*
189 * The LIO target core uses DMA_TO_DEVICE to mean that data is going 193 * The LIO target core uses DMA_TO_DEVICE to mean that data is going
190 * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean 194 * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 8126c143a519..b6543d73d20a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -226,6 +226,7 @@ struct ib_uverbs_ex_query_device_resp {
226 struct ib_uverbs_odp_caps odp_caps; 226 struct ib_uverbs_odp_caps odp_caps;
227 __u64 timestamp_mask; 227 __u64 timestamp_mask;
228 __u64 hca_core_clock; /* in KHZ */ 228 __u64 hca_core_clock; /* in KHZ */
229 __u64 device_cap_flags_ex;
229}; 230};
230 231
231struct ib_uverbs_query_port { 232struct ib_uverbs_query_port {
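The new device_cap_flags_ex field is how capability bits at position 32 and above, such as IB_DEVICE_RAW_SCATTER_FCS added earlier in ib_verbs.h, get reported to userspace. For illustration only, a kernel-side sketch of gating the matching QP create flag on that capability; in this series the flag is mainly exercised through the uverbs path, and the init_attr setup around this snippet is omitted.

#include <rdma/ib_verbs.h>

static struct ib_qp *my_create_fcs_qp(struct ib_pd *pd,
				       struct ib_qp_init_attr *init_attr)
{
	/* Only raw packet QPs can keep the Ethernet FCS in the payload. */
	if (!(pd->device->attrs.device_cap_flags & IB_DEVICE_RAW_SCATTER_FCS))
		return ERR_PTR(-EOPNOTSUPP);

	init_attr->qp_type = IB_QPT_RAW_PACKET;
	init_attr->create_flags |= IB_QP_CREATE_SCATTER_FCS;
	return ib_create_qp(pd, init_attr);
}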
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 93ff038ea9d1..d921adc62765 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -111,7 +111,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
111 cpu_relax(); 111 cpu_relax();
112 } 112 }
113 113
114 ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, PAGE_SIZE); 114 ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, 0, PAGE_SIZE);
115 if (unlikely(ret != ibmr->sg_len)) 115 if (unlikely(ret != ibmr->sg_len))
116 return ret < 0 ? ret : -EINVAL; 116 return ret < 0 ? ret : -EINVAL;
117 117
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c250924a9fd3..94c3fa910b85 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -421,7 +421,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
421 return -ENOMEM; 421 return -ENOMEM;
422 } 422 }
423 423
424 n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); 424 n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
425 if (unlikely(n != frmr->sg_nents)) { 425 if (unlikely(n != frmr->sg_nents)) {
426 pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", 426 pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
427 __func__, frmr->fr_mr, n, frmr->sg_nents); 427 __func__, frmr->fr_mr, n, frmr->sg_nents);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 3b24a646eb46..fbe7444e7de6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -281,7 +281,7 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
281 } 281 }
282 atomic_inc(&xprt->sc_dma_used); 282 atomic_inc(&xprt->sc_dma_used);
283 283
284 n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); 284 n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
285 if (unlikely(n != frmr->sg_nents)) { 285 if (unlikely(n != frmr->sg_nents)) {
286 pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", 286 pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
287 frmr->mr, n, frmr->sg_nents); 287 frmr->mr, n, frmr->sg_nents);