author     Linus Torvalds <torvalds@linux-foundation.org>  2016-08-04 20:10:31 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-08-04 20:10:31 -0400
commit     0cda611386b2fcbf8bb32e9a5d82bfed4856fc36 (patch)
tree       1647e00675ab924edfb22b69ea3872db091b8900
parent     fdf1f7ff1bd7f1c6d1d5dc2b29b6b11a4f722276 (diff)
parent     7f1d25b47d919cef29388aff37e7b074e65bf512 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull base rdma updates from Doug Ledford:
 "Round one of 4.8 code: while this is mostly normal, there is a new
  driver in here (the driver was hosted outside the kernel for several
  years and is actually a fairly mature and well coded driver). It
  amounts to 13,000 of the 16,000 lines of added code in here.

  Summary:
   - Updates/fixes for iw_cxgb4 driver
   - Updates/fixes for mlx5 driver
   - Add flow steering and RSS API
   - Add hardware stats to mlx4 and mlx5 drivers
   - Add firmware version API for RDMA driver use
   - Add the rxe driver (this is a software RoCE driver that makes any
     Ethernet device a RoCE device)
   - Fixes for i40iw driver
   - Support for send only multicast joins in the cma layer
   - Other minor fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (72 commits)
  Soft RoCE driver
  IB/core: Support for CMA multicast join flags
  IB/sa: Add cached attribute containing SM information to SA port
  IB/uverbs: Fix race between uverbs_close and remove_one
  IB/mthca: Clean up error unwind flow in mthca_reset()
  IB/mthca: NULL arg to pci_dev_put is OK
  IB/hfi1: NULL arg to sc_return_credits is OK
  IB/mlx4: Add diagnostic hardware counters
  net/mlx4: Query performance and diagnostics counters
  net/mlx4: Add diagnostic counters capability bit
  Use smaller 512 byte messages for portmapper messages
  IB/ipoib: Report SG feature regardless of HW UD CSUM capability
  IB/mlx4: Don't use GFP_ATOMIC for CQ resize struct
  IB/hfi1: Disable by default
  IB/rdmavt: Disable by default
  IB/mlx5: Fix port counter ID association to QP offset
  IB/mlx5: Fix iteration overrun in GSI qps
  i40iw: Add NULL check for puda buffer
  i40iw: Change dup_ack_thresh to u8
  i40iw: Remove unnecessary check for moving CQ head
  ...
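As an illustration of the "send only multicast joins" item, here is a minimal kernel-side sketch of how a ULP might request such a join with the rdma_join_multicast() signature introduced in this series. The join_state argument and the SENDONLY_FULLMEMBER_JOIN state come from the cma.c/ucma.c hunks further down; the wrapper function and its placement are illustrative only, and the header holding the join-state enum is assumed from the diffstat:

#include <rdma/rdma_cm.h>
#include <rdma/ib_sa.h>   /* assumed new home of the join-state enum removed from core/multicast.c */

/*
 * Sketch: join a multicast group as a send-only full member. 'id' is an
 * already bound/resolved rdma_cm_id and 'mcast_addr' the group address.
 * The third argument is new in this merge: the caller picks the
 * MCMemberRecord JoinState instead of the core hard-coding full membership.
 * For IB, the cma core checks SM support via a ClassPortInfo query and
 * returns -EOPNOTSUPP if Send Only Full Member joins are not supported.
 */
static int example_sendonly_join(struct rdma_cm_id *id,
                                 struct sockaddr *mcast_addr, void *context)
{
        return rdma_join_multicast(id, mcast_addr,
                                   BIT(SENDONLY_FULLMEMBER_JOIN), context);
}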
-rw-r--r--  MAINTAINERS  9
-rw-r--r--  drivers/infiniband/Kconfig  1
-rw-r--r--  drivers/infiniband/core/cma.c  98
-rw-r--r--  drivers/infiniband/core/device.c  9
-rw-r--r--  drivers/infiniband/core/iwcm.c  54
-rw-r--r--  drivers/infiniband/core/iwcm.h  2
-rw-r--r--  drivers/infiniband/core/iwpm_util.c  3
-rw-r--r--  drivers/infiniband/core/multicast.c  12
-rw-r--r--  drivers/infiniband/core/netlink.c  6
-rw-r--r--  drivers/infiniband/core/sa_query.c  41
-rw-r--r--  drivers/infiniband/core/sysfs.c  15
-rw-r--r--  drivers/infiniband/core/ucma.c  18
-rw-r--r--  drivers/infiniband/core/uverbs.h  14
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c  535
-rw-r--r--  drivers/infiniband/core/uverbs_main.c  75
-rw-r--r--  drivers/infiniband/core/verbs.c  163
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c  4
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c  27
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c  193
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c  42
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c  2
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h  24
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c  127
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c  31
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c  40
-rw-r--r--  drivers/infiniband/hw/hfi1/Kconfig  1
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c  2
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h  2
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c  15
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c  4
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_d.h  3
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_puda.c  4
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_type.h  2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_uk.c  29
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_user.h  2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c  76
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c  4
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c  222
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h  9
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c  87
-rw-r--r--  drivers/infiniband/hw/mlx5/gsi.c  19
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c  429
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  74
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c  4
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c  691
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c  112
-rw-r--r--  drivers/infiniband/hw/mlx5/user.h  88
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c  24
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_reset.c  42
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c  33
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c  19
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c  16
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_sysfs.c  17
-rw-r--r--  drivers/infiniband/sw/Makefile  1
-rw-r--r--  drivers/infiniband/sw/rdmavt/Kconfig  1
-rw-r--r--  drivers/infiniband/sw/rxe/Kconfig  24
-rw-r--r--  drivers/infiniband/sw/rxe/Makefile  24
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c  386
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h  77
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_av.c  98
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c  734
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c  165
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_dma.c  166
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hdr.h  952
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_icrc.c  96
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h  286
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mcast.c  190
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mmap.c  173
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c  643
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c  708
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.h  53
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_opcode.c  961
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_opcode.h  129
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h  172
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c  502
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.h  163
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c  851
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.c  217
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.h  178
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c  420
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c  726
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c  1380
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_srq.c  193
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_sysfs.c  157
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.c  154
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.h  95
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c  1330
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h  480
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ethtool.c  6
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c  3
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c  10
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_msg.h  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.c  40
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/srq.c  265
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/transobj.c  4
-rw-r--r--  include/linux/mlx4/device.h  7
-rw-r--r--  include/linux/mlx5/cq.h  2
-rw-r--r--  include/linux/mlx5/driver.h  6
-rw-r--r--  include/linux/mlx5/qp.h  4
-rw-r--r--  include/linux/mlx5/srq.h  25
-rw-r--r--  include/rdma/ib_sa.h  13
-rw-r--r--  include/rdma/ib_verbs.h  102
-rw-r--r--  include/rdma/rdma_cm.h  4
-rw-r--r--  include/uapi/rdma/Kbuild  1
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h  95
-rw-r--r--  include/uapi/rdma/rdma_user_cm.h  9
-rw-r--r--  include/uapi/rdma/rdma_user_rxe.h  144
107 files changed, 16539 insertions, 663 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f518e69e5f29..5e1f03f0a526 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7647,6 +7647,15 @@ W: http://www.mellanox.com
7647Q: http://patchwork.ozlabs.org/project/netdev/list/ 7647Q: http://patchwork.ozlabs.org/project/netdev/list/
7648F: drivers/net/ethernet/mellanox/mlxsw/ 7648F: drivers/net/ethernet/mellanox/mlxsw/
7649 7649
7650SOFT-ROCE DRIVER (rxe)
7651M: Moni Shoua <monis@mellanox.com>
7652L: linux-rdma@vger.kernel.org
7653S: Supported
7654W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
7655Q: http://patchwork.kernel.org/project/linux-rdma/list/
7656F: drivers/infiniband/hw/rxe/
7657F: include/uapi/rdma/rdma_user_rxe.h
7658
7650MEMBARRIER SUPPORT 7659MEMBARRIER SUPPORT
7651M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> 7660M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7652M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> 7661M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 2137adfbd8c3..e9b7dc037ff8 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -84,6 +84,7 @@ source "drivers/infiniband/ulp/iser/Kconfig"
84source "drivers/infiniband/ulp/isert/Kconfig" 84source "drivers/infiniband/ulp/isert/Kconfig"
85 85
86source "drivers/infiniband/sw/rdmavt/Kconfig" 86source "drivers/infiniband/sw/rdmavt/Kconfig"
87source "drivers/infiniband/sw/rxe/Kconfig"
87 88
88source "drivers/infiniband/hw/hfi1/Kconfig" 89source "drivers/infiniband/hw/hfi1/Kconfig"
89 90
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index ad1b1adcf6f0..e6dfa1bd3def 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -68,6 +68,7 @@ MODULE_DESCRIPTION("Generic RDMA CM Agent");
68MODULE_LICENSE("Dual BSD/GPL"); 68MODULE_LICENSE("Dual BSD/GPL");
69 69
70#define CMA_CM_RESPONSE_TIMEOUT 20 70#define CMA_CM_RESPONSE_TIMEOUT 20
71#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
71#define CMA_MAX_CM_RETRIES 15 72#define CMA_MAX_CM_RETRIES 15
72#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) 73#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
73#define CMA_IBOE_PACKET_LIFETIME 18 74#define CMA_IBOE_PACKET_LIFETIME 18
@@ -162,6 +163,14 @@ struct rdma_bind_list {
162 unsigned short port; 163 unsigned short port;
163}; 164};
164 165
166struct class_port_info_context {
167 struct ib_class_port_info *class_port_info;
168 struct ib_device *device;
169 struct completion done;
170 struct ib_sa_query *sa_query;
171 u8 port_num;
172};
173
165static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, 174static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
166 struct rdma_bind_list *bind_list, int snum) 175 struct rdma_bind_list *bind_list, int snum)
167{ 176{
@@ -306,6 +315,7 @@ struct cma_multicast {
306 struct sockaddr_storage addr; 315 struct sockaddr_storage addr;
307 struct kref mcref; 316 struct kref mcref;
308 bool igmp_joined; 317 bool igmp_joined;
318 u8 join_state;
309}; 319};
310 320
311struct cma_work { 321struct cma_work {
@@ -3752,10 +3762,63 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
3752 } 3762 }
3753} 3763}
3754 3764
3765static void cma_query_sa_classport_info_cb(int status,
3766 struct ib_class_port_info *rec,
3767 void *context)
3768{
3769 struct class_port_info_context *cb_ctx = context;
3770
3771 WARN_ON(!context);
3772
3773 if (status || !rec) {
3774 pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
3775 cb_ctx->device->name, cb_ctx->port_num, status);
3776 goto out;
3777 }
3778
3779 memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
3780
3781out:
3782 complete(&cb_ctx->done);
3783}
3784
3785static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
3786 struct ib_class_port_info *class_port_info)
3787{
3788 struct class_port_info_context *cb_ctx;
3789 int ret;
3790
3791 cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
3792 if (!cb_ctx)
3793 return -ENOMEM;
3794
3795 cb_ctx->device = device;
3796 cb_ctx->class_port_info = class_port_info;
3797 cb_ctx->port_num = port_num;
3798 init_completion(&cb_ctx->done);
3799
3800 ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
3801 CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
3802 GFP_KERNEL, cma_query_sa_classport_info_cb,
3803 cb_ctx, &cb_ctx->sa_query);
3804 if (ret < 0) {
3805 pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
3806 device->name, port_num, ret);
3807 goto out;
3808 }
3809
3810 wait_for_completion(&cb_ctx->done);
3811
3812out:
3813 kfree(cb_ctx);
3814 return ret;
3815}
3816
3755static int cma_join_ib_multicast(struct rdma_id_private *id_priv, 3817static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3756 struct cma_multicast *mc) 3818 struct cma_multicast *mc)
3757{ 3819{
3758 struct ib_sa_mcmember_rec rec; 3820 struct ib_sa_mcmember_rec rec;
3821 struct ib_class_port_info class_port_info;
3759 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3822 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3760 ib_sa_comp_mask comp_mask; 3823 ib_sa_comp_mask comp_mask;
3761 int ret; 3824 int ret;
@@ -3774,7 +3837,24 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3774 rec.qkey = cpu_to_be32(id_priv->qkey); 3837 rec.qkey = cpu_to_be32(id_priv->qkey);
3775 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 3838 rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3776 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 3839 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3777 rec.join_state = 1; 3840 rec.join_state = mc->join_state;
3841
3842 if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
3843 ret = cma_query_sa_classport_info(id_priv->id.device,
3844 id_priv->id.port_num,
3845 &class_port_info);
3846
3847 if (ret)
3848 return ret;
3849
3850 if (!(ib_get_cpi_capmask2(&class_port_info) &
3851 IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
3852 pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
3853 "RDMA CM: SM doesn't support Send Only Full Member option\n",
3854 id_priv->id.device->name, id_priv->id.port_num);
3855 return -EOPNOTSUPP;
3856 }
3857 }
3778 3858
3779 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | 3859 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
3780 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | 3860 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
@@ -3843,6 +3923,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3843 struct sockaddr *addr = (struct sockaddr *)&mc->addr; 3923 struct sockaddr *addr = (struct sockaddr *)&mc->addr;
3844 struct net_device *ndev = NULL; 3924 struct net_device *ndev = NULL;
3845 enum ib_gid_type gid_type; 3925 enum ib_gid_type gid_type;
3926 bool send_only;
3927
3928 send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
3846 3929
3847 if (cma_zero_addr((struct sockaddr *)&mc->addr)) 3930 if (cma_zero_addr((struct sockaddr *)&mc->addr))
3848 return -EINVAL; 3931 return -EINVAL;
@@ -3878,10 +3961,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3878 if (addr->sa_family == AF_INET) { 3961 if (addr->sa_family == AF_INET) {
3879 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 3962 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
3880 mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; 3963 mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
3881 err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, 3964 if (!send_only) {
3882 true); 3965 err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
3883 if (!err) 3966 true);
3884 mc->igmp_joined = true; 3967 if (!err)
3968 mc->igmp_joined = true;
3969 }
3885 } 3970 }
3886 } else { 3971 } else {
3887 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 3972 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -3911,7 +3996,7 @@ out1:
3911} 3996}
3912 3997
3913int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 3998int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3914 void *context) 3999 u8 join_state, void *context)
3915{ 4000{
3916 struct rdma_id_private *id_priv; 4001 struct rdma_id_private *id_priv;
3917 struct cma_multicast *mc; 4002 struct cma_multicast *mc;
@@ -3930,6 +4015,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3930 mc->context = context; 4015 mc->context = context;
3931 mc->id_priv = id_priv; 4016 mc->id_priv = id_priv;
3932 mc->igmp_joined = false; 4017 mc->igmp_joined = false;
4018 mc->join_state = join_state;
3933 spin_lock(&id_priv->lock); 4019 spin_lock(&id_priv->lock);
3934 list_add(&mc->list, &id_priv->mc_list); 4020 list_add(&mc->list, &id_priv->mc_list);
3935 spin_unlock(&id_priv->lock); 4021 spin_unlock(&id_priv->lock);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 5c155fa91eec..760ef603a468 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -311,6 +311,15 @@ static int read_port_immutable(struct ib_device *device)
311 return 0; 311 return 0;
312} 312}
313 313
314void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
315{
316 if (dev->get_dev_fw_str)
317 dev->get_dev_fw_str(dev, str, str_len);
318 else
319 str[0] = '\0';
320}
321EXPORT_SYMBOL(ib_get_device_fw_str);
322
314/** 323/**
315 * ib_register_device - Register an IB device with IB core 324 * ib_register_device - Register an IB device with IB core
316 * @device:Device to register 325 * @device:Device to register
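The new ib_get_device_fw_str() above only dispatches to an optional per-driver callback and falls back to an empty string. A hedged sketch of what a provider-side implementation of that callback looks like; the driver structure and version fields here are hypothetical, only the callback signature is taken from the hunk above:

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Hypothetical driver-private structure embedding the ib_device. */
struct example_dev {
        struct ib_device ib_dev;
        u16 fw_ver_major;
        u16 fw_ver_minor;
        u16 fw_ver_sub;
};

/*
 * Provider callback invoked by ib_get_device_fw_str(): format whatever
 * firmware revision the driver tracks into the caller's buffer. It would
 * be registered as dev->ib_dev.get_dev_fw_str = example_get_dev_fw_str;
 */
static void example_get_dev_fw_str(struct ib_device *ibdev, char *str,
                                   size_t str_len)
{
        struct example_dev *dev =
                container_of(ibdev, struct example_dev, ib_dev);

        snprintf(str, str_len, "%u.%u.%u",
                 dev->fw_ver_major, dev->fw_ver_minor, dev->fw_ver_sub);
}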
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index f0572049d291..357624f8b9d3 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -183,15 +183,14 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
183 183
184/* 184/*
185 * Release a reference on cm_id. If the last reference is being 185 * Release a reference on cm_id. If the last reference is being
186 * released, enable the waiting thread (in iw_destroy_cm_id) to 186 * released, free the cm_id and return 1.
187 * get woken up, and return 1 if a thread is already waiting.
188 */ 187 */
189static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 188static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
190{ 189{
191 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 190 BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
192 if (atomic_dec_and_test(&cm_id_priv->refcount)) { 191 if (atomic_dec_and_test(&cm_id_priv->refcount)) {
193 BUG_ON(!list_empty(&cm_id_priv->work_list)); 192 BUG_ON(!list_empty(&cm_id_priv->work_list));
194 complete(&cm_id_priv->destroy_comp); 193 free_cm_id(cm_id_priv);
195 return 1; 194 return 1;
196 } 195 }
197 196
@@ -208,19 +207,10 @@ static void add_ref(struct iw_cm_id *cm_id)
208static void rem_ref(struct iw_cm_id *cm_id) 207static void rem_ref(struct iw_cm_id *cm_id)
209{ 208{
210 struct iwcm_id_private *cm_id_priv; 209 struct iwcm_id_private *cm_id_priv;
211 int cb_destroy;
212 210
213 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 211 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
214 212
215 /* 213 (void)iwcm_deref_id(cm_id_priv);
216 * Test bit before deref in case the cm_id gets freed on another
217 * thread.
218 */
219 cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
220 if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
221 BUG_ON(!list_empty(&cm_id_priv->work_list));
222 free_cm_id(cm_id_priv);
223 }
224} 214}
225 215
226static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 216static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
@@ -370,6 +360,12 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
370 wait_event(cm_id_priv->connect_wait, 360 wait_event(cm_id_priv->connect_wait,
371 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 361 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
372 362
363 /*
364 * Since we're deleting the cm_id, drop any events that
365 * might arrive before the last dereference.
366 */
367 set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
368
373 spin_lock_irqsave(&cm_id_priv->lock, flags); 369 spin_lock_irqsave(&cm_id_priv->lock, flags);
374 switch (cm_id_priv->state) { 370 switch (cm_id_priv->state) {
375 case IW_CM_STATE_LISTEN: 371 case IW_CM_STATE_LISTEN:
@@ -433,13 +429,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
433 struct iwcm_id_private *cm_id_priv; 429 struct iwcm_id_private *cm_id_priv;
434 430
435 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 431 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
436 BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
437
438 destroy_cm_id(cm_id); 432 destroy_cm_id(cm_id);
439
440 wait_for_completion(&cm_id_priv->destroy_comp);
441
442 free_cm_id(cm_id_priv);
443} 433}
444EXPORT_SYMBOL(iw_destroy_cm_id); 434EXPORT_SYMBOL(iw_destroy_cm_id);
445 435
@@ -809,10 +799,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
809 ret = cm_id->cm_handler(cm_id, iw_event); 799 ret = cm_id->cm_handler(cm_id, iw_event);
810 if (ret) { 800 if (ret) {
811 iw_cm_reject(cm_id, NULL, 0); 801 iw_cm_reject(cm_id, NULL, 0);
812 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 802 iw_destroy_cm_id(cm_id);
813 destroy_cm_id(cm_id);
814 if (atomic_read(&cm_id_priv->refcount)==0)
815 free_cm_id(cm_id_priv);
816 } 803 }
817 804
818out: 805out:
@@ -1000,7 +987,6 @@ static void cm_work_handler(struct work_struct *_work)
1000 unsigned long flags; 987 unsigned long flags;
1001 int empty; 988 int empty;
1002 int ret = 0; 989 int ret = 0;
1003 int destroy_id;
1004 990
1005 spin_lock_irqsave(&cm_id_priv->lock, flags); 991 spin_lock_irqsave(&cm_id_priv->lock, flags);
1006 empty = list_empty(&cm_id_priv->work_list); 992 empty = list_empty(&cm_id_priv->work_list);
@@ -1013,20 +999,14 @@ static void cm_work_handler(struct work_struct *_work)
1013 put_work(work); 999 put_work(work);
1014 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1000 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1015 1001
1016 ret = process_event(cm_id_priv, &levent); 1002 if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
1017 if (ret) { 1003 ret = process_event(cm_id_priv, &levent);
1018 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 1004 if (ret)
1019 destroy_cm_id(&cm_id_priv->id); 1005 destroy_cm_id(&cm_id_priv->id);
1020 } 1006 } else
1021 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 1007 pr_debug("dropping event %d\n", levent.event);
1022 destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 1008 if (iwcm_deref_id(cm_id_priv))
1023 if (iwcm_deref_id(cm_id_priv)) {
1024 if (destroy_id) {
1025 BUG_ON(!list_empty(&cm_id_priv->work_list));
1026 free_cm_id(cm_id_priv);
1027 }
1028 return; 1009 return;
1029 }
1030 if (empty) 1010 if (empty)
1031 return; 1011 return;
1032 spin_lock_irqsave(&cm_id_priv->lock, flags); 1012 spin_lock_irqsave(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
index 3f6cc82564c8..82c2cd1b0a80 100644
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@@ -56,7 +56,7 @@ struct iwcm_id_private {
56 struct list_head work_free_list; 56 struct list_head work_free_list;
57}; 57};
58 58
59#define IWCM_F_CALLBACK_DESTROY 1 59#define IWCM_F_DROP_EVENTS 1
60#define IWCM_F_CONNECT_WAIT 2 60#define IWCM_F_CONNECT_WAIT 2
61 61
62#endif /* IWCM_H */ 62#endif /* IWCM_H */
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index b65e06c560d7..ade71e7f0131 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -37,6 +37,7 @@
37#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) 37#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1)
38#define IWPM_REMINFO_HASH_SIZE 64 38#define IWPM_REMINFO_HASH_SIZE 64
39#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) 39#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1)
40#define IWPM_MSG_SIZE 512
40 41
41static LIST_HEAD(iwpm_nlmsg_req_list); 42static LIST_HEAD(iwpm_nlmsg_req_list);
42static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); 43static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
@@ -452,7 +453,7 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
452{ 453{
453 struct sk_buff *skb = NULL; 454 struct sk_buff *skb = NULL;
454 455
455 skb = dev_alloc_skb(NLMSG_GOODSIZE); 456 skb = dev_alloc_skb(IWPM_MSG_SIZE);
456 if (!skb) { 457 if (!skb) {
457 pr_err("%s Unable to allocate skb\n", __func__); 458 pr_err("%s Unable to allocate skb\n", __func__);
458 goto create_nlmsg_exit; 459 goto create_nlmsg_exit;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index a83ec28a147b..3a3c5d73bbfc 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -93,18 +93,6 @@ enum {
93 93
94struct mcast_member; 94struct mcast_member;
95 95
96/*
97* There are 4 types of join states:
98* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
99*/
100enum {
101 FULLMEMBER_JOIN,
102 NONMEMBER_JOIN,
103 SENDONLY_NONMEBER_JOIN,
104 SENDONLY_FULLMEMBER_JOIN,
105 NUM_JOIN_MEMBERSHIP_TYPES,
106};
107
108struct mcast_group { 96struct mcast_group {
109 struct ib_sa_mcmember_rec rec; 97 struct ib_sa_mcmember_rec rec;
110 struct rb_node node; 98 struct rb_node node;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 9b8c20c8209b..10469b0088b5 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -229,7 +229,10 @@ static void ibnl_rcv(struct sk_buff *skb)
229int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, 229int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
230 __u32 pid) 230 __u32 pid)
231{ 231{
232 return nlmsg_unicast(nls, skb, pid); 232 int err;
233
234 err = netlink_unicast(nls, skb, pid, 0);
235 return (err < 0) ? err : 0;
233} 236}
234EXPORT_SYMBOL(ibnl_unicast); 237EXPORT_SYMBOL(ibnl_unicast);
235 238
@@ -252,6 +255,7 @@ int __init ibnl_init(void)
252 return -ENOMEM; 255 return -ENOMEM;
253 } 256 }
254 257
258 nls->sk_sndtimeo = 10 * HZ;
255 return 0; 259 return 0;
256} 260}
257 261
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index e95538650dc6..b9bf7aa055e7 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -65,10 +65,17 @@ struct ib_sa_sm_ah {
65 u8 src_path_mask; 65 u8 src_path_mask;
66}; 66};
67 67
68struct ib_sa_classport_cache {
69 bool valid;
70 struct ib_class_port_info data;
71};
72
68struct ib_sa_port { 73struct ib_sa_port {
69 struct ib_mad_agent *agent; 74 struct ib_mad_agent *agent;
70 struct ib_sa_sm_ah *sm_ah; 75 struct ib_sa_sm_ah *sm_ah;
71 struct work_struct update_task; 76 struct work_struct update_task;
77 struct ib_sa_classport_cache classport_info;
78 spinlock_t classport_lock; /* protects class port info set */
72 spinlock_t ah_lock; 79 spinlock_t ah_lock;
73 u8 port_num; 80 u8 port_num;
74}; 81};
@@ -998,6 +1005,13 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
998 port->sm_ah = NULL; 1005 port->sm_ah = NULL;
999 spin_unlock_irqrestore(&port->ah_lock, flags); 1006 spin_unlock_irqrestore(&port->ah_lock, flags);
1000 1007
1008 if (event->event == IB_EVENT_SM_CHANGE ||
1009 event->event == IB_EVENT_CLIENT_REREGISTER ||
1010 event->event == IB_EVENT_LID_CHANGE) {
1011 spin_lock_irqsave(&port->classport_lock, flags);
1012 port->classport_info.valid = false;
1013 spin_unlock_irqrestore(&port->classport_lock, flags);
1014 }
1001 queue_work(ib_wq, &sa_dev->port[event->element.port_num - 1015 queue_work(ib_wq, &sa_dev->port[event->element.port_num -
1002 sa_dev->start_port].update_task); 1016 sa_dev->start_port].update_task);
1003 } 1017 }
@@ -1719,6 +1733,7 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1719 int status, 1733 int status,
1720 struct ib_sa_mad *mad) 1734 struct ib_sa_mad *mad)
1721{ 1735{
1736 unsigned long flags;
1722 struct ib_sa_classport_info_query *query = 1737 struct ib_sa_classport_info_query *query =
1723 container_of(sa_query, struct ib_sa_classport_info_query, sa_query); 1738 container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1724 1739
@@ -1728,6 +1743,16 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1728 ib_unpack(classport_info_rec_table, 1743 ib_unpack(classport_info_rec_table,
1729 ARRAY_SIZE(classport_info_rec_table), 1744 ARRAY_SIZE(classport_info_rec_table),
1730 mad->data, &rec); 1745 mad->data, &rec);
1746
1747 spin_lock_irqsave(&sa_query->port->classport_lock, flags);
1748 if (!status && !sa_query->port->classport_info.valid) {
1749 memcpy(&sa_query->port->classport_info.data, &rec,
1750 sizeof(sa_query->port->classport_info.data));
1751
1752 sa_query->port->classport_info.valid = true;
1753 }
1754 spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
1755
1731 query->callback(status, &rec, query->context); 1756 query->callback(status, &rec, query->context);
1732 } else { 1757 } else {
1733 query->callback(status, NULL, query->context); 1758 query->callback(status, NULL, query->context);
@@ -1754,7 +1779,9 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
1754 struct ib_sa_port *port; 1779 struct ib_sa_port *port;
1755 struct ib_mad_agent *agent; 1780 struct ib_mad_agent *agent;
1756 struct ib_sa_mad *mad; 1781 struct ib_sa_mad *mad;
1782 struct ib_class_port_info cached_class_port_info;
1757 int ret; 1783 int ret;
1784 unsigned long flags;
1758 1785
1759 if (!sa_dev) 1786 if (!sa_dev)
1760 return -ENODEV; 1787 return -ENODEV;
@@ -1762,6 +1789,17 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
1762 port = &sa_dev->port[port_num - sa_dev->start_port]; 1789 port = &sa_dev->port[port_num - sa_dev->start_port];
1763 agent = port->agent; 1790 agent = port->agent;
1764 1791
1792 /* Use cached ClassPortInfo attribute if valid instead of sending mad */
1793 spin_lock_irqsave(&port->classport_lock, flags);
1794 if (port->classport_info.valid && callback) {
1795 memcpy(&cached_class_port_info, &port->classport_info.data,
1796 sizeof(cached_class_port_info));
1797 spin_unlock_irqrestore(&port->classport_lock, flags);
1798 callback(0, &cached_class_port_info, context);
1799 return 0;
1800 }
1801 spin_unlock_irqrestore(&port->classport_lock, flags);
1802
1765 query = kzalloc(sizeof(*query), gfp_mask); 1803 query = kzalloc(sizeof(*query), gfp_mask);
1766 if (!query) 1804 if (!query)
1767 return -ENOMEM; 1805 return -ENOMEM;
@@ -1885,6 +1923,9 @@ static void ib_sa_add_one(struct ib_device *device)
1885 sa_dev->port[i].sm_ah = NULL; 1923 sa_dev->port[i].sm_ah = NULL;
1886 sa_dev->port[i].port_num = i + s; 1924 sa_dev->port[i].port_num = i + s;
1887 1925
1926 spin_lock_init(&sa_dev->port[i].classport_lock);
1927 sa_dev->port[i].classport_info.valid = false;
1928
1888 sa_dev->port[i].agent = 1929 sa_dev->port[i].agent =
1889 ib_register_mad_agent(device, i + s, IB_QPT_GSI, 1930 ib_register_mad_agent(device, i + s, IB_QPT_GSI,
1890 NULL, 0, send_handler, 1931 NULL, 0, send_handler,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 60df4f8e81be..15defefecb4f 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -38,6 +38,7 @@
38#include <linux/stat.h> 38#include <linux/stat.h>
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/netdevice.h> 40#include <linux/netdevice.h>
41#include <linux/ethtool.h>
41 42
42#include <rdma/ib_mad.h> 43#include <rdma/ib_mad.h>
43#include <rdma/ib_pma.h> 44#include <rdma/ib_pma.h>
@@ -1200,16 +1201,28 @@ static ssize_t set_node_desc(struct device *device,
1200 return count; 1201 return count;
1201} 1202}
1202 1203
1204static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1205 char *buf)
1206{
1207 struct ib_device *dev = container_of(device, struct ib_device, dev);
1208
1209 ib_get_device_fw_str(dev, buf, PAGE_SIZE);
1210 strlcat(buf, "\n", PAGE_SIZE);
1211 return strlen(buf);
1212}
1213
1203static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); 1214static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
1204static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); 1215static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
1205static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); 1216static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
1206static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); 1217static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
1218static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1207 1219
1208static struct device_attribute *ib_class_attributes[] = { 1220static struct device_attribute *ib_class_attributes[] = {
1209 &dev_attr_node_type, 1221 &dev_attr_node_type,
1210 &dev_attr_sys_image_guid, 1222 &dev_attr_sys_image_guid,
1211 &dev_attr_node_guid, 1223 &dev_attr_node_guid,
1212 &dev_attr_node_desc 1224 &dev_attr_node_desc,
1225 &dev_attr_fw_ver,
1213}; 1226};
1214 1227
1215static void free_port_list_attributes(struct ib_device *device) 1228static void free_port_list_attributes(struct ib_device *device)
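With fw_ver added to ib_class_attributes above, the firmware string becomes readable from the device's class directory, normally /sys/class/infiniband/<device>/fw_ver (the path follows from where the existing node_guid/node_desc attributes already live). An illustrative userspace reader, not part of this series:

#include <stdio.h>

/* Print the firmware version exposed by the new fw_ver attribute for a
 * given RDMA device name, e.g. "mlx5_0". */
static int print_fw_ver(const char *ibdev)
{
        char path[128], buf[64];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/class/infiniband/%s/fw_ver", ibdev);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (fgets(buf, sizeof(buf), f))
                printf("%s firmware: %s", ibdev, buf);
        fclose(f);
        return 0;
}

int main(int argc, char **argv)
{
        return argc > 1 ? print_fw_ver(argv[1]) : 1;
}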
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index c0f3826abb30..2825ece91d3c 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -106,6 +106,7 @@ struct ucma_multicast {
106 int events_reported; 106 int events_reported;
107 107
108 u64 uid; 108 u64 uid;
109 u8 join_state;
109 struct list_head list; 110 struct list_head list;
110 struct sockaddr_storage addr; 111 struct sockaddr_storage addr;
111}; 112};
@@ -1317,12 +1318,20 @@ static ssize_t ucma_process_join(struct ucma_file *file,
1317 struct ucma_multicast *mc; 1318 struct ucma_multicast *mc;
1318 struct sockaddr *addr; 1319 struct sockaddr *addr;
1319 int ret; 1320 int ret;
1321 u8 join_state;
1320 1322
1321 if (out_len < sizeof(resp)) 1323 if (out_len < sizeof(resp))
1322 return -ENOSPC; 1324 return -ENOSPC;
1323 1325
1324 addr = (struct sockaddr *) &cmd->addr; 1326 addr = (struct sockaddr *) &cmd->addr;
1325 if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) 1327 if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
1328 return -EINVAL;
1329
1330 if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
1331 join_state = BIT(FULLMEMBER_JOIN);
1332 else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
1333 join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
1334 else
1326 return -EINVAL; 1335 return -EINVAL;
1327 1336
1328 ctx = ucma_get_ctx(file, cmd->id); 1337 ctx = ucma_get_ctx(file, cmd->id);
@@ -1335,10 +1344,11 @@ static ssize_t ucma_process_join(struct ucma_file *file,
1335 ret = -ENOMEM; 1344 ret = -ENOMEM;
1336 goto err1; 1345 goto err1;
1337 } 1346 }
1338 1347 mc->join_state = join_state;
1339 mc->uid = cmd->uid; 1348 mc->uid = cmd->uid;
1340 memcpy(&mc->addr, addr, cmd->addr_size); 1349 memcpy(&mc->addr, addr, cmd->addr_size);
1341 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); 1350 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
1351 join_state, mc);
1342 if (ret) 1352 if (ret)
1343 goto err2; 1353 goto err2;
1344 1354
@@ -1382,7 +1392,7 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
1382 join_cmd.uid = cmd.uid; 1392 join_cmd.uid = cmd.uid;
1383 join_cmd.id = cmd.id; 1393 join_cmd.id = cmd.id;
1384 join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); 1394 join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
1385 join_cmd.reserved = 0; 1395 join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
1386 memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); 1396 memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
1387 1397
1388 return ucma_process_join(file, &join_cmd, out_len); 1398 return ucma_process_join(file, &join_cmd, out_len);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 612ccfd39bf9..df26a741cda6 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -116,6 +116,7 @@ struct ib_uverbs_event_file {
116struct ib_uverbs_file { 116struct ib_uverbs_file {
117 struct kref ref; 117 struct kref ref;
118 struct mutex mutex; 118 struct mutex mutex;
119 struct mutex cleanup_mutex; /* protect cleanup */
119 struct ib_uverbs_device *device; 120 struct ib_uverbs_device *device;
120 struct ib_ucontext *ucontext; 121 struct ib_ucontext *ucontext;
121 struct ib_event_handler event_handler; 122 struct ib_event_handler event_handler;
@@ -162,6 +163,10 @@ struct ib_uqp_object {
162 struct ib_uxrcd_object *uxrcd; 163 struct ib_uxrcd_object *uxrcd;
163}; 164};
164 165
166struct ib_uwq_object {
167 struct ib_uevent_object uevent;
168};
169
165struct ib_ucq_object { 170struct ib_ucq_object {
166 struct ib_uobject uobject; 171 struct ib_uobject uobject;
167 struct ib_uverbs_file *uverbs_file; 172 struct ib_uverbs_file *uverbs_file;
@@ -181,6 +186,8 @@ extern struct idr ib_uverbs_qp_idr;
181extern struct idr ib_uverbs_srq_idr; 186extern struct idr ib_uverbs_srq_idr;
182extern struct idr ib_uverbs_xrcd_idr; 187extern struct idr ib_uverbs_xrcd_idr;
183extern struct idr ib_uverbs_rule_idr; 188extern struct idr ib_uverbs_rule_idr;
189extern struct idr ib_uverbs_wq_idr;
190extern struct idr ib_uverbs_rwq_ind_tbl_idr;
184 191
185void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); 192void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
186 193
@@ -199,6 +206,7 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
199void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); 206void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
200void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); 207void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
201void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); 208void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
209void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
202void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); 210void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
203void ib_uverbs_event_handler(struct ib_event_handler *handler, 211void ib_uverbs_event_handler(struct ib_event_handler *handler,
204 struct ib_event *event); 212 struct ib_event *event);
@@ -219,6 +227,7 @@ struct ib_uverbs_flow_spec {
219 struct ib_uverbs_flow_spec_eth eth; 227 struct ib_uverbs_flow_spec_eth eth;
220 struct ib_uverbs_flow_spec_ipv4 ipv4; 228 struct ib_uverbs_flow_spec_ipv4 ipv4;
221 struct ib_uverbs_flow_spec_tcp_udp tcp_udp; 229 struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
230 struct ib_uverbs_flow_spec_ipv6 ipv6;
222 }; 231 };
223}; 232};
224 233
@@ -275,5 +284,10 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
275IB_UVERBS_DECLARE_EX_CMD(query_device); 284IB_UVERBS_DECLARE_EX_CMD(query_device);
276IB_UVERBS_DECLARE_EX_CMD(create_cq); 285IB_UVERBS_DECLARE_EX_CMD(create_cq);
277IB_UVERBS_DECLARE_EX_CMD(create_qp); 286IB_UVERBS_DECLARE_EX_CMD(create_qp);
287IB_UVERBS_DECLARE_EX_CMD(create_wq);
288IB_UVERBS_DECLARE_EX_CMD(modify_wq);
289IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
290IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
291IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
278 292
279#endif /* UVERBS_H */ 293#endif /* UVERBS_H */
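The extended commands declared above (create_wq, modify_wq, destroy_wq, create_rwq_ind_table, destroy_rwq_ind_table) all follow the same extensible-ABI validation in their uverbs_cmd.c handlers below: require only the fields this kernel understands, and reject a larger input unless the tail it does not understand is all zeroes. A condensed sketch of that pattern; the command struct here is hypothetical, while ib_is_udata_cleared()/ib_copy_from_udata() are the real helpers used in the handlers:

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Hypothetical extended-command layout; the real ones live in
 * include/uapi/rdma/ib_user_verbs.h. */
struct example_ex_cmd {
        __u32 comp_mask;
        __u32 last_known_field;
};

static int example_parse_ex_cmd(struct ib_udata *ucore,
                                struct example_ex_cmd *cmd)
{
        /* Oldest layout accepted: everything up to and including the last
         * field this kernel knows how to act on. */
        size_t required_cmd_sz =
                offsetof(struct example_ex_cmd, last_known_field) +
                sizeof(cmd->last_known_field);

        if (ucore->inlen < required_cmd_sz)
                return -EINVAL;

        /* Newer userspace may pass a larger struct, but only if the part
         * this kernel does not understand is zeroed. */
        if (ucore->inlen > sizeof(*cmd) &&
            !ib_is_udata_cleared(ucore, sizeof(*cmd),
                                 ucore->inlen - sizeof(*cmd)))
                return -EOPNOTSUPP;

        return ib_copy_from_udata(cmd, ucore, min(sizeof(*cmd), ucore->inlen));
}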
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 825021d1008b..f6647318138d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -57,6 +57,8 @@ static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
57static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; 57static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
58static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; 58static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
59static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; 59static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
60static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
61static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
60 62
61/* 63/*
62 * The ib_uobject locking scheme is as follows: 64 * The ib_uobject locking scheme is as follows:
@@ -243,6 +245,27 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
243 return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); 245 return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
244} 246}
245 247
248static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
249{
250 return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
251}
252
253static void put_wq_read(struct ib_wq *wq)
254{
255 put_uobj_read(wq->uobject);
256}
257
258static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
259 struct ib_ucontext *context)
260{
261 return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
262}
263
264static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
265{
266 put_uobj_read(ind_table->uobject);
267}
268
246static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) 269static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
247{ 270{
248 struct ib_uobject *uobj; 271 struct ib_uobject *uobj;
@@ -326,6 +349,8 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
326 INIT_LIST_HEAD(&ucontext->qp_list); 349 INIT_LIST_HEAD(&ucontext->qp_list);
327 INIT_LIST_HEAD(&ucontext->srq_list); 350 INIT_LIST_HEAD(&ucontext->srq_list);
328 INIT_LIST_HEAD(&ucontext->ah_list); 351 INIT_LIST_HEAD(&ucontext->ah_list);
352 INIT_LIST_HEAD(&ucontext->wq_list);
353 INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
329 INIT_LIST_HEAD(&ucontext->xrcd_list); 354 INIT_LIST_HEAD(&ucontext->xrcd_list);
330 INIT_LIST_HEAD(&ucontext->rule_list); 355 INIT_LIST_HEAD(&ucontext->rule_list);
331 rcu_read_lock(); 356 rcu_read_lock();
@@ -1750,6 +1775,8 @@ static int create_qp(struct ib_uverbs_file *file,
1750 struct ib_qp_init_attr attr = {}; 1775 struct ib_qp_init_attr attr = {};
1751 struct ib_uverbs_ex_create_qp_resp resp; 1776 struct ib_uverbs_ex_create_qp_resp resp;
1752 int ret; 1777 int ret;
1778 struct ib_rwq_ind_table *ind_tbl = NULL;
1779 bool has_sq = true;
1753 1780
1754 if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 1781 if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
1755 return -EPERM; 1782 return -EPERM;
@@ -1761,6 +1788,32 @@ static int create_qp(struct ib_uverbs_file *file,
1761 init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, 1788 init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
1762 &qp_lock_class); 1789 &qp_lock_class);
1763 down_write(&obj->uevent.uobject.mutex); 1790 down_write(&obj->uevent.uobject.mutex);
1791 if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
1792 sizeof(cmd->rwq_ind_tbl_handle) &&
1793 (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
1794 ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
1795 file->ucontext);
1796 if (!ind_tbl) {
1797 ret = -EINVAL;
1798 goto err_put;
1799 }
1800
1801 attr.rwq_ind_tbl = ind_tbl;
1802 }
1803
1804 if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
1805 sizeof(cmd->reserved1)) && cmd->reserved1) {
1806 ret = -EOPNOTSUPP;
1807 goto err_put;
1808 }
1809
1810 if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
1811 ret = -EINVAL;
1812 goto err_put;
1813 }
1814
1815 if (ind_tbl && !cmd->max_send_wr)
1816 has_sq = false;
1764 1817
1765 if (cmd->qp_type == IB_QPT_XRC_TGT) { 1818 if (cmd->qp_type == IB_QPT_XRC_TGT) {
1766 xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, 1819 xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
@@ -1784,20 +1837,24 @@ static int create_qp(struct ib_uverbs_file *file,
1784 } 1837 }
1785 } 1838 }
1786 1839
1787 if (cmd->recv_cq_handle != cmd->send_cq_handle) { 1840 if (!ind_tbl) {
1788 rcq = idr_read_cq(cmd->recv_cq_handle, 1841 if (cmd->recv_cq_handle != cmd->send_cq_handle) {
1789 file->ucontext, 0); 1842 rcq = idr_read_cq(cmd->recv_cq_handle,
1790 if (!rcq) { 1843 file->ucontext, 0);
1791 ret = -EINVAL; 1844 if (!rcq) {
1792 goto err_put; 1845 ret = -EINVAL;
1846 goto err_put;
1847 }
1793 } 1848 }
1794 } 1849 }
1795 } 1850 }
1796 1851
1797 scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); 1852 if (has_sq)
1798 rcq = rcq ?: scq; 1853 scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
1854 if (!ind_tbl)
1855 rcq = rcq ?: scq;
1799 pd = idr_read_pd(cmd->pd_handle, file->ucontext); 1856 pd = idr_read_pd(cmd->pd_handle, file->ucontext);
1800 if (!pd || !scq) { 1857 if (!pd || (!scq && has_sq)) {
1801 ret = -EINVAL; 1858 ret = -EINVAL;
1802 goto err_put; 1859 goto err_put;
1803 } 1860 }
@@ -1864,16 +1921,20 @@ static int create_qp(struct ib_uverbs_file *file,
1864 qp->send_cq = attr.send_cq; 1921 qp->send_cq = attr.send_cq;
1865 qp->recv_cq = attr.recv_cq; 1922 qp->recv_cq = attr.recv_cq;
1866 qp->srq = attr.srq; 1923 qp->srq = attr.srq;
1924 qp->rwq_ind_tbl = ind_tbl;
1867 qp->event_handler = attr.event_handler; 1925 qp->event_handler = attr.event_handler;
1868 qp->qp_context = attr.qp_context; 1926 qp->qp_context = attr.qp_context;
1869 qp->qp_type = attr.qp_type; 1927 qp->qp_type = attr.qp_type;
1870 atomic_set(&qp->usecnt, 0); 1928 atomic_set(&qp->usecnt, 0);
1871 atomic_inc(&pd->usecnt); 1929 atomic_inc(&pd->usecnt);
1872 atomic_inc(&attr.send_cq->usecnt); 1930 if (attr.send_cq)
1931 atomic_inc(&attr.send_cq->usecnt);
1873 if (attr.recv_cq) 1932 if (attr.recv_cq)
1874 atomic_inc(&attr.recv_cq->usecnt); 1933 atomic_inc(&attr.recv_cq->usecnt);
1875 if (attr.srq) 1934 if (attr.srq)
1876 atomic_inc(&attr.srq->usecnt); 1935 atomic_inc(&attr.srq->usecnt);
1936 if (ind_tbl)
1937 atomic_inc(&ind_tbl->usecnt);
1877 } 1938 }
1878 qp->uobject = &obj->uevent.uobject; 1939 qp->uobject = &obj->uevent.uobject;
1879 1940
@@ -1913,6 +1974,8 @@ static int create_qp(struct ib_uverbs_file *file,
1913 put_cq_read(rcq); 1974 put_cq_read(rcq);
1914 if (srq) 1975 if (srq)
1915 put_srq_read(srq); 1976 put_srq_read(srq);
1977 if (ind_tbl)
1978 put_rwq_indirection_table_read(ind_tbl);
1916 1979
1917 mutex_lock(&file->mutex); 1980 mutex_lock(&file->mutex);
1918 list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); 1981 list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -1940,6 +2003,8 @@ err_put:
1940 put_cq_read(rcq); 2003 put_cq_read(rcq);
1941 if (srq) 2004 if (srq)
1942 put_srq_read(srq); 2005 put_srq_read(srq);
2006 if (ind_tbl)
2007 put_rwq_indirection_table_read(ind_tbl);
1943 2008
1944 put_uobj_write(&obj->uevent.uobject); 2009 put_uobj_write(&obj->uevent.uobject);
1945 return ret; 2010 return ret;
@@ -2033,7 +2098,7 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
2033 if (err) 2098 if (err)
2034 return err; 2099 return err;
2035 2100
2036 if (cmd.comp_mask) 2101 if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
2037 return -EINVAL; 2102 return -EINVAL;
2038 2103
2039 if (cmd.reserved) 2104 if (cmd.reserved)
@@ -3040,6 +3105,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
3040 memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, 3105 memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
3041 sizeof(struct ib_flow_ipv4_filter)); 3106 sizeof(struct ib_flow_ipv4_filter));
3042 break; 3107 break;
3108 case IB_FLOW_SPEC_IPV6:
3109 ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6);
3110 if (ib_spec->ipv6.size != kern_spec->ipv6.size)
3111 return -EINVAL;
3112 memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val,
3113 sizeof(struct ib_flow_ipv6_filter));
3114 memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask,
3115 sizeof(struct ib_flow_ipv6_filter));
3116 break;
3043 case IB_FLOW_SPEC_TCP: 3117 case IB_FLOW_SPEC_TCP:
3044 case IB_FLOW_SPEC_UDP: 3118 case IB_FLOW_SPEC_UDP:
3045 ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); 3119 ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
@@ -3056,6 +3130,445 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
3056 return 0; 3130 return 0;
3057} 3131}
3058 3132
3133int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
3134 struct ib_device *ib_dev,
3135 struct ib_udata *ucore,
3136 struct ib_udata *uhw)
3137{
3138 struct ib_uverbs_ex_create_wq cmd = {};
3139 struct ib_uverbs_ex_create_wq_resp resp = {};
3140 struct ib_uwq_object *obj;
3141 int err = 0;
3142 struct ib_cq *cq;
3143 struct ib_pd *pd;
3144 struct ib_wq *wq;
3145 struct ib_wq_init_attr wq_init_attr = {};
3146 size_t required_cmd_sz;
3147 size_t required_resp_len;
3148
3149 required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
3150 required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
3151
3152 if (ucore->inlen < required_cmd_sz)
3153 return -EINVAL;
3154
3155 if (ucore->outlen < required_resp_len)
3156 return -ENOSPC;
3157
3158 if (ucore->inlen > sizeof(cmd) &&
3159 !ib_is_udata_cleared(ucore, sizeof(cmd),
3160 ucore->inlen - sizeof(cmd)))
3161 return -EOPNOTSUPP;
3162
3163 err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
3164 if (err)
3165 return err;
3166
3167 if (cmd.comp_mask)
3168 return -EOPNOTSUPP;
3169
3170 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
3171 if (!obj)
3172 return -ENOMEM;
3173
3174 init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
3175 &wq_lock_class);
3176 down_write(&obj->uevent.uobject.mutex);
3177 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
3178 if (!pd) {
3179 err = -EINVAL;
3180 goto err_uobj;
3181 }
3182
3183 cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
3184 if (!cq) {
3185 err = -EINVAL;
3186 goto err_put_pd;
3187 }
3188
3189 wq_init_attr.cq = cq;
3190 wq_init_attr.max_sge = cmd.max_sge;
3191 wq_init_attr.max_wr = cmd.max_wr;
3192 wq_init_attr.wq_context = file;
3193 wq_init_attr.wq_type = cmd.wq_type;
3194 wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
3195 obj->uevent.events_reported = 0;
3196 INIT_LIST_HEAD(&obj->uevent.event_list);
3197 wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
3198 if (IS_ERR(wq)) {
3199 err = PTR_ERR(wq);
3200 goto err_put_cq;
3201 }
3202
3203 wq->uobject = &obj->uevent.uobject;
3204 obj->uevent.uobject.object = wq;
3205 wq->wq_type = wq_init_attr.wq_type;
3206 wq->cq = cq;
3207 wq->pd = pd;
3208 wq->device = pd->device;
3209 wq->wq_context = wq_init_attr.wq_context;
3210 atomic_set(&wq->usecnt, 0);
3211 atomic_inc(&pd->usecnt);
3212 atomic_inc(&cq->usecnt);
3213 wq->uobject = &obj->uevent.uobject;
3214 obj->uevent.uobject.object = wq;
3215 err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
3216 if (err)
3217 goto destroy_wq;
3218
3219 memset(&resp, 0, sizeof(resp));
3220 resp.wq_handle = obj->uevent.uobject.id;
3221 resp.max_sge = wq_init_attr.max_sge;
3222 resp.max_wr = wq_init_attr.max_wr;
3223 resp.wqn = wq->wq_num;
3224 resp.response_length = required_resp_len;
3225 err = ib_copy_to_udata(ucore,
3226 &resp, resp.response_length);
3227 if (err)
3228 goto err_copy;
3229
3230 put_pd_read(pd);
3231 put_cq_read(cq);
3232
3233 mutex_lock(&file->mutex);
3234 list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
3235 mutex_unlock(&file->mutex);
3236
3237 obj->uevent.uobject.live = 1;
3238 up_write(&obj->uevent.uobject.mutex);
3239 return 0;
3240
3241err_copy:
3242 idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
3243destroy_wq:
3244 ib_destroy_wq(wq);
3245err_put_cq:
3246 put_cq_read(cq);
3247err_put_pd:
3248 put_pd_read(pd);
3249err_uobj:
3250 put_uobj_write(&obj->uevent.uobject);
3251
3252 return err;
3253}
3254
3255int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
3256 struct ib_device *ib_dev,
3257 struct ib_udata *ucore,
3258 struct ib_udata *uhw)
3259{
3260 struct ib_uverbs_ex_destroy_wq cmd = {};
3261 struct ib_uverbs_ex_destroy_wq_resp resp = {};
3262 struct ib_wq *wq;
3263 struct ib_uobject *uobj;
3264 struct ib_uwq_object *obj;
3265 size_t required_cmd_sz;
3266 size_t required_resp_len;
3267 int ret;
3268
3269 required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
3270 required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
3271
3272 if (ucore->inlen < required_cmd_sz)
3273 return -EINVAL;
3274
3275 if (ucore->outlen < required_resp_len)
3276 return -ENOSPC;
3277
3278 if (ucore->inlen > sizeof(cmd) &&
3279 !ib_is_udata_cleared(ucore, sizeof(cmd),
3280 ucore->inlen - sizeof(cmd)))
3281 return -EOPNOTSUPP;
3282
3283 ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
3284 if (ret)
3285 return ret;
3286
3287 if (cmd.comp_mask)
3288 return -EOPNOTSUPP;
3289
3290 resp.response_length = required_resp_len;
3291 uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
3292 file->ucontext);
3293 if (!uobj)
3294 return -EINVAL;
3295
3296 wq = uobj->object;
3297 obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
3298 ret = ib_destroy_wq(wq);
3299 if (!ret)
3300 uobj->live = 0;
3301
3302 put_uobj_write(uobj);
3303 if (ret)
3304 return ret;
3305
3306 idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
3307
3308 mutex_lock(&file->mutex);
3309 list_del(&uobj->list);
3310 mutex_unlock(&file->mutex);
3311
3312 ib_uverbs_release_uevent(file, &obj->uevent);
3313 resp.events_reported = obj->uevent.events_reported;
3314 put_uobj(uobj);
3315
3316 ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
3317 if (ret)
3318 return ret;
3319
3320 return 0;
3321}
3322
3323int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
3324 struct ib_device *ib_dev,
3325 struct ib_udata *ucore,
3326 struct ib_udata *uhw)
3327{
3328 struct ib_uverbs_ex_modify_wq cmd = {};
3329 struct ib_wq *wq;
3330 struct ib_wq_attr wq_attr = {};
3331 size_t required_cmd_sz;
3332 int ret;
3333
3334 required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
3335 if (ucore->inlen < required_cmd_sz)
3336 return -EINVAL;
3337
3338 if (ucore->inlen > sizeof(cmd) &&
3339 !ib_is_udata_cleared(ucore, sizeof(cmd),
3340 ucore->inlen - sizeof(cmd)))
3341 return -EOPNOTSUPP;
3342
3343 ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
3344 if (ret)
3345 return ret;
3346
3347 if (!cmd.attr_mask)
3348 return -EINVAL;
3349
3350 if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
3351 return -EINVAL;
3352
3353 wq = idr_read_wq(cmd.wq_handle, file->ucontext);
3354 if (!wq)
3355 return -EINVAL;
3356
3357 wq_attr.curr_wq_state = cmd.curr_wq_state;
3358 wq_attr.wq_state = cmd.wq_state;
3359 ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
3360 put_wq_read(wq);
3361 return ret;
3362}
3363
3364int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
3365 struct ib_device *ib_dev,
3366 struct ib_udata *ucore,
3367 struct ib_udata *uhw)
3368{
3369 struct ib_uverbs_ex_create_rwq_ind_table cmd = {};
3370 struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
3371 struct ib_uobject *uobj;
3372 int err = 0;
3373 struct ib_rwq_ind_table_init_attr init_attr = {};
3374 struct ib_rwq_ind_table *rwq_ind_tbl;
3375 struct ib_wq **wqs = NULL;
3376 u32 *wqs_handles = NULL;
3377 struct ib_wq *wq = NULL;
3378 int i, j, num_read_wqs;
3379 u32 num_wq_handles;
3380 u32 expected_in_size;
3381 size_t required_cmd_sz_header;
3382 size_t required_resp_len;
3383
3384 required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
3385 required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
3386
3387 if (ucore->inlen < required_cmd_sz_header)
3388 return -EINVAL;
3389
3390 if (ucore->outlen < required_resp_len)
3391 return -ENOSPC;
3392
3393 err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
3394 if (err)
3395 return err;
3396
3397 ucore->inbuf += required_cmd_sz_header;
3398 ucore->inlen -= required_cmd_sz_header;
3399
3400 if (cmd.comp_mask)
3401 return -EOPNOTSUPP;
3402
3403 if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE)
3404 return -EINVAL;
3405
3406 num_wq_handles = 1 << cmd.log_ind_tbl_size;
3407 expected_in_size = num_wq_handles * sizeof(__u32);
3408 if (num_wq_handles == 1)
3409 /* input size for wq handles is u64 aligned */
3410 expected_in_size += sizeof(__u32);
3411
3412 if (ucore->inlen < expected_in_size)
3413 return -EINVAL;
3414
3415 if (ucore->inlen > expected_in_size &&
3416 !ib_is_udata_cleared(ucore, expected_in_size,
3417 ucore->inlen - expected_in_size))
3418 return -EOPNOTSUPP;
3419
3420 wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
3421 GFP_KERNEL);
3422 if (!wqs_handles)
3423 return -ENOMEM;
3424
3425 err = ib_copy_from_udata(wqs_handles, ucore,
3426 num_wq_handles * sizeof(__u32));
3427 if (err)
3428 goto err_free;
3429
3430 wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL);
3431 if (!wqs) {
3432 err = -ENOMEM;
3433 goto err_free;
3434 }
3435
3436 for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
3437 num_read_wqs++) {
3438 wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
3439 if (!wq) {
3440 err = -EINVAL;
3441 goto put_wqs;
3442 }
3443
3444 wqs[num_read_wqs] = wq;
3445 }
3446
3447 uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
3448 if (!uobj) {
3449 err = -ENOMEM;
3450 goto put_wqs;
3451 }
3452
3453 init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
3454 down_write(&uobj->mutex);
3455 init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
3456 init_attr.ind_tbl = wqs;
3457 rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
3458
3459 if (IS_ERR(rwq_ind_tbl)) {
3460 err = PTR_ERR(rwq_ind_tbl);
3461 goto err_uobj;
3462 }
3463
3464 rwq_ind_tbl->ind_tbl = wqs;
3465 rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
3466 rwq_ind_tbl->uobject = uobj;
3467 uobj->object = rwq_ind_tbl;
3468 rwq_ind_tbl->device = ib_dev;
3469 atomic_set(&rwq_ind_tbl->usecnt, 0);
3470
3471 for (i = 0; i < num_wq_handles; i++)
3472 atomic_inc(&wqs[i]->usecnt);
3473
3474 err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
3475 if (err)
3476 goto destroy_ind_tbl;
3477
3478 resp.ind_tbl_handle = uobj->id;
3479 resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
3480 resp.response_length = required_resp_len;
3481
3482 err = ib_copy_to_udata(ucore,
3483 &resp, resp.response_length);
3484 if (err)
3485 goto err_copy;
3486
3487 kfree(wqs_handles);
3488
3489 for (j = 0; j < num_read_wqs; j++)
3490 put_wq_read(wqs[j]);
3491
3492 mutex_lock(&file->mutex);
3493 list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
3494 mutex_unlock(&file->mutex);
3495
3496 uobj->live = 1;
3497
3498 up_write(&uobj->mutex);
3499 return 0;
3500
3501err_copy:
3502 idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
3503destroy_ind_tbl:
3504 ib_destroy_rwq_ind_table(rwq_ind_tbl);
3505err_uobj:
3506 put_uobj_write(uobj);
3507put_wqs:
3508 for (j = 0; j < num_read_wqs; j++)
3509 put_wq_read(wqs[j]);
3510err_free:
3511 kfree(wqs_handles);
3512 kfree(wqs);
3513 return err;
3514}
3515
3516int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
3517 struct ib_device *ib_dev,
3518 struct ib_udata *ucore,
3519 struct ib_udata *uhw)
3520{
3521 struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
3522 struct ib_rwq_ind_table *rwq_ind_tbl;
3523 struct ib_uobject *uobj;
3524 int ret;
3525 struct ib_wq **ind_tbl;
3526 size_t required_cmd_sz;
3527
3528 required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
3529
3530 if (ucore->inlen < required_cmd_sz)
3531 return -EINVAL;
3532
3533 if (ucore->inlen > sizeof(cmd) &&
3534 !ib_is_udata_cleared(ucore, sizeof(cmd),
3535 ucore->inlen - sizeof(cmd)))
3536 return -EOPNOTSUPP;
3537
3538 ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
3539 if (ret)
3540 return ret;
3541
3542 if (cmd.comp_mask)
3543 return -EOPNOTSUPP;
3544
3545 uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
3546 file->ucontext);
3547 if (!uobj)
3548 return -EINVAL;
3549 rwq_ind_tbl = uobj->object;
3550 ind_tbl = rwq_ind_tbl->ind_tbl;
3551
3552 ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
3553 if (!ret)
3554 uobj->live = 0;
3555
3556 put_uobj_write(uobj);
3557
3558 if (ret)
3559 return ret;
3560
3561 idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
3562
3563 mutex_lock(&file->mutex);
3564 list_del(&uobj->list);
3565 mutex_unlock(&file->mutex);
3566
3567 put_uobj(uobj);
3568 kfree(ind_tbl);
3569 return ret;
3570}
3571
3059int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, 3572int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3060 struct ib_device *ib_dev, 3573 struct ib_device *ib_dev,
3061 struct ib_udata *ucore, 3574 struct ib_udata *ucore,
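All three handlers above parse their input the same way: the minimum command size is computed with offsetof()+sizeof() of the last field this kernel version strictly needs, shorter inputs fail with -EINVAL, any bytes beyond the structure the kernel knows about must be all zeroes (ib_is_udata_cleared()) so requests from newer userspace are refused with -EOPNOTSUPP rather than silently truncated, and only min(sizeof(cmd), inlen) bytes are copied. A standalone userspace-style sketch of that pattern, with a hypothetical command layout instead of the real ABI structs:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical extended-command layout; the real structures live in the
 * ib_user_verbs uapi header. */
struct ex_cmd {
	uint32_t comp_mask;
	uint32_t wq_handle;
	uint32_t wq_state;
	uint32_t curr_wq_state;	/* last field this "kernel" requires */
};

static int parse_ex_cmd(const unsigned char *inbuf, size_t inlen,
			struct ex_cmd *cmd)
{
	size_t required = offsetof(struct ex_cmd, curr_wq_state) +
			  sizeof(cmd->curr_wq_state);
	size_t i;

	if (inlen < required)
		return -EINVAL;		/* too old / truncated request */

	/* Bytes past what we understand must be cleared. */
	for (i = sizeof(*cmd); i < inlen; i++)
		if (inbuf[i])
			return -EOPNOTSUPP;	/* newer userspace feature */

	memset(cmd, 0, sizeof(*cmd));
	memcpy(cmd, inbuf, inlen < sizeof(*cmd) ? inlen : sizeof(*cmd));
	return 0;
}

In the kernel code ib_copy_from_udata() and ib_is_udata_cleared() perform the copy and the trailing-zero check against user memory; the sketch simply inlines both for clarity.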
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 31f422a70623..0012fa58c105 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -76,6 +76,8 @@ DEFINE_IDR(ib_uverbs_qp_idr);
76DEFINE_IDR(ib_uverbs_srq_idr); 76DEFINE_IDR(ib_uverbs_srq_idr);
77DEFINE_IDR(ib_uverbs_xrcd_idr); 77DEFINE_IDR(ib_uverbs_xrcd_idr);
78DEFINE_IDR(ib_uverbs_rule_idr); 78DEFINE_IDR(ib_uverbs_rule_idr);
79DEFINE_IDR(ib_uverbs_wq_idr);
80DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
79 81
80static DEFINE_SPINLOCK(map_lock); 82static DEFINE_SPINLOCK(map_lock);
81static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 83static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -130,6 +132,11 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
130 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, 132 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
131 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, 133 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
132 [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, 134 [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
135 [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
136 [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
137 [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
138 [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
139 [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
133}; 140};
134 141
135static void ib_uverbs_add_one(struct ib_device *device); 142static void ib_uverbs_add_one(struct ib_device *device);
@@ -265,6 +272,27 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
265 kfree(uqp); 272 kfree(uqp);
266 } 273 }
267 274
275 list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
276 struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
277 struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
278
279 idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
280 ib_destroy_rwq_ind_table(rwq_ind_tbl);
281 kfree(ind_tbl);
282 kfree(uobj);
283 }
284
285 list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
286 struct ib_wq *wq = uobj->object;
287 struct ib_uwq_object *uwq =
288 container_of(uobj, struct ib_uwq_object, uevent.uobject);
289
290 idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
291 ib_destroy_wq(wq);
292 ib_uverbs_release_uevent(file, &uwq->uevent);
293 kfree(uwq);
294 }
295
268 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { 296 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
269 struct ib_srq *srq = uobj->object; 297 struct ib_srq *srq = uobj->object;
270 struct ib_uevent_object *uevent = 298 struct ib_uevent_object *uevent =
@@ -568,6 +596,16 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
568 &uobj->events_reported); 596 &uobj->events_reported);
569} 597}
570 598
599void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
600{
601 struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
602 struct ib_uevent_object, uobject);
603
604 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
605 event->event, &uobj->event_list,
606 &uobj->events_reported);
607}
608
571void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) 609void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
572{ 610{
573 struct ib_uevent_object *uobj; 611 struct ib_uevent_object *uobj;
@@ -931,6 +969,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
931 file->async_file = NULL; 969 file->async_file = NULL;
932 kref_init(&file->ref); 970 kref_init(&file->ref);
933 mutex_init(&file->mutex); 971 mutex_init(&file->mutex);
972 mutex_init(&file->cleanup_mutex);
934 973
935 filp->private_data = file; 974 filp->private_data = file;
936 kobject_get(&dev->kobj); 975 kobject_get(&dev->kobj);
@@ -956,18 +995,20 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
956{ 995{
957 struct ib_uverbs_file *file = filp->private_data; 996 struct ib_uverbs_file *file = filp->private_data;
958 struct ib_uverbs_device *dev = file->device; 997 struct ib_uverbs_device *dev = file->device;
959 struct ib_ucontext *ucontext = NULL; 998
999 mutex_lock(&file->cleanup_mutex);
1000 if (file->ucontext) {
1001 ib_uverbs_cleanup_ucontext(file, file->ucontext);
1002 file->ucontext = NULL;
1003 }
1004 mutex_unlock(&file->cleanup_mutex);
960 1005
961 mutex_lock(&file->device->lists_mutex); 1006 mutex_lock(&file->device->lists_mutex);
962 ucontext = file->ucontext;
963 file->ucontext = NULL;
964 if (!file->is_closed) { 1007 if (!file->is_closed) {
965 list_del(&file->list); 1008 list_del(&file->list);
966 file->is_closed = 1; 1009 file->is_closed = 1;
967 } 1010 }
968 mutex_unlock(&file->device->lists_mutex); 1011 mutex_unlock(&file->device->lists_mutex);
969 if (ucontext)
970 ib_uverbs_cleanup_ucontext(file, ucontext);
971 1012
972 if (file->async_file) 1013 if (file->async_file)
973 kref_put(&file->async_file->ref, ib_uverbs_release_event_file); 1014 kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
@@ -1181,22 +1222,30 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1181 mutex_lock(&uverbs_dev->lists_mutex); 1222 mutex_lock(&uverbs_dev->lists_mutex);
1182 while (!list_empty(&uverbs_dev->uverbs_file_list)) { 1223 while (!list_empty(&uverbs_dev->uverbs_file_list)) {
1183 struct ib_ucontext *ucontext; 1224 struct ib_ucontext *ucontext;
1184
1185 file = list_first_entry(&uverbs_dev->uverbs_file_list, 1225 file = list_first_entry(&uverbs_dev->uverbs_file_list,
1186 struct ib_uverbs_file, list); 1226 struct ib_uverbs_file, list);
1187 file->is_closed = 1; 1227 file->is_closed = 1;
1188 ucontext = file->ucontext;
1189 list_del(&file->list); 1228 list_del(&file->list);
1190 file->ucontext = NULL;
1191 kref_get(&file->ref); 1229 kref_get(&file->ref);
1192 mutex_unlock(&uverbs_dev->lists_mutex); 1230 mutex_unlock(&uverbs_dev->lists_mutex);
1193 /* We must release the mutex before going ahead and calling 1231
1194 * disassociate_ucontext. disassociate_ucontext might end up
1195 * indirectly calling uverbs_close, for example due to freeing
1196 * the resources (e.g mmput).
1197 */
1198 ib_uverbs_event_handler(&file->event_handler, &event); 1232 ib_uverbs_event_handler(&file->event_handler, &event);
1233
1234 mutex_lock(&file->cleanup_mutex);
1235 ucontext = file->ucontext;
1236 file->ucontext = NULL;
1237 mutex_unlock(&file->cleanup_mutex);
1238
1239 /* At this point ib_uverbs_close cannot be running
1240 * ib_uverbs_cleanup_ucontext
1241 */
1199 if (ucontext) { 1242 if (ucontext) {
1243 /* We must release the mutex before going ahead and
1244 * calling disassociate_ucontext. disassociate_ucontext
1245 * might end up indirectly calling uverbs_close,
1246 * for example due to freeing the resources
 1247                          * (e.g. mmput).
1248 */
1200 ib_dev->disassociate_ucontext(ucontext); 1249 ib_dev->disassociate_ucontext(ucontext);
1201 ib_uverbs_cleanup_ucontext(file, ucontext); 1250 ib_uverbs_cleanup_ucontext(file, ucontext);
1202 } 1251 }
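The close and hot-removal paths above now race only over who gets to claim file->ucontext: each takes cleanup_mutex, steals the pointer (setting it to NULL) and runs the teardown outside the lock, so ib_uverbs_cleanup_ucontext() can never be entered twice for the same context. A minimal userspace sketch of that claim-then-cleanup pattern, with invented names and none of the surrounding uverbs locking:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for ib_uverbs_file / ib_ucontext, invented for the sketch. */
struct ucontext { int id; };

struct uverbs_file {
	pthread_mutex_t cleanup_mutex;
	struct ucontext *ucontext;
};

static void cleanup_ucontext(struct ucontext *uctx)
{
	/* Reached at most once per context: both callers NULL the pointer
	 * under cleanup_mutex before getting here. */
	printf("cleaning up context %d\n", uctx->id);
	free(uctx);
}

/* Shared by the close() path and the device-removal path. */
static void teardown(struct uverbs_file *file)
{
	struct ucontext *uctx;

	pthread_mutex_lock(&file->cleanup_mutex);
	uctx = file->ucontext;		/* claim the context ...           */
	file->ucontext = NULL;		/* ... so the other path sees NULL */
	pthread_mutex_unlock(&file->cleanup_mutex);

	if (uctx)
		cleanup_ucontext(uctx);	/* heavy work done outside the lock */
}

int main(void)
{
	struct uverbs_file file = {
		.cleanup_mutex = PTHREAD_MUTEX_INITIALIZER,
		.ucontext = NULL,
	};

	file.ucontext = malloc(sizeof(*file.ucontext));
	if (!file.ucontext)
		return 1;
	file.ucontext->id = 1;

	teardown(&file);	/* think ib_uverbs_close()                     */
	teardown(&file);	/* think ib_uverbs_free_hw_resources(): no-op  */
	return 0;
}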
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 6298f54b4137..2e813edcddab 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -758,6 +758,12 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
758 struct ib_qp *qp; 758 struct ib_qp *qp;
759 int ret; 759 int ret;
760 760
761 if (qp_init_attr->rwq_ind_tbl &&
762 (qp_init_attr->recv_cq ||
763 qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
764 qp_init_attr->cap.max_recv_sge))
765 return ERR_PTR(-EINVAL);
766
761 /* 767 /*
762 * If the callers is using the RDMA API calculate the resources 768 * If the callers is using the RDMA API calculate the resources
763 * needed for the RDMA READ/WRITE operations. 769 * needed for the RDMA READ/WRITE operations.
@@ -775,6 +781,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
775 qp->real_qp = qp; 781 qp->real_qp = qp;
776 qp->uobject = NULL; 782 qp->uobject = NULL;
777 qp->qp_type = qp_init_attr->qp_type; 783 qp->qp_type = qp_init_attr->qp_type;
784 qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
778 785
779 atomic_set(&qp->usecnt, 0); 786 atomic_set(&qp->usecnt, 0);
780 qp->mrs_used = 0; 787 qp->mrs_used = 0;
@@ -792,7 +799,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
792 qp->srq = NULL; 799 qp->srq = NULL;
793 } else { 800 } else {
794 qp->recv_cq = qp_init_attr->recv_cq; 801 qp->recv_cq = qp_init_attr->recv_cq;
795 atomic_inc(&qp_init_attr->recv_cq->usecnt); 802 if (qp_init_attr->recv_cq)
803 atomic_inc(&qp_init_attr->recv_cq->usecnt);
796 qp->srq = qp_init_attr->srq; 804 qp->srq = qp_init_attr->srq;
797 if (qp->srq) 805 if (qp->srq)
798 atomic_inc(&qp_init_attr->srq->usecnt); 806 atomic_inc(&qp_init_attr->srq->usecnt);
@@ -803,7 +811,10 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
803 qp->xrcd = NULL; 811 qp->xrcd = NULL;
804 812
805 atomic_inc(&pd->usecnt); 813 atomic_inc(&pd->usecnt);
806 atomic_inc(&qp_init_attr->send_cq->usecnt); 814 if (qp_init_attr->send_cq)
815 atomic_inc(&qp_init_attr->send_cq->usecnt);
816 if (qp_init_attr->rwq_ind_tbl)
817 atomic_inc(&qp->rwq_ind_tbl->usecnt);
807 818
808 if (qp_init_attr->cap.max_rdma_ctxs) { 819 if (qp_init_attr->cap.max_rdma_ctxs) {
809 ret = rdma_rw_init_mrs(qp, qp_init_attr); 820 ret = rdma_rw_init_mrs(qp, qp_init_attr);
@@ -1283,6 +1294,7 @@ int ib_destroy_qp(struct ib_qp *qp)
1283 struct ib_pd *pd; 1294 struct ib_pd *pd;
1284 struct ib_cq *scq, *rcq; 1295 struct ib_cq *scq, *rcq;
1285 struct ib_srq *srq; 1296 struct ib_srq *srq;
1297 struct ib_rwq_ind_table *ind_tbl;
1286 int ret; 1298 int ret;
1287 1299
1288 WARN_ON_ONCE(qp->mrs_used > 0); 1300 WARN_ON_ONCE(qp->mrs_used > 0);
@@ -1297,6 +1309,7 @@ int ib_destroy_qp(struct ib_qp *qp)
1297 scq = qp->send_cq; 1309 scq = qp->send_cq;
1298 rcq = qp->recv_cq; 1310 rcq = qp->recv_cq;
1299 srq = qp->srq; 1311 srq = qp->srq;
1312 ind_tbl = qp->rwq_ind_tbl;
1300 1313
1301 if (!qp->uobject) 1314 if (!qp->uobject)
1302 rdma_rw_cleanup_mrs(qp); 1315 rdma_rw_cleanup_mrs(qp);
@@ -1311,6 +1324,8 @@ int ib_destroy_qp(struct ib_qp *qp)
1311 atomic_dec(&rcq->usecnt); 1324 atomic_dec(&rcq->usecnt);
1312 if (srq) 1325 if (srq)
1313 atomic_dec(&srq->usecnt); 1326 atomic_dec(&srq->usecnt);
1327 if (ind_tbl)
1328 atomic_dec(&ind_tbl->usecnt);
1314 } 1329 }
1315 1330
1316 return ret; 1331 return ret;
@@ -1558,6 +1573,150 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
1558} 1573}
1559EXPORT_SYMBOL(ib_dealloc_xrcd); 1574EXPORT_SYMBOL(ib_dealloc_xrcd);
1560 1575
1576/**
1577 * ib_create_wq - Creates a WQ associated with the specified protection
1578 * domain.
1579 * @pd: The protection domain associated with the WQ.
1580 * @wq_init_attr: A list of initial attributes required to create the
1581 * WQ. If WQ creation succeeds, then the attributes are updated to
1582 * the actual capabilities of the created WQ.
1583 *
1584 * wq_init_attr->max_wr and wq_init_attr->max_sge determine
 1585 * the requested size of the WQ, and are set to the actual values allocated
1586 * on return.
1587 * If ib_create_wq() succeeds, then max_wr and max_sge will always be
1588 * at least as large as the requested values.
1589 */
1590struct ib_wq *ib_create_wq(struct ib_pd *pd,
1591 struct ib_wq_init_attr *wq_attr)
1592{
1593 struct ib_wq *wq;
1594
1595 if (!pd->device->create_wq)
1596 return ERR_PTR(-ENOSYS);
1597
1598 wq = pd->device->create_wq(pd, wq_attr, NULL);
1599 if (!IS_ERR(wq)) {
1600 wq->event_handler = wq_attr->event_handler;
1601 wq->wq_context = wq_attr->wq_context;
1602 wq->wq_type = wq_attr->wq_type;
1603 wq->cq = wq_attr->cq;
1604 wq->device = pd->device;
1605 wq->pd = pd;
1606 wq->uobject = NULL;
1607 atomic_inc(&pd->usecnt);
1608 atomic_inc(&wq_attr->cq->usecnt);
1609 atomic_set(&wq->usecnt, 0);
1610 }
1611 return wq;
1612}
1613EXPORT_SYMBOL(ib_create_wq);
1614
1615/**
1616 * ib_destroy_wq - Destroys the specified WQ.
1617 * @wq: The WQ to destroy.
1618 */
1619int ib_destroy_wq(struct ib_wq *wq)
1620{
1621 int err;
1622 struct ib_cq *cq = wq->cq;
1623 struct ib_pd *pd = wq->pd;
1624
1625 if (atomic_read(&wq->usecnt))
1626 return -EBUSY;
1627
1628 err = wq->device->destroy_wq(wq);
1629 if (!err) {
1630 atomic_dec(&pd->usecnt);
1631 atomic_dec(&cq->usecnt);
1632 }
1633 return err;
1634}
1635EXPORT_SYMBOL(ib_destroy_wq);
1636
1637/**
1638 * ib_modify_wq - Modifies the specified WQ.
1639 * @wq: The WQ to modify.
1640 * @wq_attr: On input, specifies the WQ attributes to modify.
1641 * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ
1642 * are being modified.
 1643 * On output, @wq_attr holds the current values of the selected WQ attributes.
1644 */
1645int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
1646 u32 wq_attr_mask)
1647{
1648 int err;
1649
1650 if (!wq->device->modify_wq)
1651 return -ENOSYS;
1652
1653 err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
1654 return err;
1655}
1656EXPORT_SYMBOL(ib_modify_wq);
1657
1658/*
 1659 * ib_create_rwq_ind_table - Creates an RQ Indirection Table.
1660 * @device: The device on which to create the rwq indirection table.
1661 * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
1662 * create the Indirection Table.
1663 *
 1664 * Note: The lifetime of ib_rwq_ind_table_init_attr->ind_tbl must be at
 1665 * least as long as that of the created ib_rwq_ind_table object; the caller
 1666 * is responsible for allocating and freeing that array.
1667 */
1668struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
1669 struct ib_rwq_ind_table_init_attr *init_attr)
1670{
1671 struct ib_rwq_ind_table *rwq_ind_table;
1672 int i;
1673 u32 table_size;
1674
1675 if (!device->create_rwq_ind_table)
1676 return ERR_PTR(-ENOSYS);
1677
1678 table_size = (1 << init_attr->log_ind_tbl_size);
1679 rwq_ind_table = device->create_rwq_ind_table(device,
1680 init_attr, NULL);
1681 if (IS_ERR(rwq_ind_table))
1682 return rwq_ind_table;
1683
1684 rwq_ind_table->ind_tbl = init_attr->ind_tbl;
1685 rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
1686 rwq_ind_table->device = device;
1687 rwq_ind_table->uobject = NULL;
1688 atomic_set(&rwq_ind_table->usecnt, 0);
1689
1690 for (i = 0; i < table_size; i++)
1691 atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
1692
1693 return rwq_ind_table;
1694}
1695EXPORT_SYMBOL(ib_create_rwq_ind_table);
1696
1697/*
1698 * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
 1699 * @rwq_ind_table: The Indirection Table to destroy.
1700*/
1701int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
1702{
1703 int err, i;
1704 u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
1705 struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
1706
1707 if (atomic_read(&rwq_ind_table->usecnt))
1708 return -EBUSY;
1709
1710 err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table);
1711 if (!err) {
1712 for (i = 0; i < table_size; i++)
1713 atomic_dec(&ind_tbl[i]->usecnt);
1714 }
1715
1716 return err;
1717}
1718EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
1719
1561struct ib_flow *ib_create_flow(struct ib_qp *qp, 1720struct ib_flow *ib_create_flow(struct ib_qp *qp,
1562 struct ib_flow_attr *flow_attr, 1721 struct ib_flow_attr *flow_attr,
1563 int domain) 1722 int domain)
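Taken together, the verbs added above give in-kernel consumers the pieces needed for receive-side scaling: several receive WQs, one RWQ indirection table over them, and a QP that points at the table instead of owning its own receive queue (which is why ib_create_qp() above now rejects recv_cq, srq and max_recv_* when rwq_ind_tbl is set, and tolerates a NULL send_cq/recv_cq). The sketch below only illustrates the intended call order; IB_WQT_RQ, IB_QPT_RAW_PACKET and the chosen max_wr/max_sge values are assumptions, not taken from this diff.

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

/* Illustrative only: create n = 2^log_tbl_size receive WQs, group them in an
 * RWQ indirection table and create a QP that receives through the table. */
static struct ib_qp *rss_qp_sketch(struct ib_device *dev, struct ib_pd *pd,
				   struct ib_cq **cqs, u32 log_tbl_size)
{
	struct ib_rwq_ind_table_init_attr tbl_attr = {};
	struct ib_qp_init_attr qp_attr = {};
	struct ib_rwq_ind_table *ind_tbl;
	u32 i, n = 1 << log_tbl_size;
	struct ib_wq **wqs;
	struct ib_qp *qp;
	int err;

	/* Per the note above ib_create_rwq_ind_table(), this array must stay
	 * allocated for as long as the indirection table exists. */
	wqs = kcalloc(n, sizeof(*wqs), GFP_KERNEL);
	if (!wqs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < n; i++) {
		struct ib_wq_init_attr wq_attr = {
			.wq_type = IB_WQT_RQ,	/* assumed constant */
			.max_wr	 = 256,		/* example sizes */
			.max_sge = 1,
			.cq	 = cqs[i],
		};

		wqs[i] = ib_create_wq(pd, &wq_attr);
		if (IS_ERR(wqs[i])) {
			err = PTR_ERR(wqs[i]);
			goto free_wqs;
		}
	}

	tbl_attr.log_ind_tbl_size = log_tbl_size;
	tbl_attr.ind_tbl = wqs;
	ind_tbl = ib_create_rwq_ind_table(dev, &tbl_attr);
	if (IS_ERR(ind_tbl)) {
		err = PTR_ERR(ind_tbl);
		goto free_wqs;
	}

	/* No recv_cq, srq or max_recv_* here: ib_create_qp() rejects them
	 * when rwq_ind_tbl is set.  send_cq may also be left NULL now. */
	qp_attr.qp_type = IB_QPT_RAW_PACKET;	/* assumed QP type */
	qp_attr.rwq_ind_tbl = ind_tbl;
	qp = ib_create_qp(pd, &qp_attr);
	if (IS_ERR(qp)) {
		err = PTR_ERR(qp);
		goto free_tbl;
	}

	/* On success the caller must eventually destroy the QP, the table,
	 * the WQs and kfree(wqs), in that order. */
	return qp;

free_tbl:
	ib_destroy_rwq_ind_table(ind_tbl);
free_wqs:
	while (i--)
		ib_destroy_wq(wqs[i]);
	kfree(wqs);
	return ERR_PTR(err);
}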
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 3e8431b5cad7..04bbf172abde 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1396,10 +1396,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1396 state_set(&child_ep->com, CONNECTING); 1396 state_set(&child_ep->com, CONNECTING);
1397 child_ep->com.tdev = tdev; 1397 child_ep->com.tdev = tdev;
1398 child_ep->com.cm_id = NULL; 1398 child_ep->com.cm_id = NULL;
1399 child_ep->com.local_addr.sin_family = PF_INET; 1399 child_ep->com.local_addr.sin_family = AF_INET;
1400 child_ep->com.local_addr.sin_port = req->local_port; 1400 child_ep->com.local_addr.sin_port = req->local_port;
1401 child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; 1401 child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
1402 child_ep->com.remote_addr.sin_family = PF_INET; 1402 child_ep->com.remote_addr.sin_family = AF_INET;
1403 child_ep->com.remote_addr.sin_port = req->peer_port; 1403 child_ep->com.remote_addr.sin_port = req->peer_port;
1404 child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; 1404 child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
1405 get_ep(&parent_ep->com); 1405 get_ep(&parent_ep->com);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index bb1a839d4d6d..3edb80644b53 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1183,18 +1183,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
1183 return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); 1183 return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
1184} 1184}
1185 1185
1186static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
1187{
1188 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
1189 ibdev.dev);
1190 struct ethtool_drvinfo info;
1191 struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
1192
1193 PDBG("%s dev 0x%p\n", __func__, dev);
1194 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1195 return sprintf(buf, "%s\n", info.fw_version);
1196}
1197
1198static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 1186static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
1199 char *buf) 1187 char *buf)
1200{ 1188{
@@ -1334,13 +1322,11 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
1334} 1322}
1335 1323
1336static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1324static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
1337static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1338static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1325static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
1339static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1326static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
1340 1327
1341static struct device_attribute *iwch_class_attributes[] = { 1328static struct device_attribute *iwch_class_attributes[] = {
1342 &dev_attr_hw_rev, 1329 &dev_attr_hw_rev,
1343 &dev_attr_fw_ver,
1344 &dev_attr_hca_type, 1330 &dev_attr_hca_type,
1345 &dev_attr_board_id, 1331 &dev_attr_board_id,
1346}; 1332};
@@ -1362,6 +1348,18 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
1362 return 0; 1348 return 0;
1363} 1349}
1364 1350
1351static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
1352 size_t str_len)
1353{
1354 struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
1355 struct ethtool_drvinfo info;
1356 struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
1357
1358 PDBG("%s dev 0x%p\n", __func__, iwch_dev);
1359 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1360 snprintf(str, str_len, "%s", info.fw_version);
1361}
1362
1365int iwch_register_device(struct iwch_dev *dev) 1363int iwch_register_device(struct iwch_dev *dev)
1366{ 1364{
1367 int ret; 1365 int ret;
@@ -1437,6 +1435,7 @@ int iwch_register_device(struct iwch_dev *dev)
1437 dev->ibdev.get_hw_stats = iwch_get_mib; 1435 dev->ibdev.get_hw_stats = iwch_get_mib;
1438 dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; 1436 dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
1439 dev->ibdev.get_port_immutable = iwch_port_immutable; 1437 dev->ibdev.get_port_immutable = iwch_port_immutable;
1438 dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str;
1440 1439
1441 dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); 1440 dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
1442 if (!dev->ibdev.iwcm) 1441 if (!dev->ibdev.iwcm)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index a3a67216bce6..3aca7f6171b4 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -294,6 +294,25 @@ static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
294 return; 294 return;
295} 295}
296 296
297static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
298{
299 struct sk_buff *skb;
300 unsigned int i;
301 size_t len;
302
303 len = roundup(sizeof(union cpl_wr_size), 16);
304 for (i = 0; i < size; i++) {
305 skb = alloc_skb(len, GFP_KERNEL);
306 if (!skb)
307 goto fail;
308 skb_queue_tail(ep_skb_list, skb);
309 }
310 return 0;
311fail:
312 skb_queue_purge(ep_skb_list);
313 return -ENOMEM;
314}
315
297static void *alloc_ep(int size, gfp_t gfp) 316static void *alloc_ep(int size, gfp_t gfp)
298{ 317{
299 struct c4iw_ep_common *epc; 318 struct c4iw_ep_common *epc;
@@ -384,6 +403,8 @@ void _c4iw_free_ep(struct kref *kref)
384 if (ep->mpa_skb) 403 if (ep->mpa_skb)
385 kfree_skb(ep->mpa_skb); 404 kfree_skb(ep->mpa_skb);
386 } 405 }
406 if (!skb_queue_empty(&ep->com.ep_skb_list))
407 skb_queue_purge(&ep->com.ep_skb_list);
387 kfree(ep); 408 kfree(ep);
388} 409}
389 410
@@ -620,25 +641,27 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
620 } 641 }
621} 642}
622 643
623static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) 644static int send_flowc(struct c4iw_ep *ep)
624{ 645{
625 unsigned int flowclen = 80;
626 struct fw_flowc_wr *flowc; 646 struct fw_flowc_wr *flowc;
647 struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
627 int i; 648 int i;
628 u16 vlan = ep->l2t->vlan; 649 u16 vlan = ep->l2t->vlan;
629 int nparams; 650 int nparams;
630 651
652 if (WARN_ON(!skb))
653 return -ENOMEM;
654
631 if (vlan == CPL_L2T_VLAN_NONE) 655 if (vlan == CPL_L2T_VLAN_NONE)
632 nparams = 8; 656 nparams = 8;
633 else 657 else
634 nparams = 9; 658 nparams = 9;
635 659
636 skb = get_skb(skb, flowclen, GFP_KERNEL); 660 flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN);
637 flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
638 661
639 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | 662 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
640 FW_FLOWC_WR_NPARAMS_V(nparams)); 663 FW_FLOWC_WR_NPARAMS_V(nparams));
641 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, 664 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN,
642 16)) | FW_WR_FLOWID_V(ep->hwtid)); 665 16)) | FW_WR_FLOWID_V(ep->hwtid));
643 666
644 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 667 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
@@ -679,18 +702,16 @@ static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
679 return c4iw_ofld_send(&ep->com.dev->rdev, skb); 702 return c4iw_ofld_send(&ep->com.dev->rdev, skb);
680} 703}
681 704
682static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) 705static int send_halfclose(struct c4iw_ep *ep)
683{ 706{
684 struct cpl_close_con_req *req; 707 struct cpl_close_con_req *req;
685 struct sk_buff *skb; 708 struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
686 int wrlen = roundup(sizeof *req, 16); 709 int wrlen = roundup(sizeof *req, 16);
687 710
688 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 711 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
689 skb = get_skb(NULL, wrlen, gfp); 712 if (WARN_ON(!skb))
690 if (!skb) {
691 printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
692 return -ENOMEM; 713 return -ENOMEM;
693 } 714
694 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 715 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
695 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 716 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
696 req = (struct cpl_close_con_req *) skb_put(skb, wrlen); 717 req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
@@ -701,26 +722,24 @@ static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
701 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 722 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
702} 723}
703 724
704static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) 725static int send_abort(struct c4iw_ep *ep)
705{ 726{
706 struct cpl_abort_req *req; 727 struct cpl_abort_req *req;
707 int wrlen = roundup(sizeof *req, 16); 728 int wrlen = roundup(sizeof *req, 16);
729 struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
708 730
709 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 731 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
710 skb = get_skb(skb, wrlen, gfp); 732 if (WARN_ON(!req_skb))
711 if (!skb) {
712 printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
713 __func__);
714 return -ENOMEM; 733 return -ENOMEM;
715 } 734
716 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 735 set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
717 t4_set_arp_err_handler(skb, ep, abort_arp_failure); 736 t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
718 req = (struct cpl_abort_req *) skb_put(skb, wrlen); 737 req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
719 memset(req, 0, wrlen); 738 memset(req, 0, wrlen);
720 INIT_TP_WR(req, ep->hwtid); 739 INIT_TP_WR(req, ep->hwtid);
721 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); 740 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
722 req->cmd = CPL_ABORT_SEND_RST; 741 req->cmd = CPL_ABORT_SEND_RST;
723 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 742 return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
724} 743}
725 744
726static void best_mtu(const unsigned short *mtus, unsigned short mtu, 745static void best_mtu(const unsigned short *mtus, unsigned short mtu,
@@ -992,9 +1011,19 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
992 1011
993 mpa = (struct mpa_message *)(req + 1); 1012 mpa = (struct mpa_message *)(req + 1);
994 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 1013 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
995 mpa->flags = (crc_enabled ? MPA_CRC : 0) | 1014
996 (markers_enabled ? MPA_MARKERS : 0) | 1015 mpa->flags = 0;
997 (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); 1016 if (crc_enabled)
1017 mpa->flags |= MPA_CRC;
1018 if (markers_enabled) {
1019 mpa->flags |= MPA_MARKERS;
1020 ep->mpa_attr.recv_marker_enabled = 1;
1021 } else {
1022 ep->mpa_attr.recv_marker_enabled = 0;
1023 }
1024 if (mpa_rev_to_use == 2)
1025 mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1026
998 mpa->private_data_size = htons(ep->plen); 1027 mpa->private_data_size = htons(ep->plen);
999 mpa->revision = mpa_rev_to_use; 1028 mpa->revision = mpa_rev_to_use;
1000 if (mpa_rev_to_use == 1) { 1029 if (mpa_rev_to_use == 1) {
@@ -1169,8 +1198,11 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1169 mpa = (struct mpa_message *)(req + 1); 1198 mpa = (struct mpa_message *)(req + 1);
1170 memset(mpa, 0, sizeof(*mpa)); 1199 memset(mpa, 0, sizeof(*mpa));
1171 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1200 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1172 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | 1201 mpa->flags = 0;
1173 (markers_enabled ? MPA_MARKERS : 0); 1202 if (ep->mpa_attr.crc_enabled)
1203 mpa->flags |= MPA_CRC;
1204 if (ep->mpa_attr.recv_marker_enabled)
1205 mpa->flags |= MPA_MARKERS;
1174 mpa->revision = ep->mpa_attr.version; 1206 mpa->revision = ep->mpa_attr.version;
1175 mpa->private_data_size = htons(plen); 1207 mpa->private_data_size = htons(plen);
1176 1208
@@ -1248,7 +1280,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1248 set_bit(ACT_ESTAB, &ep->com.history); 1280 set_bit(ACT_ESTAB, &ep->com.history);
1249 1281
1250 /* start MPA negotiation */ 1282 /* start MPA negotiation */
1251 ret = send_flowc(ep, NULL); 1283 ret = send_flowc(ep);
1252 if (ret) 1284 if (ret)
1253 goto err; 1285 goto err;
1254 if (ep->retry_with_mpa_v1) 1286 if (ep->retry_with_mpa_v1)
@@ -1555,7 +1587,6 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1555 */ 1587 */
1556 __state_set(&ep->com, FPDU_MODE); 1588 __state_set(&ep->com, FPDU_MODE);
1557 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1589 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1558 ep->mpa_attr.recv_marker_enabled = markers_enabled;
1559 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1590 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1560 ep->mpa_attr.version = mpa->revision; 1591 ep->mpa_attr.version = mpa->revision;
1561 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1592 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
@@ -2004,12 +2035,17 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
2004} 2035}
2005 2036
2006/* 2037/*
2007 * Return whether a failed active open has allocated a TID 2038 * Some of the error codes above implicitly indicate that there is no TID
2039 * allocated with the result of an ACT_OPEN. We use this predicate to make
2040 * that explicit.
2008 */ 2041 */
2009static inline int act_open_has_tid(int status) 2042static inline int act_open_has_tid(int status)
2010{ 2043{
2011 return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && 2044 return (status != CPL_ERR_TCAM_PARITY &&
2012 status != CPL_ERR_ARP_MISS; 2045 status != CPL_ERR_TCAM_MISS &&
2046 status != CPL_ERR_TCAM_FULL &&
2047 status != CPL_ERR_CONN_EXIST_SYNRECV &&
2048 status != CPL_ERR_CONN_EXIST);
2013} 2049}
2014 2050
2015/* Returns whether a CPL status conveys negative advice. 2051/* Returns whether a CPL status conveys negative advice.
@@ -2130,6 +2166,7 @@ out:
2130static int c4iw_reconnect(struct c4iw_ep *ep) 2166static int c4iw_reconnect(struct c4iw_ep *ep)
2131{ 2167{
2132 int err = 0; 2168 int err = 0;
2169 int size = 0;
2133 struct sockaddr_in *laddr = (struct sockaddr_in *) 2170 struct sockaddr_in *laddr = (struct sockaddr_in *)
2134 &ep->com.cm_id->m_local_addr; 2171 &ep->com.cm_id->m_local_addr;
2135 struct sockaddr_in *raddr = (struct sockaddr_in *) 2172 struct sockaddr_in *raddr = (struct sockaddr_in *)
@@ -2145,6 +2182,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
2145 init_timer(&ep->timer); 2182 init_timer(&ep->timer);
2146 c4iw_init_wr_wait(&ep->com.wr_wait); 2183 c4iw_init_wr_wait(&ep->com.wr_wait);
2147 2184
 2185	/* When the MPA revision differs between the two nodes, the node with
 2186	 * MPA_rev=2 retries the connection with MPA_rev 1 on the same EP via
 2187	 * c4iw_reconnect(), where the EP is assigned a new tid for the new
 2188	 * connection attempt. Because the same EP pointer is reused, some skbs
 2189	 * were already consumed by the earlier c4iw_connect(), leaving the EP
 2190	 * with too few skbs for the reconnect and eventually hitting a BUG_ON()
 2191	 * on an empty ep_skb_list in peer_abort(). Replenish the skbs that have
 2192	 * already been used.
2193 */
2194 size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
2195 if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
2196 err = -ENOMEM;
2197 goto fail1;
2198 }
2199
2148 /* 2200 /*
2149 * Allocate an active TID to initiate a TCP connection. 2201 * Allocate an active TID to initiate a TCP connection.
2150 */ 2202 */
@@ -2210,6 +2262,7 @@ fail2:
2210 * response of 1st connect request. 2262 * response of 1st connect request.
2211 */ 2263 */
2212 connect_reply_upcall(ep, -ECONNRESET); 2264 connect_reply_upcall(ep, -ECONNRESET);
2265fail1:
2213 c4iw_put_ep(&ep->com); 2266 c4iw_put_ep(&ep->com);
2214out: 2267out:
2215 return err; 2268 return err;
@@ -2576,6 +2629,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2576 if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) 2629 if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2577 child_ep->mtu = peer_mss + hdrs; 2630 child_ep->mtu = peer_mss + hdrs;
2578 2631
2632 skb_queue_head_init(&child_ep->com.ep_skb_list);
2633 if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
2634 goto fail;
2635
2579 state_set(&child_ep->com, CONNECTING); 2636 state_set(&child_ep->com, CONNECTING);
2580 child_ep->com.dev = dev; 2637 child_ep->com.dev = dev;
2581 child_ep->com.cm_id = NULL; 2638 child_ep->com.cm_id = NULL;
@@ -2640,6 +2697,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2640 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 2697 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2641 } 2698 }
2642 goto out; 2699 goto out;
2700fail:
2701 c4iw_put_ep(&child_ep->com);
2643reject: 2702reject:
2644 reject_cr(dev, hwtid, skb); 2703 reject_cr(dev, hwtid, skb);
2645 if (parent_ep) 2704 if (parent_ep)
@@ -2670,7 +2729,7 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2670 ep->com.state = MPA_REQ_WAIT; 2729 ep->com.state = MPA_REQ_WAIT;
2671 start_ep_timer(ep); 2730 start_ep_timer(ep);
2672 set_bit(PASS_ESTAB, &ep->com.history); 2731 set_bit(PASS_ESTAB, &ep->com.history);
2673 ret = send_flowc(ep, skb); 2732 ret = send_flowc(ep);
2674 mutex_unlock(&ep->com.mutex); 2733 mutex_unlock(&ep->com.mutex);
2675 if (ret) 2734 if (ret)
2676 c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 2735 c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
@@ -2871,10 +2930,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2871 } 2930 }
2872 mutex_unlock(&ep->com.mutex); 2931 mutex_unlock(&ep->com.mutex);
2873 2932
2874 rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); 2933 rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
2875 if (!rpl_skb) { 2934 if (WARN_ON(!rpl_skb)) {
2876 printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
2877 __func__);
2878 release = 1; 2935 release = 1;
2879 goto out; 2936 goto out;
2880 } 2937 }
@@ -3011,9 +3068,9 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
3011 PDBG("%s last streaming msg ack ep %p tid %u state %u " 3068 PDBG("%s last streaming msg ack ep %p tid %u state %u "
3012 "initiator %u freeing skb\n", __func__, ep, ep->hwtid, 3069 "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
3013 state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0); 3070 state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
3071 mutex_lock(&ep->com.mutex);
3014 kfree_skb(ep->mpa_skb); 3072 kfree_skb(ep->mpa_skb);
3015 ep->mpa_skb = NULL; 3073 ep->mpa_skb = NULL;
3016 mutex_lock(&ep->com.mutex);
3017 if (test_bit(STOP_MPA_TIMER, &ep->com.flags)) 3074 if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
3018 stop_ep_timer(ep); 3075 stop_ep_timer(ep);
3019 mutex_unlock(&ep->com.mutex); 3076 mutex_unlock(&ep->com.mutex);
@@ -3025,9 +3082,9 @@ out:
3025 3082
3026int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 3083int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3027{ 3084{
3028 int err = 0; 3085 int abort;
3029 int disconnect = 0;
3030 struct c4iw_ep *ep = to_ep(cm_id); 3086 struct c4iw_ep *ep = to_ep(cm_id);
3087
3031 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 3088 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
3032 3089
3033 mutex_lock(&ep->com.mutex); 3090 mutex_lock(&ep->com.mutex);
@@ -3038,16 +3095,13 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3038 } 3095 }
3039 set_bit(ULP_REJECT, &ep->com.history); 3096 set_bit(ULP_REJECT, &ep->com.history);
3040 if (mpa_rev == 0) 3097 if (mpa_rev == 0)
3041 disconnect = 2; 3098 abort = 1;
3042 else { 3099 else
3043 err = send_mpa_reject(ep, pdata, pdata_len); 3100 abort = send_mpa_reject(ep, pdata, pdata_len);
3044 disconnect = 1;
3045 }
3046 mutex_unlock(&ep->com.mutex); 3101 mutex_unlock(&ep->com.mutex);
3047 if (disconnect) { 3102
3048 stop_ep_timer(ep); 3103 stop_ep_timer(ep);
3049 err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); 3104 c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
3050 }
3051 c4iw_put_ep(&ep->com); 3105 c4iw_put_ep(&ep->com);
3052 return 0; 3106 return 0;
3053} 3107}
@@ -3248,6 +3302,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3248 err = -ENOMEM; 3302 err = -ENOMEM;
3249 goto out; 3303 goto out;
3250 } 3304 }
3305
3306 skb_queue_head_init(&ep->com.ep_skb_list);
3307 if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
3308 err = -ENOMEM;
3309 goto fail1;
3310 }
3311
3251 init_timer(&ep->timer); 3312 init_timer(&ep->timer);
3252 ep->plen = conn_param->private_data_len; 3313 ep->plen = conn_param->private_data_len;
3253 if (ep->plen) 3314 if (ep->plen)
@@ -3266,7 +3327,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3266 if (!ep->com.qp) { 3327 if (!ep->com.qp) {
3267 PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); 3328 PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3268 err = -EINVAL; 3329 err = -EINVAL;
3269 goto fail1; 3330 goto fail2;
3270 } 3331 }
3271 ref_qp(ep); 3332 ref_qp(ep);
3272 PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, 3333 PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
@@ -3279,7 +3340,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3279 if (ep->atid == -1) { 3340 if (ep->atid == -1) {
3280 printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); 3341 printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
3281 err = -ENOMEM; 3342 err = -ENOMEM;
3282 goto fail1; 3343 goto fail2;
3283 } 3344 }
3284 insert_handle(dev, &dev->atid_idr, ep, ep->atid); 3345 insert_handle(dev, &dev->atid_idr, ep, ep->atid);
3285 3346
@@ -3303,7 +3364,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3303 if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { 3364 if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
3304 err = pick_local_ipaddrs(dev, cm_id); 3365 err = pick_local_ipaddrs(dev, cm_id);
3305 if (err) 3366 if (err)
3306 goto fail1; 3367 goto fail2;
3307 } 3368 }
3308 3369
3309 /* find a route */ 3370 /* find a route */
@@ -3323,7 +3384,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3323 if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { 3384 if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3324 err = pick_local_ip6addrs(dev, cm_id); 3385 err = pick_local_ip6addrs(dev, cm_id);
3325 if (err) 3386 if (err)
3326 goto fail1; 3387 goto fail2;
3327 } 3388 }
3328 3389
3329 /* find a route */ 3390 /* find a route */
@@ -3339,14 +3400,14 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3339 if (!ep->dst) { 3400 if (!ep->dst) {
3340 printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); 3401 printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
3341 err = -EHOSTUNREACH; 3402 err = -EHOSTUNREACH;
3342 goto fail2; 3403 goto fail3;
3343 } 3404 }
3344 3405
3345 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, 3406 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3346 ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); 3407 ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3347 if (err) { 3408 if (err) {
3348 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); 3409 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
3349 goto fail3; 3410 goto fail4;
3350 } 3411 }
3351 3412
3352 PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", 3413 PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
@@ -3362,13 +3423,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3362 goto out; 3423 goto out;
3363 3424
3364 cxgb4_l2t_release(ep->l2t); 3425 cxgb4_l2t_release(ep->l2t);
3365fail3: 3426fail4:
3366 dst_release(ep->dst); 3427 dst_release(ep->dst);
3367fail2: 3428fail3:
3368 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); 3429 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
3369 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 3430 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3370fail1: 3431fail2:
3432 skb_queue_purge(&ep->com.ep_skb_list);
3371 deref_cm_id(&ep->com); 3433 deref_cm_id(&ep->com);
3434fail1:
3372 c4iw_put_ep(&ep->com); 3435 c4iw_put_ep(&ep->com);
3373out: 3436out:
3374 return err; 3437 return err;
@@ -3461,6 +3524,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3461 err = -ENOMEM; 3524 err = -ENOMEM;
3462 goto fail1; 3525 goto fail1;
3463 } 3526 }
3527 skb_queue_head_init(&ep->com.ep_skb_list);
3464 PDBG("%s ep %p\n", __func__, ep); 3528 PDBG("%s ep %p\n", __func__, ep);
3465 ep->com.cm_id = cm_id; 3529 ep->com.cm_id = cm_id;
3466 ref_cm_id(&ep->com); 3530 ref_cm_id(&ep->com);
@@ -3577,11 +3641,22 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3577 case MPA_REQ_RCVD: 3641 case MPA_REQ_RCVD:
3578 case MPA_REP_SENT: 3642 case MPA_REP_SENT:
3579 case FPDU_MODE: 3643 case FPDU_MODE:
3644 case CONNECTING:
3580 close = 1; 3645 close = 1;
3581 if (abrupt) 3646 if (abrupt)
3582 ep->com.state = ABORTING; 3647 ep->com.state = ABORTING;
3583 else { 3648 else {
3584 ep->com.state = CLOSING; 3649 ep->com.state = CLOSING;
3650
3651 /*
3652 * if we close before we see the fw4_ack() then we fix
3653 * up the timer state since we're reusing it.
3654 */
3655 if (ep->mpa_skb &&
3656 test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
3657 clear_bit(STOP_MPA_TIMER, &ep->com.flags);
3658 stop_ep_timer(ep);
3659 }
3585 start_ep_timer(ep); 3660 start_ep_timer(ep);
3586 } 3661 }
3587 set_bit(CLOSE_SENT, &ep->com.flags); 3662 set_bit(CLOSE_SENT, &ep->com.flags);
@@ -3611,10 +3686,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3611 if (abrupt) { 3686 if (abrupt) {
3612 set_bit(EP_DISC_ABORT, &ep->com.history); 3687 set_bit(EP_DISC_ABORT, &ep->com.history);
3613 close_complete_upcall(ep, -ECONNRESET); 3688 close_complete_upcall(ep, -ECONNRESET);
3614 ret = send_abort(ep, NULL, gfp); 3689 ret = send_abort(ep);
3615 } else { 3690 } else {
3616 set_bit(EP_DISC_CLOSE, &ep->com.history); 3691 set_bit(EP_DISC_CLOSE, &ep->com.history);
3617 ret = send_halfclose(ep, gfp); 3692 ret = send_halfclose(ep);
3618 } 3693 }
3619 if (ret) { 3694 if (ret) {
3620 set_bit(EP_DISC_FAIL, &ep->com.history); 3695 set_bit(EP_DISC_FAIL, &ep->com.history);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index b0b955724458..812ab7278b8e 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -33,19 +33,15 @@
33#include "iw_cxgb4.h" 33#include "iw_cxgb4.h"
34 34
35static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, 35static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
36 struct c4iw_dev_ucontext *uctx) 36 struct c4iw_dev_ucontext *uctx, struct sk_buff *skb)
37{ 37{
38 struct fw_ri_res_wr *res_wr; 38 struct fw_ri_res_wr *res_wr;
39 struct fw_ri_res *res; 39 struct fw_ri_res *res;
40 int wr_len; 40 int wr_len;
41 struct c4iw_wr_wait wr_wait; 41 struct c4iw_wr_wait wr_wait;
42 struct sk_buff *skb;
43 int ret; 42 int ret;
44 43
45 wr_len = sizeof *res_wr + sizeof *res; 44 wr_len = sizeof *res_wr + sizeof *res;
46 skb = alloc_skb(wr_len, GFP_KERNEL);
47 if (!skb)
48 return -ENOMEM;
49 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); 45 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
50 46
51 res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); 47 res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
@@ -863,7 +859,9 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
863 ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context) 859 ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
864 : NULL; 860 : NULL;
865 destroy_cq(&chp->rhp->rdev, &chp->cq, 861 destroy_cq(&chp->rhp->rdev, &chp->cq,
866 ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx); 862 ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
863 chp->destroy_skb);
864 chp->destroy_skb = NULL;
867 kfree(chp); 865 kfree(chp);
868 return 0; 866 return 0;
869} 867}
@@ -879,7 +877,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
879 struct c4iw_cq *chp; 877 struct c4iw_cq *chp;
880 struct c4iw_create_cq_resp uresp; 878 struct c4iw_create_cq_resp uresp;
881 struct c4iw_ucontext *ucontext = NULL; 879 struct c4iw_ucontext *ucontext = NULL;
882 int ret; 880 int ret, wr_len;
883 size_t memsize, hwentries; 881 size_t memsize, hwentries;
884 struct c4iw_mm_entry *mm, *mm2; 882 struct c4iw_mm_entry *mm, *mm2;
885 883
@@ -896,6 +894,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
896 if (!chp) 894 if (!chp)
897 return ERR_PTR(-ENOMEM); 895 return ERR_PTR(-ENOMEM);
898 896
897 wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
898 chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
899 if (!chp->destroy_skb) {
900 ret = -ENOMEM;
901 goto err1;
902 }
903
899 if (ib_context) 904 if (ib_context)
900 ucontext = to_c4iw_ucontext(ib_context); 905 ucontext = to_c4iw_ucontext(ib_context);
901 906
@@ -936,7 +941,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
936 ret = create_cq(&rhp->rdev, &chp->cq, 941 ret = create_cq(&rhp->rdev, &chp->cq,
937 ucontext ? &ucontext->uctx : &rhp->rdev.uctx); 942 ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
938 if (ret) 943 if (ret)
939 goto err1; 944 goto err2;
940 945
941 chp->rhp = rhp; 946 chp->rhp = rhp;
942 chp->cq.size--; /* status page */ 947 chp->cq.size--; /* status page */
@@ -947,15 +952,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
947 init_waitqueue_head(&chp->wait); 952 init_waitqueue_head(&chp->wait);
948 ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); 953 ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
949 if (ret) 954 if (ret)
950 goto err2; 955 goto err3;
951 956
952 if (ucontext) { 957 if (ucontext) {
953 mm = kmalloc(sizeof *mm, GFP_KERNEL); 958 mm = kmalloc(sizeof *mm, GFP_KERNEL);
954 if (!mm) 959 if (!mm)
955 goto err3; 960 goto err4;
956 mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); 961 mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
957 if (!mm2) 962 if (!mm2)
958 goto err4; 963 goto err5;
959 964
960 uresp.qid_mask = rhp->rdev.cqmask; 965 uresp.qid_mask = rhp->rdev.cqmask;
961 uresp.cqid = chp->cq.cqid; 966 uresp.cqid = chp->cq.cqid;
@@ -970,7 +975,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
970 ret = ib_copy_to_udata(udata, &uresp, 975 ret = ib_copy_to_udata(udata, &uresp,
971 sizeof(uresp) - sizeof(uresp.reserved)); 976 sizeof(uresp) - sizeof(uresp.reserved));
972 if (ret) 977 if (ret)
973 goto err5; 978 goto err6;
974 979
975 mm->key = uresp.key; 980 mm->key = uresp.key;
976 mm->addr = virt_to_phys(chp->cq.queue); 981 mm->addr = virt_to_phys(chp->cq.queue);
@@ -986,15 +991,18 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
986 __func__, chp->cq.cqid, chp, chp->cq.size, 991 __func__, chp->cq.cqid, chp, chp->cq.size,
987 chp->cq.memsize, (unsigned long long) chp->cq.dma_addr); 992 chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
988 return &chp->ibcq; 993 return &chp->ibcq;
989err5: 994err6:
990 kfree(mm2); 995 kfree(mm2);
991err4: 996err5:
992 kfree(mm); 997 kfree(mm);
993err3: 998err4:
994 remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); 999 remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
995err2: 1000err3:
996 destroy_cq(&chp->rhp->rdev, &chp->cq, 1001 destroy_cq(&chp->rhp->rdev, &chp->cq,
997 ucontext ? &ucontext->uctx : &rhp->rdev.uctx); 1002 ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
1003 chp->destroy_skb);
1004err2:
1005 kfree_skb(chp->destroy_skb);
998err1: 1006err1:
999 kfree(chp); 1007 kfree(chp);
1000 return ERR_PTR(ret); 1008 return ERR_PTR(ret);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index ae2e8b23d2dd..071d7332ec06 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -317,7 +317,7 @@ static int qp_open(struct inode *inode, struct file *file)
317 idr_for_each(&qpd->devp->qpidr, count_idrs, &count); 317 idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
318 spin_unlock_irq(&qpd->devp->lock); 318 spin_unlock_irq(&qpd->devp->lock);
319 319
320 qpd->bufsize = count * 128; 320 qpd->bufsize = count * 180;
321 qpd->buf = vmalloc(qpd->bufsize); 321 qpd->buf = vmalloc(qpd->bufsize);
322 if (!qpd->buf) { 322 if (!qpd->buf) {
323 kfree(qpd); 323 kfree(qpd);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f6f34a75af27..aa47e0ae80bc 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -384,6 +384,7 @@ struct c4iw_mr {
384 struct ib_mr ibmr; 384 struct ib_mr ibmr;
385 struct ib_umem *umem; 385 struct ib_umem *umem;
386 struct c4iw_dev *rhp; 386 struct c4iw_dev *rhp;
387 struct sk_buff *dereg_skb;
387 u64 kva; 388 u64 kva;
388 struct tpt_attributes attr; 389 struct tpt_attributes attr;
389 u64 *mpl; 390 u64 *mpl;
@@ -400,6 +401,7 @@ static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
400struct c4iw_mw { 401struct c4iw_mw {
401 struct ib_mw ibmw; 402 struct ib_mw ibmw;
402 struct c4iw_dev *rhp; 403 struct c4iw_dev *rhp;
404 struct sk_buff *dereg_skb;
403 u64 kva; 405 u64 kva;
404 struct tpt_attributes attr; 406 struct tpt_attributes attr;
405}; 407};
@@ -412,6 +414,7 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
412struct c4iw_cq { 414struct c4iw_cq {
413 struct ib_cq ibcq; 415 struct ib_cq ibcq;
414 struct c4iw_dev *rhp; 416 struct c4iw_dev *rhp;
417 struct sk_buff *destroy_skb;
415 struct t4_cq cq; 418 struct t4_cq cq;
416 spinlock_t lock; 419 spinlock_t lock;
417 spinlock_t comp_handler_lock; 420 spinlock_t comp_handler_lock;
@@ -472,7 +475,7 @@ struct c4iw_qp {
472 struct t4_wq wq; 475 struct t4_wq wq;
473 spinlock_t lock; 476 spinlock_t lock;
474 struct mutex mutex; 477 struct mutex mutex;
475 atomic_t refcnt; 478 struct kref kref;
476 wait_queue_head_t wait; 479 wait_queue_head_t wait;
477 struct timer_list timer; 480 struct timer_list timer;
478 int sq_sig_all; 481 int sq_sig_all;
@@ -789,10 +792,29 @@ enum c4iw_ep_history {
789 CM_ID_DEREFED = 28, 792 CM_ID_DEREFED = 28,
790}; 793};
791 794
795enum conn_pre_alloc_buffers {
796 CN_ABORT_REQ_BUF,
797 CN_ABORT_RPL_BUF,
798 CN_CLOSE_CON_REQ_BUF,
799 CN_DESTROY_BUF,
800 CN_FLOWC_BUF,
801 CN_MAX_CON_BUF
802};
803
804#define FLOWC_LEN 80
805union cpl_wr_size {
806 struct cpl_abort_req abrt_req;
807 struct cpl_abort_rpl abrt_rpl;
808 struct fw_ri_wr ri_req;
809 struct cpl_close_con_req close_req;
810 char flowc_buf[FLOWC_LEN];
811};
812
792struct c4iw_ep_common { 813struct c4iw_ep_common {
793 struct iw_cm_id *cm_id; 814 struct iw_cm_id *cm_id;
794 struct c4iw_qp *qp; 815 struct c4iw_qp *qp;
795 struct c4iw_dev *dev; 816 struct c4iw_dev *dev;
817 struct sk_buff_head ep_skb_list;
796 enum c4iw_ep_state state; 818 enum c4iw_ep_state state;
797 struct kref kref; 819 struct kref kref;
798 struct mutex mutex; 820 struct mutex mutex;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 55d0651ee4de..0b91b0f4df71 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -59,9 +59,9 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
59} 59}
60 60
61static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, 61static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
62 u32 len, dma_addr_t data, int wait) 62 u32 len, dma_addr_t data,
63 int wait, struct sk_buff *skb)
63{ 64{
64 struct sk_buff *skb;
65 struct ulp_mem_io *req; 65 struct ulp_mem_io *req;
66 struct ulptx_sgl *sgl; 66 struct ulptx_sgl *sgl;
67 u8 wr_len; 67 u8 wr_len;
@@ -74,9 +74,11 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
74 c4iw_init_wr_wait(&wr_wait); 74 c4iw_init_wr_wait(&wr_wait);
75 wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); 75 wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16);
76 76
77 skb = alloc_skb(wr_len, GFP_KERNEL); 77 if (!skb) {
78 if (!skb) 78 skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
79 return -ENOMEM; 79 if (!skb)
80 return -ENOMEM;
81 }
80 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); 82 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
81 83
82 req = (struct ulp_mem_io *)__skb_put(skb, wr_len); 84 req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
@@ -108,9 +110,8 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
108} 110}
109 111
110static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, 112static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
111 void *data) 113 void *data, struct sk_buff *skb)
112{ 114{
113 struct sk_buff *skb;
114 struct ulp_mem_io *req; 115 struct ulp_mem_io *req;
115 struct ulptx_idata *sc; 116 struct ulptx_idata *sc;
116 u8 wr_len, *to_dp, *from_dp; 117 u8 wr_len, *to_dp, *from_dp;
@@ -134,9 +135,11 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
134 wr_len = roundup(sizeof *req + sizeof *sc + 135 wr_len = roundup(sizeof *req + sizeof *sc +
135 roundup(copy_len, T4_ULPTX_MIN_IO), 16); 136 roundup(copy_len, T4_ULPTX_MIN_IO), 16);
136 137
137 skb = alloc_skb(wr_len, GFP_KERNEL); 138 if (!skb) {
138 if (!skb) 139 skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
139 return -ENOMEM; 140 if (!skb)
141 return -ENOMEM;
142 }
140 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); 143 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
141 144
142 req = (struct ulp_mem_io *)__skb_put(skb, wr_len); 145 req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
@@ -173,6 +176,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
173 memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO - 176 memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO -
174 (copy_len % T4_ULPTX_MIN_IO)); 177 (copy_len % T4_ULPTX_MIN_IO));
175 ret = c4iw_ofld_send(rdev, skb); 178 ret = c4iw_ofld_send(rdev, skb);
179 skb = NULL;
176 if (ret) 180 if (ret)
177 return ret; 181 return ret;
178 len -= C4IW_MAX_INLINE_SIZE; 182 len -= C4IW_MAX_INLINE_SIZE;
@@ -182,7 +186,8 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
182 return ret; 186 return ret;
183} 187}
184 188
185static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) 189static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len,
190 void *data, struct sk_buff *skb)
186{ 191{
187 u32 remain = len; 192 u32 remain = len;
188 u32 dmalen; 193 u32 dmalen;
@@ -205,7 +210,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *
205 dmalen = T4_ULPTX_MAX_DMA; 210 dmalen = T4_ULPTX_MAX_DMA;
206 remain -= dmalen; 211 remain -= dmalen;
207 ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, 212 ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr,
208 !remain); 213 !remain, skb);
209 if (ret) 214 if (ret)
210 goto out; 215 goto out;
211 addr += dmalen >> 5; 216 addr += dmalen >> 5;
@@ -213,7 +218,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *
213 daddr += dmalen; 218 daddr += dmalen;
214 } 219 }
215 if (remain) 220 if (remain)
216 ret = _c4iw_write_mem_inline(rdev, addr, remain, data); 221 ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb);
217out: 222out:
218 dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); 223 dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE);
219 return ret; 224 return ret;
@@ -224,23 +229,25 @@ out:
224 * If data is NULL, clear len byte of memory to zero. 229 * If data is NULL, clear len byte of memory to zero.
225 */ 230 */
226static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, 231static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
227 void *data) 232 void *data, struct sk_buff *skb)
228{ 233{
229 if (is_t5(rdev->lldi.adapter_type) && use_dsgl) { 234 if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
230 if (len > inline_threshold) { 235 if (len > inline_threshold) {
231 if (_c4iw_write_mem_dma(rdev, addr, len, data)) { 236 if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) {
232 printk_ratelimited(KERN_WARNING 237 printk_ratelimited(KERN_WARNING
233 "%s: dma map" 238 "%s: dma map"
234 " failure (non fatal)\n", 239 " failure (non fatal)\n",
235 pci_name(rdev->lldi.pdev)); 240 pci_name(rdev->lldi.pdev));
236 return _c4iw_write_mem_inline(rdev, addr, len, 241 return _c4iw_write_mem_inline(rdev, addr, len,
237 data); 242 data, skb);
238 } else 243 } else {
239 return 0; 244 return 0;
245 }
240 } else 246 } else
241 return _c4iw_write_mem_inline(rdev, addr, len, data); 247 return _c4iw_write_mem_inline(rdev, addr,
248 len, data, skb);
242 } else 249 } else
243 return _c4iw_write_mem_inline(rdev, addr, len, data); 250 return _c4iw_write_mem_inline(rdev, addr, len, data, skb);
244} 251}
245 252
246/* 253/*
@@ -253,7 +260,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
253 u32 *stag, u8 stag_state, u32 pdid, 260 u32 *stag, u8 stag_state, u32 pdid,
254 enum fw_ri_stag_type type, enum fw_ri_mem_perms perm, 261 enum fw_ri_stag_type type, enum fw_ri_mem_perms perm,
255 int bind_enabled, u32 zbva, u64 to, 262 int bind_enabled, u32 zbva, u64 to,
256 u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr) 263 u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr,
264 struct sk_buff *skb)
257{ 265{
258 int err; 266 int err;
259 struct fw_ri_tpte tpt; 267 struct fw_ri_tpte tpt;
@@ -307,7 +315,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
307 } 315 }
308 err = write_adapter_mem(rdev, stag_idx + 316 err = write_adapter_mem(rdev, stag_idx +
309 (rdev->lldi.vr->stag.start >> 5), 317 (rdev->lldi.vr->stag.start >> 5),
310 sizeof(tpt), &tpt); 318 sizeof(tpt), &tpt, skb);
311 319
312 if (reset_tpt_entry) { 320 if (reset_tpt_entry) {
313 c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); 321 c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -327,28 +335,29 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
327 __func__, pbl_addr, rdev->lldi.vr->pbl.start, 335 __func__, pbl_addr, rdev->lldi.vr->pbl.start,
328 pbl_size); 336 pbl_size);
329 337
330 err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl); 338 err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL);
331 return err; 339 return err;
332} 340}
333 341
334static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, 342static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
335 u32 pbl_addr) 343 u32 pbl_addr, struct sk_buff *skb)
336{ 344{
337 return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 345 return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0,
338 pbl_size, pbl_addr); 346 pbl_size, pbl_addr, skb);
339} 347}
340 348
341static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid) 349static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid)
342{ 350{
343 *stag = T4_STAG_UNSET; 351 *stag = T4_STAG_UNSET;
344 return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, 352 return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
345 0UL, 0, 0, 0, 0); 353 0UL, 0, 0, 0, 0, NULL);
346} 354}
347 355
348static int deallocate_window(struct c4iw_rdev *rdev, u32 stag) 356static int deallocate_window(struct c4iw_rdev *rdev, u32 stag,
357 struct sk_buff *skb)
349{ 358{
350 return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, 359 return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
351 0); 360 0, skb);
352} 361}
353 362
354static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, 363static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
@@ -356,7 +365,7 @@ static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
356{ 365{
357 *stag = T4_STAG_UNSET; 366 *stag = T4_STAG_UNSET;
358 return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0, 367 return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0,
359 0UL, 0, 0, pbl_size, pbl_addr); 368 0UL, 0, 0, pbl_size, pbl_addr, NULL);
360} 369}
361 370
362static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) 371static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
@@ -383,14 +392,16 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
383 mhp->attr.mw_bind_enable, mhp->attr.zbva, 392 mhp->attr.mw_bind_enable, mhp->attr.zbva,
384 mhp->attr.va_fbo, mhp->attr.len ? 393 mhp->attr.va_fbo, mhp->attr.len ?
385 mhp->attr.len : -1, shift - 12, 394 mhp->attr.len : -1, shift - 12,
386 mhp->attr.pbl_size, mhp->attr.pbl_addr); 395 mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL);
387 if (ret) 396 if (ret)
388 return ret; 397 return ret;
389 398
390 ret = finish_mem_reg(mhp, stag); 399 ret = finish_mem_reg(mhp, stag);
391 if (ret) 400 if (ret) {
392 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, 401 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
393 mhp->attr.pbl_addr); 402 mhp->attr.pbl_addr, mhp->dereg_skb);
403 mhp->dereg_skb = NULL;
404 }
394 return ret; 405 return ret;
395} 406}
396 407
@@ -423,6 +434,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
423 if (!mhp) 434 if (!mhp)
424 return ERR_PTR(-ENOMEM); 435 return ERR_PTR(-ENOMEM);
425 436
437 mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
438 if (!mhp->dereg_skb) {
439 ret = -ENOMEM;
440 goto err0;
441 }
442
426 mhp->rhp = rhp; 443 mhp->rhp = rhp;
427 mhp->attr.pdid = php->pdid; 444 mhp->attr.pdid = php->pdid;
428 mhp->attr.perms = c4iw_ib_to_tpt_access(acc); 445 mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
@@ -435,7 +452,8 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
435 452
436 ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, 453 ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
437 FW_RI_STAG_NSMR, mhp->attr.perms, 454 FW_RI_STAG_NSMR, mhp->attr.perms,
438 mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0); 455 mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0,
456 NULL);
439 if (ret) 457 if (ret)
440 goto err1; 458 goto err1;
441 459
@@ -445,8 +463,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
445 return &mhp->ibmr; 463 return &mhp->ibmr;
446err2: 464err2:
447 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, 465 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
448 mhp->attr.pbl_addr); 466 mhp->attr.pbl_addr, mhp->dereg_skb);
449err1: 467err1:
468 kfree_skb(mhp->dereg_skb);
469err0:
450 kfree(mhp); 470 kfree(mhp);
451 return ERR_PTR(ret); 471 return ERR_PTR(ret);
452} 472}
@@ -481,11 +501,18 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
481 if (!mhp) 501 if (!mhp)
482 return ERR_PTR(-ENOMEM); 502 return ERR_PTR(-ENOMEM);
483 503
504 mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
505 if (!mhp->dereg_skb) {
506 kfree(mhp);
507 return ERR_PTR(-ENOMEM);
508 }
509
484 mhp->rhp = rhp; 510 mhp->rhp = rhp;
485 511
486 mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); 512 mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
487 if (IS_ERR(mhp->umem)) { 513 if (IS_ERR(mhp->umem)) {
488 err = PTR_ERR(mhp->umem); 514 err = PTR_ERR(mhp->umem);
515 kfree_skb(mhp->dereg_skb);
489 kfree(mhp); 516 kfree(mhp);
490 return ERR_PTR(err); 517 return ERR_PTR(err);
491 } 518 }
@@ -550,6 +577,7 @@ err_pbl:
550 577
551err: 578err:
552 ib_umem_release(mhp->umem); 579 ib_umem_release(mhp->umem);
580 kfree_skb(mhp->dereg_skb);
553 kfree(mhp); 581 kfree(mhp);
554 return ERR_PTR(err); 582 return ERR_PTR(err);
555} 583}
@@ -572,11 +600,16 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
572 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); 600 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
573 if (!mhp) 601 if (!mhp)
574 return ERR_PTR(-ENOMEM); 602 return ERR_PTR(-ENOMEM);
575 ret = allocate_window(&rhp->rdev, &stag, php->pdid); 603
576 if (ret) { 604 mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
577 kfree(mhp); 605 if (!mhp->dereg_skb) {
578 return ERR_PTR(ret); 606 ret = -ENOMEM;
607 goto free_mhp;
579 } 608 }
609
610 ret = allocate_window(&rhp->rdev, &stag, php->pdid);
611 if (ret)
612 goto free_skb;
580 mhp->rhp = rhp; 613 mhp->rhp = rhp;
581 mhp->attr.pdid = php->pdid; 614 mhp->attr.pdid = php->pdid;
582 mhp->attr.type = FW_RI_STAG_MW; 615 mhp->attr.type = FW_RI_STAG_MW;
@@ -584,12 +617,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
584 mmid = (stag) >> 8; 617 mmid = (stag) >> 8;
585 mhp->ibmw.rkey = stag; 618 mhp->ibmw.rkey = stag;
586 if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { 619 if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
587 deallocate_window(&rhp->rdev, mhp->attr.stag); 620 ret = -ENOMEM;
588 kfree(mhp); 621 goto dealloc_win;
589 return ERR_PTR(-ENOMEM);
590 } 622 }
591 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); 623 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
592 return &(mhp->ibmw); 624 return &(mhp->ibmw);
625
626dealloc_win:
627 deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
628free_skb:
629 kfree_skb(mhp->dereg_skb);
630free_mhp:
631 kfree(mhp);
632 return ERR_PTR(ret);
593} 633}
594 634
595int c4iw_dealloc_mw(struct ib_mw *mw) 635int c4iw_dealloc_mw(struct ib_mw *mw)
@@ -602,7 +642,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
602 rhp = mhp->rhp; 642 rhp = mhp->rhp;
603 mmid = (mw->rkey) >> 8; 643 mmid = (mw->rkey) >> 8;
604 remove_handle(rhp, &rhp->mmidr, mmid); 644 remove_handle(rhp, &rhp->mmidr, mmid);
605 deallocate_window(&rhp->rdev, mhp->attr.stag); 645 deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
646 kfree_skb(mhp->dereg_skb);
606 kfree(mhp); 647 kfree(mhp);
607 PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); 648 PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
608 return 0; 649 return 0;
@@ -666,7 +707,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
666 return &(mhp->ibmr); 707 return &(mhp->ibmr);
667err3: 708err3:
668 dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, 709 dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
669 mhp->attr.pbl_addr); 710 mhp->attr.pbl_addr, mhp->dereg_skb);
670err2: 711err2:
671 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, 712 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
672 mhp->attr.pbl_size << 3); 713 mhp->attr.pbl_size << 3);
@@ -717,7 +758,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
717 dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, 758 dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
718 mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); 759 mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
719 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, 760 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
720 mhp->attr.pbl_addr); 761 mhp->attr.pbl_addr, mhp->dereg_skb);
721 if (mhp->attr.pbl_size) 762 if (mhp->attr.pbl_size)
722 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, 763 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
723 mhp->attr.pbl_size << 3); 764 mhp->attr.pbl_size << 3);
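The mem.c hunks above all implement one pattern: a struct sk_buff * is threaded from the memory-registration entry points down to write_adapter_mem(), so deregistration runs with a buffer (mhp->dereg_skb) set aside at registration time, and the low-level writers only fall back to alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL) when no buffer was supplied. Condensed, the dereg_skb lifecycle looks like the sketch below; the lines mirror the diff, with the surrounding error handling elided:

	/* at MR/MW creation: reserve the skb the destroy path will use */
	mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
	if (!mhp->dereg_skb)
		return ERR_PTR(-ENOMEM);

	/* at destroy time: the reserved skb is handed down and consumed by
	 * the firmware write, so this path never has to allocate */
	dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
		  mhp->attr.pbl_addr, mhp->dereg_skb);

	/* on an error unwind where the skb was never consumed */
	kfree_skb(mhp->dereg_skb);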
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index dd8a86b726d2..df127ce6b6ec 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -409,20 +409,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
409 CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); 409 CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
410} 410}
411 411
412static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
413 char *buf)
414{
415 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
416 ibdev.dev);
417 PDBG("%s dev 0x%p\n", __func__, dev);
418
419 return sprintf(buf, "%u.%u.%u.%u\n",
420 FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
421 FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
422 FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),
423 FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
424}
425
426static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 412static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
427 char *buf) 413 char *buf)
428{ 414{
@@ -502,13 +488,11 @@ static int c4iw_get_mib(struct ib_device *ibdev,
502} 488}
503 489
504static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 490static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
505static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
506static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 491static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
507static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 492static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
508 493
509static struct device_attribute *c4iw_class_attributes[] = { 494static struct device_attribute *c4iw_class_attributes[] = {
510 &dev_attr_hw_rev, 495 &dev_attr_hw_rev,
511 &dev_attr_fw_ver,
512 &dev_attr_hca_type, 496 &dev_attr_hca_type,
513 &dev_attr_board_id, 497 &dev_attr_board_id,
514}; 498};
@@ -530,6 +514,20 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
530 return 0; 514 return 0;
531} 515}
532 516
517static void get_dev_fw_str(struct ib_device *dev, char *str,
518 size_t str_len)
519{
520 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
521 ibdev);
522 PDBG("%s dev 0x%p\n", __func__, dev);
523
524 snprintf(str, str_len, "%u.%u.%u.%u",
525 FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
526 FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
527 FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),
528 FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
529}
530
533int c4iw_register_device(struct c4iw_dev *dev) 531int c4iw_register_device(struct c4iw_dev *dev)
534{ 532{
535 int ret; 533 int ret;
@@ -605,6 +603,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
605 dev->ibdev.get_hw_stats = c4iw_get_mib; 603 dev->ibdev.get_hw_stats = c4iw_get_mib;
606 dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; 604 dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
607 dev->ibdev.get_port_immutable = c4iw_port_immutable; 605 dev->ibdev.get_port_immutable = c4iw_port_immutable;
606 dev->ibdev.get_dev_fw_str = get_dev_fw_str;
608 dev->ibdev.drain_sq = c4iw_drain_sq; 607 dev->ibdev.drain_sq = c4iw_drain_sq;
609 dev->ibdev.drain_rq = c4iw_drain_rq; 608 dev->ibdev.drain_rq = c4iw_drain_rq;
610 609
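provider.c is one of several drivers in this series (mlx4 and i40iw follow below) that drop a private fw_ver sysfs attribute and instead fill the new ib_device->get_dev_fw_str hook, letting the RDMA core format firmware versions in one place. The shape of such a callback is just an snprintf into the buffer the core passes in; a generic sketch, where my_dev and fw_vers are placeholders for whatever the driver actually stores:

static void get_dev_fw_str(struct ib_device *ibdev, char *str, size_t str_len)
{
	/* my_dev / fw_vers stand in for the driver's own device state */
	struct my_dev *mdev = container_of(ibdev, struct my_dev, ibdev);

	snprintf(str, str_len, "%u.%u",
		 (unsigned int)(mdev->fw_vers >> 16),
		 (unsigned int)(mdev->fw_vers & 0xffff));
}

	...
	dev->ibdev.get_dev_fw_str = get_dev_fw_str;	/* set before ib_register_device() */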
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index e8993e49b8b3..edb1172b6f54 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -683,17 +683,25 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
683 return 0; 683 return 0;
684} 684}
685 685
686void _free_qp(struct kref *kref)
687{
688 struct c4iw_qp *qhp;
689
690 qhp = container_of(kref, struct c4iw_qp, kref);
691 PDBG("%s qhp %p\n", __func__, qhp);
692 kfree(qhp);
693}
694
686void c4iw_qp_add_ref(struct ib_qp *qp) 695void c4iw_qp_add_ref(struct ib_qp *qp)
687{ 696{
688 PDBG("%s ib_qp %p\n", __func__, qp); 697 PDBG("%s ib_qp %p\n", __func__, qp);
689 atomic_inc(&(to_c4iw_qp(qp)->refcnt)); 698 kref_get(&to_c4iw_qp(qp)->kref);
690} 699}
691 700
692void c4iw_qp_rem_ref(struct ib_qp *qp) 701void c4iw_qp_rem_ref(struct ib_qp *qp)
693{ 702{
694 PDBG("%s ib_qp %p\n", __func__, qp); 703 PDBG("%s ib_qp %p\n", __func__, qp);
695 if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt))) 704 kref_put(&to_c4iw_qp(qp)->kref, _free_qp);
696 wake_up(&(to_c4iw_qp(qp)->wait));
697} 705}
698 706
699static void add_to_fc_list(struct list_head *head, struct list_head *entry) 707static void add_to_fc_list(struct list_head *head, struct list_head *entry)
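The hunk above replaces the QP's open-coded atomic_t refcount plus waitqueue with a struct kref whose release callback frees the object, the idiomatic way to tie the final put to the free. Reduced to the three calls involved (the names are the ones the diff introduces):

	kref_init(&qhp->kref);				/* creation: count = 1        */
	kref_get(&to_c4iw_qp(qp)->kref);		/* c4iw_qp_add_ref()          */
	kref_put(&to_c4iw_qp(qp)->kref, _free_qp);	/* c4iw_qp_rem_ref(); the last
							 * put runs _free_qp()       */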
@@ -1081,9 +1089,10 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
1081 PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, 1089 PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
1082 qhp->ep->hwtid); 1090 qhp->ep->hwtid);
1083 1091
1084 skb = alloc_skb(sizeof *wqe, gfp); 1092 skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
1085 if (!skb) 1093 if (WARN_ON(!skb))
1086 return; 1094 return;
1095
1087 set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); 1096 set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
1088 1097
1089 wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); 1098 wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
@@ -1202,9 +1211,10 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1202 PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, 1211 PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
1203 ep->hwtid); 1212 ep->hwtid);
1204 1213
1205 skb = alloc_skb(sizeof *wqe, GFP_KERNEL); 1214 skb = skb_dequeue(&ep->com.ep_skb_list);
1206 if (!skb) 1215 if (WARN_ON(!skb))
1207 return -ENOMEM; 1216 return -ENOMEM;
1217
1208 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1218 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1209 1219
1210 wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); 1220 wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
@@ -1592,8 +1602,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
1592 wait_event(qhp->wait, !qhp->ep); 1602 wait_event(qhp->wait, !qhp->ep);
1593 1603
1594 remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); 1604 remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
1595 atomic_dec(&qhp->refcnt);
1596 wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
1597 1605
1598 spin_lock_irq(&rhp->lock); 1606 spin_lock_irq(&rhp->lock);
1599 if (!list_empty(&qhp->db_fc_entry)) 1607 if (!list_empty(&qhp->db_fc_entry))
@@ -1606,8 +1614,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
1606 destroy_qp(&rhp->rdev, &qhp->wq, 1614 destroy_qp(&rhp->rdev, &qhp->wq,
1607 ucontext ? &ucontext->uctx : &rhp->rdev.uctx); 1615 ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
1608 1616
1617 c4iw_qp_rem_ref(ib_qp);
1618
1609 PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); 1619 PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
1610 kfree(qhp);
1611 return 0; 1620 return 0;
1612} 1621}
1613 1622
@@ -1704,7 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1704 init_completion(&qhp->rq_drained); 1713 init_completion(&qhp->rq_drained);
1705 mutex_init(&qhp->mutex); 1714 mutex_init(&qhp->mutex);
1706 init_waitqueue_head(&qhp->wait); 1715 init_waitqueue_head(&qhp->wait);
1707 atomic_set(&qhp->refcnt, 1); 1716 kref_init(&qhp->kref);
1708 1717
1709 ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); 1718 ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
1710 if (ret) 1719 if (ret)
@@ -1896,12 +1905,20 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1896 return 0; 1905 return 0;
1897} 1906}
1898 1907
1908static void move_qp_to_err(struct c4iw_qp *qp)
1909{
1910 struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR };
1911
1912 (void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1913}
1914
1899void c4iw_drain_sq(struct ib_qp *ibqp) 1915void c4iw_drain_sq(struct ib_qp *ibqp)
1900{ 1916{
1901 struct c4iw_qp *qp = to_c4iw_qp(ibqp); 1917 struct c4iw_qp *qp = to_c4iw_qp(ibqp);
1902 unsigned long flag; 1918 unsigned long flag;
1903 bool need_to_wait; 1919 bool need_to_wait;
1904 1920
1921 move_qp_to_err(qp);
1905 spin_lock_irqsave(&qp->lock, flag); 1922 spin_lock_irqsave(&qp->lock, flag);
1906 need_to_wait = !t4_sq_empty(&qp->wq); 1923 need_to_wait = !t4_sq_empty(&qp->wq);
1907 spin_unlock_irqrestore(&qp->lock, flag); 1924 spin_unlock_irqrestore(&qp->lock, flag);
@@ -1916,6 +1933,7 @@ void c4iw_drain_rq(struct ib_qp *ibqp)
1916 unsigned long flag; 1933 unsigned long flag;
1917 bool need_to_wait; 1934 bool need_to_wait;
1918 1935
1936 move_qp_to_err(qp);
1919 spin_lock_irqsave(&qp->lock, flag); 1937 spin_lock_irqsave(&qp->lock, flag);
1920 need_to_wait = !t4_rq_empty(&qp->wq); 1938 need_to_wait = !t4_rq_empty(&qp->wq);
1921 spin_unlock_irqrestore(&qp->lock, flag); 1939 spin_unlock_irqrestore(&qp->lock, flag);
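The drain changes at the end of qp.c make c4iw_drain_sq()/c4iw_drain_rq() first force the QP into the ERROR state, so any outstanding work requests are flushed and the drain is guaranteed to terminate. Ignoring locking, the control flow reduces to the sketch below; the completion being waited on (sq_drained/rq_drained, initialised in c4iw_create_qp()) belongs to the pre-existing drain machinery, not to this hunk:

	move_qp_to_err(qp);			/* modify to C4IW_QP_STATE_ERROR */
	need_to_wait = !t4_sq_empty(&qp->wq);	/* anything still outstanding?   */
	if (need_to_wait)
		wait_for_completion(&qp->sq_drained);	/* signalled once the flush
							 * completions have arrived */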
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index a925fb0db706..f846fd51b85b 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -3,7 +3,6 @@ config INFINIBAND_HFI1
3 depends on X86_64 && INFINIBAND_RDMAVT 3 depends on X86_64 && INFINIBAND_RDMAVT
4 select MMU_NOTIFIER 4 select MMU_NOTIFIER
5 select CRC32 5 select CRC32
6 default m
7 ---help--- 6 ---help---
8 This is a low-level driver for Intel OPA Gen1 adapter. 7 This is a low-level driver for Intel OPA Gen1 adapter.
9config HFI1_DEBUG_SDMA_ORDER 8config HFI1_DEBUG_SDMA_ORDER
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index c702a009608f..32c19fad12a4 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -228,7 +228,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
228 sizeof(struct hfi1_base_info)); 228 sizeof(struct hfi1_base_info));
229 break; 229 break;
230 case HFI1_IOCTL_CREDIT_UPD: 230 case HFI1_IOCTL_CREDIT_UPD:
231 if (uctxt && uctxt->sc) 231 if (uctxt)
232 sc_return_credits(uctxt->sc); 232 sc_return_credits(uctxt->sc);
233 break; 233 break;
234 234
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4417a0fd3ef9..49a71e24a8f0 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1174,6 +1174,8 @@ struct hfi1_devdata {
1174 1174
1175/* 8051 firmware version helper */ 1175/* 8051 firmware version helper */
1176#define dc8051_ver(a, b) ((a) << 8 | (b)) 1176#define dc8051_ver(a, b) ((a) << 8 | (b))
1177#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
1178#define dc8051_ver_min(a) (a & 0x00ff)
1177 1179
1178/* f_put_tid types */ 1180/* f_put_tid types */
1179#define PT_EXPECTED 0 1181#define PT_EXPECTED 0
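The two new macros simply undo the packing done by dc8051_ver(a, b), which keeps the 8051 firmware major version in the upper byte and the minor version in the lower byte of a u16; verbs.c below uses them both for the 64-bit fw_ver device attribute and for the firmware string. A self-contained round-trip check (plain C, outside the kernel):

#include <assert.h>
#include <stdint.h>

#define dc8051_ver(a, b)  ((a) << 8 | (b))
#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
#define dc8051_ver_min(a) (a & 0x00ff)

int main(void)
{
	uint16_t ver = dc8051_ver(1, 27);	/* hypothetical firmware 1.27 */

	assert(dc8051_ver_maj(ver) == 1);
	assert(dc8051_ver_min(ver) == 27);
	return 0;
}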
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 849c4b9399d4..dd4be3c2b225 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1291,9 +1291,12 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
1291static void hfi1_fill_device_attr(struct hfi1_devdata *dd) 1291static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
1292{ 1292{
1293 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; 1293 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
1294 u16 ver = dd->dc8051_ver;
1294 1295
1295 memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); 1296 memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
1296 1297
1298 rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
1299 (u64)dc8051_ver_min(ver);
1297 rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | 1300 rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1298 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | 1301 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1299 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | 1302 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
@@ -1567,6 +1570,17 @@ static void init_ibport(struct hfi1_pportdata *ppd)
1567 RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); 1570 RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
1568} 1571}
1569 1572
1573static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
1574 size_t str_len)
1575{
1576 struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
1577 struct hfi1_ibdev *dev = dev_from_rdi(rdi);
1578 u16 ver = dd_from_dev(dev)->dc8051_ver;
1579
1580 snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
1581 dc8051_ver_min(ver));
1582}
1583
1570/** 1584/**
1571 * hfi1_register_ib_device - register our device with the infiniband core 1585 * hfi1_register_ib_device - register our device with the infiniband core
1572 * @dd: the device data structure 1586 * @dd: the device data structure
@@ -1613,6 +1627,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
1613 1627
1614 /* keep process mad in the driver */ 1628 /* keep process mad in the driver */
1615 ibdev->process_mad = hfi1_process_mad; 1629 ibdev->process_mad = hfi1_process_mad;
1630 ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
1616 1631
1617 strncpy(ibdev->node_desc, init_utsname()->nodename, 1632 strncpy(ibdev->node_desc, init_utsname()->nodename,
1618 sizeof(ibdev->node_desc)); 1633 sizeof(ibdev->node_desc));
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index d2fa72516960..5026dc79978a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1567,12 +1567,12 @@ static enum i40iw_status_code i40iw_del_multiple_qhash(
1567 ret = i40iw_manage_qhash(iwdev, cm_info, 1567 ret = i40iw_manage_qhash(iwdev, cm_info,
1568 I40IW_QHASH_TYPE_TCP_SYN, 1568 I40IW_QHASH_TYPE_TCP_SYN,
1569 I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false); 1569 I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
1570 kfree(child_listen_node);
1571 cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
1572 i40iw_debug(&iwdev->sc_dev, 1570 i40iw_debug(&iwdev->sc_dev,
1573 I40IW_DEBUG_CM, 1571 I40IW_DEBUG_CM,
1574 "freed pointer = %p\n", 1572 "freed pointer = %p\n",
1575 child_listen_node); 1573 child_listen_node);
1574 kfree(child_listen_node);
1575 cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
1576 } 1576 }
1577 spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); 1577 spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
1578 1578
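The only change in this hunk is ordering: the debug message reporting child_listen_node is now emitted before kfree(), where previously the pointer was handed back to the allocator first and referenced afterwards. It is a small instance of a general rule, restated here with the diff's own call condensed:

	/* before: the pointer is still used (if only for logging) after the free */
	kfree(child_listen_node);
	i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "freed pointer = %p\n",
		    child_listen_node);

	/* after: log first, free last */
	i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "freed pointer = %p\n",
		    child_listen_node);
	kfree(child_listen_node);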
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index bd942da91a27..2fac1db0e0a0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -1557,6 +1557,9 @@ enum i40iw_alignment {
1557#define I40IW_RING_MOVE_TAIL(_ring) \ 1557#define I40IW_RING_MOVE_TAIL(_ring) \
1558 (_ring).tail = ((_ring).tail + 1) % (_ring).size 1558 (_ring).tail = ((_ring).tail + 1) % (_ring).size
1559 1559
1560#define I40IW_RING_MOVE_HEAD_NOCHECK(_ring) \
1561 (_ring).head = ((_ring).head + 1) % (_ring).size
1562
1560#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ 1563#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
1561 (_ring).tail = ((_ring).tail + (_count)) % (_ring).size 1564 (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
1562 1565
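I40IW_RING_MOVE_HEAD_NOCHECK is the unconditional sibling of I40IW_RING_MOVE_HEAD: it advances the head modulo the ring size without reporting a "ring full" status, which is all the CQ-poll rewrite in i40iw_uk.c below needs, since there the head only moves after a valid CQE has been consumed. The arithmetic in isolation, as a standalone check:

#include <assert.h>

struct ring { unsigned int head, tail, size; };

#define RING_MOVE_HEAD_NOCHECK(r) ((r).head = ((r).head + 1) % (r).size)

int main(void)
{
	struct ring cq_ring = { .head = 3, .tail = 0, .size = 4 };

	RING_MOVE_HEAD_NOCHECK(cq_ring);
	assert(cq_ring.head == 0);	/* 3 -> 0: wraps at size */
	return 0;
}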
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index e9c6e82af9c7..c62d354f7810 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -1025,6 +1025,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
1025 u16 txoffset, bufoffset; 1025 u16 txoffset, bufoffset;
1026 1026
1027 buf = i40iw_puda_get_listbuf(pbufl); 1027 buf = i40iw_puda_get_listbuf(pbufl);
1028 if (!buf)
1029 return;
1028 nextseqnum = buf->seqnum + fpdu_len; 1030 nextseqnum = buf->seqnum + fpdu_len;
1029 txbuf->totallen = buf->hdrlen + fpdu_len; 1031 txbuf->totallen = buf->hdrlen + fpdu_len;
1030 txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen; 1032 txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
@@ -1048,6 +1050,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
1048 fpdu_len -= buf->datalen; 1050 fpdu_len -= buf->datalen;
1049 i40iw_puda_ret_bufpool(ieq, buf); 1051 i40iw_puda_ret_bufpool(ieq, buf);
1050 buf = i40iw_puda_get_listbuf(pbufl); 1052 buf = i40iw_puda_get_listbuf(pbufl);
1053 if (!buf)
1054 return;
1051 bufoffset = (u16)(buf->data - (u8 *)buf->mem.va); 1055 bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
1052 } while (1); 1056 } while (1);
1053 1057
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 16cc61720b53..2b1a04e9ca3c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -667,7 +667,7 @@ struct i40iw_tcp_offload_info {
667 bool time_stamp; 667 bool time_stamp;
668 u8 cwnd_inc_limit; 668 u8 cwnd_inc_limit;
669 bool drop_ooo_seg; 669 bool drop_ooo_seg;
670 bool dup_ack_thresh; 670 u8 dup_ack_thresh;
671 u8 ttl; 671 u8 ttl;
672 u8 src_mac_addr_idx; 672 u8 src_mac_addr_idx;
673 bool avoid_stretch_ack; 673 bool avoid_stretch_ack;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index e35faea88c13..4d28c3cb03cc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -291,9 +291,9 @@ static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
291 291
292 i40iw_set_fragment(wqe, 0, op_info->lo_sg_list); 292 i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
293 293
294 for (i = 1; i < op_info->num_lo_sges; i++) { 294 for (i = 1, byte_off = 32; i < op_info->num_lo_sges; i++) {
295 byte_off = 32 + (i - 1) * 16;
296 i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]); 295 i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
296 byte_off += 16;
297 } 297 }
298 298
299 wmb(); /* make sure WQE is populated before valid bit is set */ 299 wmb(); /* make sure WQE is populated before valid bit is set */
@@ -401,9 +401,9 @@ static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
401 401
402 i40iw_set_fragment(wqe, 0, op_info->sg_list); 402 i40iw_set_fragment(wqe, 0, op_info->sg_list);
403 403
404 for (i = 1; i < op_info->num_sges; i++) { 404 for (i = 1, byte_off = 32; i < op_info->num_sges; i++) {
405 byte_off = 32 + (i - 1) * 16;
406 i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]); 405 i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
406 byte_off += 16;
407 } 407 }
408 408
409 wmb(); /* make sure WQE is populated before valid bit is set */ 409 wmb(); /* make sure WQE is populated before valid bit is set */
@@ -685,9 +685,9 @@ static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp,
685 685
686 i40iw_set_fragment(wqe, 0, info->sg_list); 686 i40iw_set_fragment(wqe, 0, info->sg_list);
687 687
688 for (i = 1; i < info->num_sges; i++) { 688 for (i = 1, byte_off = 32; i < info->num_sges; i++) {
689 byte_off = 32 + (i - 1) * 16;
690 i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]); 689 i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
690 byte_off += 16;
691 } 691 }
692 692
693 wmb(); /* make sure WQE is populated before valid bit is set */ 693 wmb(); /* make sure WQE is populated before valid bit is set */
@@ -753,8 +753,7 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
753 * @post_cq: update cq tail 753 * @post_cq: update cq tail
754 */ 754 */
755static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, 755static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
756 struct i40iw_cq_poll_info *info, 756 struct i40iw_cq_poll_info *info)
757 bool post_cq)
758{ 757{
759 u64 comp_ctx, qword0, qword2, qword3, wqe_qword; 758 u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
760 u64 *cqe, *sw_wqe; 759 u64 *cqe, *sw_wqe;
@@ -762,7 +761,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
762 struct i40iw_ring *pring = NULL; 761 struct i40iw_ring *pring = NULL;
763 u32 wqe_idx, q_type, array_idx = 0; 762 u32 wqe_idx, q_type, array_idx = 0;
764 enum i40iw_status_code ret_code = 0; 763 enum i40iw_status_code ret_code = 0;
765 enum i40iw_status_code ret_code2 = 0;
766 bool move_cq_head = true; 764 bool move_cq_head = true;
767 u8 polarity; 765 u8 polarity;
768 u8 addl_wqes = 0; 766 u8 addl_wqes = 0;
@@ -870,19 +868,14 @@ exit:
870 move_cq_head = false; 868 move_cq_head = false;
871 869
872 if (move_cq_head) { 870 if (move_cq_head) {
873 I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2); 871 I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
874
875 if (ret_code2 && !ret_code)
876 ret_code = ret_code2;
877 872
878 if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0) 873 if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
879 cq->polarity ^= 1; 874 cq->polarity ^= 1;
880 875
881 if (post_cq) { 876 I40IW_RING_MOVE_TAIL(cq->cq_ring);
882 I40IW_RING_MOVE_TAIL(cq->cq_ring); 877 set_64bit_val(cq->shadow_area, 0,
883 set_64bit_val(cq->shadow_area, 0, 878 I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
884 I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
885 }
886 } else { 879 } else {
887 if (info->is_srq) 880 if (info->is_srq)
888 return ret_code; 881 return ret_code;
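The three loop rewrites in i40iw_uk.c leave the WQE layout untouched and only change how the offset is computed: fragment 0 sits at byte 0, fragment 1 at byte 32, and each further fragment 16 bytes after the previous one, so carrying byte_off across iterations is equivalent to recomputing 32 + (i - 1) * 16 every time. A standalone check that the incremental form matches the old closed form:

#include <assert.h>

int main(void)
{
	unsigned int i, byte_off;

	for (i = 1, byte_off = 32; i < 16; i++) {
		assert(byte_off == 32 + (i - 1) * 16);	/* old per-iteration formula */
		byte_off += 16;				/* new running offset        */
	}
	return 0;
}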
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 4627646fe8cd..276bcefffd7e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -327,7 +327,7 @@ struct i40iw_cq_ops {
327 void (*iw_cq_request_notification)(struct i40iw_cq_uk *, 327 void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
328 enum i40iw_completion_notify); 328 enum i40iw_completion_notify);
329 enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *, 329 enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
330 struct i40iw_cq_poll_info *, bool); 330 struct i40iw_cq_poll_info *);
331 enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count); 331 enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
332 void (*iw_cq_clean)(void *, struct i40iw_cq_uk *); 332 void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
333}; 333};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 283b64c942ee..2360338877bf 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -529,7 +529,7 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
529 status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift); 529 status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift);
530 530
531 if (status) 531 if (status)
532 return -ENOSYS; 532 return -ENOMEM;
533 533
534 sqdepth = sq_size << sqshift; 534 sqdepth = sq_size << sqshift;
535 rqdepth = rq_size << rqshift; 535 rqdepth = rq_size << rqshift;
@@ -671,7 +671,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
671 iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; 671 iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
672 672
673 if (init_attr->qp_type != IB_QPT_RC) { 673 if (init_attr->qp_type != IB_QPT_RC) {
674 err_code = -ENOSYS; 674 err_code = -EINVAL;
675 goto error; 675 goto error;
676 } 676 }
677 if (iwdev->push_mode) 677 if (iwdev->push_mode)
@@ -1840,6 +1840,7 @@ struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
1840 iwmr->ibmr.lkey = stag; 1840 iwmr->ibmr.lkey = stag;
1841 iwmr->page_cnt = 1; 1841 iwmr->page_cnt = 1;
1842 iwmr->pgaddrmem[0] = addr; 1842 iwmr->pgaddrmem[0] = addr;
1843 iwmr->length = size;
1843 status = i40iw_hwreg_mr(iwdev, iwmr, access); 1844 status = i40iw_hwreg_mr(iwdev, iwmr, access);
1844 if (status) { 1845 if (status) {
1845 i40iw_free_stag(iwdev, stag); 1846 i40iw_free_stag(iwdev, stag);
@@ -1863,7 +1864,7 @@ static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
1863{ 1864{
1864 u64 kva = 0; 1865 u64 kva = 0;
1865 1866
1866 return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva); 1867 return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva);
1867} 1868}
1868 1869
1869/** 1870/**
@@ -1975,18 +1976,6 @@ static ssize_t i40iw_show_rev(struct device *dev,
1975} 1976}
1976 1977
1977/** 1978/**
1978 * i40iw_show_fw_ver
1979 */
1980static ssize_t i40iw_show_fw_ver(struct device *dev,
1981 struct device_attribute *attr, char *buf)
1982{
1983 u32 firmware_version = I40IW_FW_VERSION;
1984
1985 return sprintf(buf, "%u.%u\n", firmware_version,
1986 (firmware_version & 0x000000ff));
1987}
1988
1989/**
1990 * i40iw_show_hca 1979 * i40iw_show_hca
1991 */ 1980 */
1992static ssize_t i40iw_show_hca(struct device *dev, 1981static ssize_t i40iw_show_hca(struct device *dev,
@@ -2006,13 +1995,11 @@ static ssize_t i40iw_show_board(struct device *dev,
2006} 1995}
2007 1996
2008static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL); 1997static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
2009static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL);
2010static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL); 1998static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
2011static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL); 1999static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
2012 2000
2013static struct device_attribute *i40iw_dev_attributes[] = { 2001static struct device_attribute *i40iw_dev_attributes[] = {
2014 &dev_attr_hw_rev, 2002 &dev_attr_hw_rev,
2015 &dev_attr_fw_ver,
2016 &dev_attr_hca_type, 2003 &dev_attr_hca_type,
2017 &dev_attr_board_id 2004 &dev_attr_board_id
2018}; 2005};
@@ -2091,8 +2078,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
2091 ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false); 2078 ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
2092 } 2079 }
2093 2080
2094 if (ret) 2081 if (ret) {
2095 err = -EIO; 2082 if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
2083 err = -ENOMEM;
2084 else
2085 err = -EINVAL;
2086 }
2096 break; 2087 break;
2097 case IB_WR_RDMA_WRITE: 2088 case IB_WR_RDMA_WRITE:
2098 info.op_type = I40IW_OP_TYPE_RDMA_WRITE; 2089 info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
@@ -2113,8 +2104,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
2113 ret = ukqp->ops.iw_rdma_write(ukqp, &info, false); 2104 ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
2114 } 2105 }
2115 2106
2116 if (ret) 2107 if (ret) {
2117 err = -EIO; 2108 if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
2109 err = -ENOMEM;
2110 else
2111 err = -EINVAL;
2112 }
2118 break; 2113 break;
2119 case IB_WR_RDMA_READ_WITH_INV: 2114 case IB_WR_RDMA_READ_WITH_INV:
2120 inv_stag = true; 2115 inv_stag = true;
@@ -2132,15 +2127,19 @@ static int i40iw_post_send(struct ib_qp *ibqp,
2132 info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey; 2127 info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
2133 info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length; 2128 info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
2134 ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false); 2129 ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
2135 if (ret) 2130 if (ret) {
2136 err = -EIO; 2131 if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
2132 err = -ENOMEM;
2133 else
2134 err = -EINVAL;
2135 }
2137 break; 2136 break;
2138 case IB_WR_LOCAL_INV: 2137 case IB_WR_LOCAL_INV:
2139 info.op_type = I40IW_OP_TYPE_INV_STAG; 2138 info.op_type = I40IW_OP_TYPE_INV_STAG;
2140 info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; 2139 info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
2141 ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true); 2140 ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
2142 if (ret) 2141 if (ret)
2143 err = -EIO; 2142 err = -ENOMEM;
2144 break; 2143 break;
2145 case IB_WR_REG_MR: 2144 case IB_WR_REG_MR:
2146 { 2145 {
@@ -2174,7 +2173,7 @@ static int i40iw_post_send(struct ib_qp *ibqp,
2174 2173
2175 ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true); 2174 ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
2176 if (ret) 2175 if (ret)
2177 err = -EIO; 2176 err = -ENOMEM;
2178 break; 2177 break;
2179 } 2178 }
2180 default: 2179 default:
@@ -2214,6 +2213,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
2214 struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT]; 2213 struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
2215 enum i40iw_status_code ret = 0; 2214 enum i40iw_status_code ret = 0;
2216 unsigned long flags; 2215 unsigned long flags;
2216 int err = 0;
2217 2217
2218 iwqp = (struct i40iw_qp *)ibqp; 2218 iwqp = (struct i40iw_qp *)ibqp;
2219 ukqp = &iwqp->sc_qp.qp_uk; 2219 ukqp = &iwqp->sc_qp.qp_uk;
@@ -2228,6 +2228,10 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
2228 ret = ukqp->ops.iw_post_receive(ukqp, &post_recv); 2228 ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
2229 if (ret) { 2229 if (ret) {
2230 i40iw_pr_err(" post_recv err %d\n", ret); 2230 i40iw_pr_err(" post_recv err %d\n", ret);
2231 if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
2232 err = -ENOMEM;
2233 else
2234 err = -EINVAL;
2231 *bad_wr = ib_wr; 2235 *bad_wr = ib_wr;
2232 goto out; 2236 goto out;
2233 } 2237 }
@@ -2235,9 +2239,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
2235 } 2239 }
2236 out: 2240 out:
2237 spin_unlock_irqrestore(&iwqp->lock, flags); 2241 spin_unlock_irqrestore(&iwqp->lock, flags);
2238 if (ret) 2242 return err;
2239 return -ENOSYS;
2240 return 0;
2241} 2243}
2242 2244
2243/** 2245/**
@@ -2264,7 +2266,7 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
2264 2266
2265 spin_lock_irqsave(&iwcq->lock, flags); 2267 spin_lock_irqsave(&iwcq->lock, flags);
2266 while (cqe_count < num_entries) { 2268 while (cqe_count < num_entries) {
2267 ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true); 2269 ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info);
2268 if (ret == I40IW_ERR_QUEUE_EMPTY) { 2270 if (ret == I40IW_ERR_QUEUE_EMPTY) {
2269 break; 2271 break;
2270 } else if (ret == I40IW_ERR_QUEUE_DESTROYED) { 2272 } else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
@@ -2437,6 +2439,15 @@ static const char * const i40iw_hw_stat_names[] = {
2437 "iwRdmaInv" 2439 "iwRdmaInv"
2438}; 2440};
2439 2441
2442static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str,
2443 size_t str_len)
2444{
2445 u32 firmware_version = I40IW_FW_VERSION;
2446
2447 snprintf(str, str_len, "%u.%u", firmware_version,
2448 (firmware_version & 0x000000ff));
2449}
2450
2440/** 2451/**
2441 * i40iw_alloc_hw_stats - Allocate a hw stats structure 2452 * i40iw_alloc_hw_stats - Allocate a hw stats structure
2442 * @ibdev: device pointer from stack 2453 * @ibdev: device pointer from stack
@@ -2528,7 +2539,7 @@ static int i40iw_modify_port(struct ib_device *ibdev,
2528 int port_modify_mask, 2539 int port_modify_mask,
2529 struct ib_port_modify *props) 2540 struct ib_port_modify *props)
2530{ 2541{
2531 return 0; 2542 return -ENOSYS;
2532} 2543}
2533 2544
2534/** 2545/**
@@ -2660,6 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
2660 memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, 2671 memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
2661 sizeof(iwibdev->ibdev.iwcm->ifname)); 2672 sizeof(iwibdev->ibdev.iwcm->ifname));
2662 iwibdev->ibdev.get_port_immutable = i40iw_port_immutable; 2673 iwibdev->ibdev.get_port_immutable = i40iw_port_immutable;
2674 iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str;
2663 iwibdev->ibdev.poll_cq = i40iw_poll_cq; 2675 iwibdev->ibdev.poll_cq = i40iw_poll_cq;
2664 iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; 2676 iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
2665 iwibdev->ibdev.post_send = i40iw_post_send; 2677 iwibdev->ibdev.post_send = i40iw_post_send;
@@ -2723,7 +2735,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
2723 2735
2724 iwdev->iwibdev = i40iw_init_rdma_device(iwdev); 2736 iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
2725 if (!iwdev->iwibdev) 2737 if (!iwdev->iwibdev)
2726 return -ENOSYS; 2738 return -ENOMEM;
2727 iwibdev = iwdev->iwibdev; 2739 iwibdev = iwdev->iwibdev;
2728 2740
2729 ret = ib_register_device(&iwibdev->ibdev, NULL); 2741 ret = ib_register_device(&iwibdev->ibdev, NULL);
@@ -2748,5 +2760,5 @@ error:
2748 kfree(iwdev->iwibdev->ibdev.iwcm); 2760 kfree(iwdev->iwibdev->ibdev.iwcm);
2749 iwdev->iwibdev->ibdev.iwcm = NULL; 2761 iwdev->iwibdev->ibdev.iwcm = NULL;
2750 ib_dealloc_device(&iwdev->iwibdev->ibdev); 2762 ib_dealloc_device(&iwdev->iwibdev->ibdev);
2751 return -ENOSYS; 2763 return ret;
2752} 2764}
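A recurring edit in i40iw_verbs.c replaces blanket -EIO/-ENOSYS returns with errno values that describe the failure: on the post paths, I40IW_ERR_QP_TOOMANY_WRS_POSTED maps to -ENOMEM (the work queue is full) and everything else to -EINVAL, while i40iw_modify_port now honestly reports -ENOSYS and the registration error paths propagate the real cause. The patch repeats the test inline at each call site; a hypothetical helper expressing the same mapping would be:

/* Not part of the patch; shown only to make the mapping explicit. */
static int i40iw_post_err_to_errno(enum i40iw_status_code ret)
{
	if (!ret)
		return 0;
	return (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) ? -ENOMEM : -EINVAL;
}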
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 9f8b516eb2b0..d6fc8a6e8c33 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -288,7 +288,7 @@ static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
288 if (cq->resize_buf) 288 if (cq->resize_buf)
289 return -EBUSY; 289 return -EBUSY;
290 290
291 cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); 291 cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
292 if (!cq->resize_buf) 292 if (!cq->resize_buf)
293 return -ENOMEM; 293 return -ENOMEM;
294 294
@@ -316,7 +316,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
316 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) 316 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
317 return -EFAULT; 317 return -EFAULT;
318 318
319 cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); 319 cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
320 if (!cq->resize_buf) 320 if (!cq->resize_buf)
321 return -ENOMEM; 321 return -ENOMEM;
322 322
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 42a46078d7d5..2af44c2de262 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2025,16 +2025,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2025 return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); 2025 return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
2026} 2026}
2027 2027
2028static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
2029 char *buf)
2030{
2031 struct mlx4_ib_dev *dev =
2032 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2033 return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
2034 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
2035 (int) dev->dev->caps.fw_ver & 0xffff);
2036}
2037
2038static ssize_t show_rev(struct device *device, struct device_attribute *attr, 2028static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2039 char *buf) 2029 char *buf)
2040{ 2030{
@@ -2053,17 +2043,204 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
2053} 2043}
2054 2044
2055static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 2045static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2056static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
2057static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 2046static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2058static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 2047static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
2059 2048
2060static struct device_attribute *mlx4_class_attributes[] = { 2049static struct device_attribute *mlx4_class_attributes[] = {
2061 &dev_attr_hw_rev, 2050 &dev_attr_hw_rev,
2062 &dev_attr_fw_ver,
2063 &dev_attr_hca_type, 2051 &dev_attr_hca_type,
2064 &dev_attr_board_id 2052 &dev_attr_board_id
2065}; 2053};
2066 2054
2055struct diag_counter {
2056 const char *name;
2057 u32 offset;
2058};
2059
2060#define DIAG_COUNTER(_name, _offset) \
2061 { .name = #_name, .offset = _offset }
2062
2063static const struct diag_counter diag_basic[] = {
2064 DIAG_COUNTER(rq_num_lle, 0x00),
2065 DIAG_COUNTER(sq_num_lle, 0x04),
2066 DIAG_COUNTER(rq_num_lqpoe, 0x08),
2067 DIAG_COUNTER(sq_num_lqpoe, 0x0C),
2068 DIAG_COUNTER(rq_num_lpe, 0x18),
2069 DIAG_COUNTER(sq_num_lpe, 0x1C),
2070 DIAG_COUNTER(rq_num_wrfe, 0x20),
2071 DIAG_COUNTER(sq_num_wrfe, 0x24),
2072 DIAG_COUNTER(sq_num_mwbe, 0x2C),
2073 DIAG_COUNTER(sq_num_bre, 0x34),
2074 DIAG_COUNTER(sq_num_rire, 0x44),
2075 DIAG_COUNTER(rq_num_rire, 0x48),
2076 DIAG_COUNTER(sq_num_rae, 0x4C),
2077 DIAG_COUNTER(rq_num_rae, 0x50),
2078 DIAG_COUNTER(sq_num_roe, 0x54),
2079 DIAG_COUNTER(sq_num_tree, 0x5C),
2080 DIAG_COUNTER(sq_num_rree, 0x64),
2081 DIAG_COUNTER(rq_num_rnr, 0x68),
2082 DIAG_COUNTER(sq_num_rnr, 0x6C),
2083 DIAG_COUNTER(rq_num_oos, 0x100),
2084 DIAG_COUNTER(sq_num_oos, 0x104),
2085};
2086
2087static const struct diag_counter diag_ext[] = {
2088 DIAG_COUNTER(rq_num_dup, 0x130),
2089 DIAG_COUNTER(sq_num_to, 0x134),
2090};
2091
2092static const struct diag_counter diag_device_only[] = {
2093 DIAG_COUNTER(num_cqovf, 0x1A0),
2094 DIAG_COUNTER(rq_num_udsdprd, 0x118),
2095};
2096
2097static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
2098 u8 port_num)
2099{
2100 struct mlx4_ib_dev *dev = to_mdev(ibdev);
2101 struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2102
2103 if (!diag[!!port_num].name)
2104 return NULL;
2105
2106 return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
2107 diag[!!port_num].num_counters,
2108 RDMA_HW_STATS_DEFAULT_LIFESPAN);
2109}
2110
2111static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
2112 struct rdma_hw_stats *stats,
2113 u8 port, int index)
2114{
2115 struct mlx4_ib_dev *dev = to_mdev(ibdev);
2116 struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2117 u32 hw_value[ARRAY_SIZE(diag_device_only) +
2118 ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
2119 int ret;
2120 int i;
2121
2122 ret = mlx4_query_diag_counters(dev->dev,
2123 MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
2124 diag[!!port].offset, hw_value,
2125 diag[!!port].num_counters, port);
2126
2127 if (ret)
2128 return ret;
2129
2130 for (i = 0; i < diag[!!port].num_counters; i++)
2131 stats->value[i] = hw_value[i];
2132
2133 return diag[!!port].num_counters;
2134}
2135
2136static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
2137 const char ***name,
2138 u32 **offset,
2139 u32 *num,
2140 bool port)
2141{
2142 u32 num_counters;
2143
2144 num_counters = ARRAY_SIZE(diag_basic);
2145
2146 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
2147 num_counters += ARRAY_SIZE(diag_ext);
2148
2149 if (!port)
2150 num_counters += ARRAY_SIZE(diag_device_only);
2151
2152 *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
2153 if (!*name)
2154 return -ENOMEM;
2155
2156 *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
2157 if (!*offset)
2158 goto err_name;
2159
2160 *num = num_counters;
2161
2162 return 0;
2163
2164err_name:
2165 kfree(*name);
2166 return -ENOMEM;
2167}
2168
2169static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
2170 const char **name,
2171 u32 *offset,
2172 bool port)
2173{
2174 int i;
2175 int j;
2176
2177 for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
2178 name[i] = diag_basic[i].name;
2179 offset[i] = diag_basic[i].offset;
2180 }
2181
2182 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
2183 for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
2184 name[j] = diag_ext[i].name;
2185 offset[j] = diag_ext[i].offset;
2186 }
2187 }
2188
2189 if (!port) {
2190 for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
2191 name[j] = diag_device_only[i].name;
2192 offset[j] = diag_device_only[i].offset;
2193 }
2194 }
2195}
2196
2197static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
2198{
2199 struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
2200 int i;
2201 int ret;
2202 bool per_port = !!(ibdev->dev->caps.flags2 &
2203 MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
2204
2205 for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2206 /* i == 1 means we are building port counters */
2207 if (i && !per_port)
2208 continue;
2209
2210 ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
2211 &diag[i].offset,
2212 &diag[i].num_counters, i);
2213 if (ret)
2214 goto err_alloc;
2215
2216 mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
2217 diag[i].offset, i);
2218 }
2219
2220 ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
2221 ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;
2222
2223 return 0;
2224
2225err_alloc:
2226 if (i) {
2227 kfree(diag[i - 1].name);
2228 kfree(diag[i - 1].offset);
2229 }
2230
2231 return ret;
2232}
2233
2234static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
2235{
2236 int i;
2237
2238 for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2239 kfree(ibdev->diag_counters[i].offset);
2240 kfree(ibdev->diag_counters[i].name);
2241 }
2242}
2243
2067#define MLX4_IB_INVALID_MAC ((u64)-1) 2244#define MLX4_IB_INVALID_MAC ((u64)-1)
2068static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, 2245static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
2069 struct net_device *dev, 2246 struct net_device *dev,
@@ -2280,6 +2457,17 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
2280 return 0; 2457 return 0;
2281} 2458}
2282 2459
2460static void get_fw_ver_str(struct ib_device *device, char *str,
2461 size_t str_len)
2462{
2463 struct mlx4_ib_dev *dev =
2464 container_of(device, struct mlx4_ib_dev, ib_dev);
2465 snprintf(str, str_len, "%d.%d.%d",
2466 (int) (dev->dev->caps.fw_ver >> 32),
2467 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
2468 (int) dev->dev->caps.fw_ver & 0xffff);
2469}
2470
2283static void *mlx4_ib_add(struct mlx4_dev *dev) 2471static void *mlx4_ib_add(struct mlx4_dev *dev)
2284{ 2472{
2285 struct mlx4_ib_dev *ibdev; 2473 struct mlx4_ib_dev *ibdev;
@@ -2413,6 +2601,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2413 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 2601 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
2414 ibdev->ib_dev.process_mad = mlx4_ib_process_mad; 2602 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
2415 ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; 2603 ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
2604 ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
2416 ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; 2605 ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
2417 2606
2418 if (!mlx4_is_slave(ibdev->dev)) { 2607 if (!mlx4_is_slave(ibdev->dev)) {
@@ -2555,9 +2744,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2555 for (j = 1; j <= ibdev->dev->caps.num_ports; j++) 2744 for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
2556 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]); 2745 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
2557 2746
2558 if (ib_register_device(&ibdev->ib_dev, NULL)) 2747 if (mlx4_ib_alloc_diag_counters(ibdev))
2559 goto err_steer_free_bitmap; 2748 goto err_steer_free_bitmap;
2560 2749
2750 if (ib_register_device(&ibdev->ib_dev, NULL))
2751 goto err_diag_counters;
2752
2561 if (mlx4_ib_mad_init(ibdev)) 2753 if (mlx4_ib_mad_init(ibdev))
2562 goto err_reg; 2754 goto err_reg;
2563 2755
@@ -2623,6 +2815,9 @@ err_mad:
2623err_reg: 2815err_reg:
2624 ib_unregister_device(&ibdev->ib_dev); 2816 ib_unregister_device(&ibdev->ib_dev);
2625 2817
2818err_diag_counters:
2819 mlx4_ib_diag_cleanup(ibdev);
2820
2626err_steer_free_bitmap: 2821err_steer_free_bitmap:
2627 kfree(ibdev->ib_uc_qpns_bitmap); 2822 kfree(ibdev->ib_uc_qpns_bitmap);
2628 2823
@@ -2726,6 +2921,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
2726 mlx4_ib_close_sriov(ibdev); 2921 mlx4_ib_close_sriov(ibdev);
2727 mlx4_ib_mad_cleanup(ibdev); 2922 mlx4_ib_mad_cleanup(ibdev);
2728 ib_unregister_device(&ibdev->ib_dev); 2923 ib_unregister_device(&ibdev->ib_dev);
2924 mlx4_ib_diag_cleanup(ibdev);
2729 if (ibdev->iboe.nb.notifier_call) { 2925 if (ibdev->iboe.nb.notifier_call) {
2730 if (unregister_netdevice_notifier(&ibdev->iboe.nb)) 2926 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2731 pr_warn("failure unregistering notifier\n"); 2927 pr_warn("failure unregistering notifier\n");
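For context on the get_fw_ver_str() hunk above: mlx4 packs the firmware version into the 64-bit caps.fw_ver word, major in bits 63:32, minor in bits 31:16 and sub-minor in bits 15:0, which is exactly what the shifts and masks fed to snprintf() decode. A minimal stand-alone sketch of that unpacking (the helper name and sample value are invented for illustration; this is not driver code):

/* User-space model of the fw_ver unpacking shown in get_fw_ver_str(). */
#include <stdint.h>
#include <stdio.h>

static void format_fw_ver(uint64_t fw_ver, char *buf, size_t len)
{
	snprintf(buf, len, "%u.%u.%u",
		 (unsigned int)(fw_ver >> 32),
		 (unsigned int)(fw_ver >> 16) & 0xffff,
		 (unsigned int)fw_ver & 0xffff);
}

int main(void)
{
	char buf[32];
	uint64_t fw_ver = ((uint64_t)2 << 32) | (35 << 16) | 5100;	/* invented value */

	format_fw_ver(fw_ver, buf, sizeof(buf));
	printf("%s\n", buf);	/* prints "2.35.5100" */
	return 0;
}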
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 29acda249612..7c5832ede4bd 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -549,6 +549,14 @@ struct mlx4_ib_counters {
549 u32 default_counter; 549 u32 default_counter;
550}; 550};
551 551
552#define MLX4_DIAG_COUNTERS_TYPES 2
553
554struct mlx4_ib_diag_counters {
555 const char **name;
556 u32 *offset;
557 u32 num_counters;
558};
559
552struct mlx4_ib_dev { 560struct mlx4_ib_dev {
553 struct ib_device ib_dev; 561 struct ib_device ib_dev;
554 struct mlx4_dev *dev; 562 struct mlx4_dev *dev;
@@ -585,6 +593,7 @@ struct mlx4_ib_dev {
585 /* protect resources needed as part of reset flow */ 593 /* protect resources needed as part of reset flow */
586 spinlock_t reset_flow_resource_lock; 594 spinlock_t reset_flow_resource_lock;
587 struct list_head qp_list; 595 struct list_head qp_list;
596 struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
588}; 597};
589 598
590struct ib_event_work { 599struct ib_event_work {
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 9c0e67bd2ba7..308a358e5b46 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -424,6 +424,83 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
424 item->key = be32_to_cpu(cqe->mkey); 424 item->key = be32_to_cpu(cqe->mkey);
425} 425}
426 426
427static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
428 struct ib_wc *wc, int *npolled)
429{
430 struct mlx5_ib_wq *wq;
431 unsigned int cur;
432 unsigned int idx;
433 int np;
434 int i;
435
436 wq = &qp->sq;
437 cur = wq->head - wq->tail;
438 np = *npolled;
439
440 if (cur == 0)
441 return;
442
443 for (i = 0; i < cur && np < num_entries; i++) {
444 idx = wq->last_poll & (wq->wqe_cnt - 1);
445 wc->wr_id = wq->wrid[idx];
446 wc->status = IB_WC_WR_FLUSH_ERR;
447 wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
448 wq->tail++;
449 np++;
450 wc->qp = &qp->ibqp;
451 wc++;
452 wq->last_poll = wq->w_list[idx].next;
453 }
454 *npolled = np;
455}
456
457static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
458 struct ib_wc *wc, int *npolled)
459{
460 struct mlx5_ib_wq *wq;
461 unsigned int cur;
462 int np;
463 int i;
464
465 wq = &qp->rq;
466 cur = wq->head - wq->tail;
467 np = *npolled;
468
469 if (cur == 0)
470 return;
471
472 for (i = 0; i < cur && np < num_entries; i++) {
473 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
474 wc->status = IB_WC_WR_FLUSH_ERR;
475 wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
476 wq->tail++;
477 np++;
478 wc->qp = &qp->ibqp;
479 wc++;
480 }
481 *npolled = np;
482}
483
484static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
485 struct ib_wc *wc, int *npolled)
486{
487 struct mlx5_ib_qp *qp;
488
489 *npolled = 0;
 490	/* Find uncompleted WQEs belonging to that cq and return mimicked flush-error completions for them */
491 list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
492 sw_send_comp(qp, num_entries, wc + *npolled, npolled);
493 if (*npolled >= num_entries)
494 return;
495 }
496
497 list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
498 sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
499 if (*npolled >= num_entries)
500 return;
501 }
502}
503
427static int mlx5_poll_one(struct mlx5_ib_cq *cq, 504static int mlx5_poll_one(struct mlx5_ib_cq *cq,
428 struct mlx5_ib_qp **cur_qp, 505 struct mlx5_ib_qp **cur_qp,
429 struct ib_wc *wc) 506 struct ib_wc *wc)
@@ -594,12 +671,18 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
594{ 671{
595 struct mlx5_ib_cq *cq = to_mcq(ibcq); 672 struct mlx5_ib_cq *cq = to_mcq(ibcq);
596 struct mlx5_ib_qp *cur_qp = NULL; 673 struct mlx5_ib_qp *cur_qp = NULL;
674 struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
675 struct mlx5_core_dev *mdev = dev->mdev;
597 unsigned long flags; 676 unsigned long flags;
598 int soft_polled = 0; 677 int soft_polled = 0;
599 int npolled; 678 int npolled;
600 int err = 0; 679 int err = 0;
601 680
602 spin_lock_irqsave(&cq->lock, flags); 681 spin_lock_irqsave(&cq->lock, flags);
682 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
683 mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
684 goto out;
685 }
603 686
604 if (unlikely(!list_empty(&cq->wc_list))) 687 if (unlikely(!list_empty(&cq->wc_list)))
605 soft_polled = poll_soft_wc(cq, num_entries, wc); 688 soft_polled = poll_soft_wc(cq, num_entries, wc);
@@ -612,7 +695,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
612 695
613 if (npolled) 696 if (npolled)
614 mlx5_cq_set_ci(&cq->mcq); 697 mlx5_cq_set_ci(&cq->mcq);
615 698out:
616 spin_unlock_irqrestore(&cq->lock, flags); 699 spin_unlock_irqrestore(&cq->lock, flags);
617 700
618 if (err == 0 || err == -EAGAIN) 701 if (err == 0 || err == -EAGAIN)
@@ -843,6 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
843 cq->resize_buf = NULL; 926 cq->resize_buf = NULL;
844 cq->resize_umem = NULL; 927 cq->resize_umem = NULL;
845 cq->create_flags = attr->flags; 928 cq->create_flags = attr->flags;
929 INIT_LIST_HEAD(&cq->list_send_qp);
930 INIT_LIST_HEAD(&cq->list_recv_qp);
846 931
847 if (context) { 932 if (context) {
848 err = create_cq_user(dev, udata, context, cq, entries, 933 err = create_cq_user(dev, udata, context, cq, entries,
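The sw_send_comp()/sw_recv_comp() helpers above flush software completions off a work queue whose head and tail are free-running unsigned counters: the occupancy is the wrap-safe difference head - tail, and a slot is addressed by masking a counter with wqe_cnt - 1, which requires the ring size to be a power of two. A self-contained sketch of that ring arithmetic, with invented names and sizes rather than the driver's structures:

/* Toy power-of-two ring mirroring the head/tail arithmetic above. */
#include <stdint.h>
#include <stdio.h>

struct ring {
	unsigned int head;	/* bumped by the producer */
	unsigned int tail;	/* bumped by the consumer */
	unsigned int size;	/* must be a power of two */
	uint64_t wrid[8];
};

static unsigned int ring_occupancy(const struct ring *r)
{
	return r->head - r->tail;	/* correct even after the counters wrap */
}

static uint64_t ring_consume(struct ring *r)
{
	unsigned int idx = r->tail & (r->size - 1);

	r->tail++;
	return r->wrid[idx];
}

int main(void)
{
	struct ring r = { .head = 0, .tail = 0, .size = 8 };

	for (int i = 0; i < 5; i++)
		r.wrid[r.head++ & (r.size - 1)] = 100 + i;

	while (ring_occupancy(&r))
		printf("wr_id %llu\n", (unsigned long long)ring_consume(&r));
	return 0;
}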
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 53e03c8ede79..79e6309460dc 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -69,15 +69,6 @@ static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
69 return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); 69 return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
70} 70}
71 71
72static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
73{
74 return ++index % gsi->cap.max_send_wr;
75}
76
77#define for_each_outstanding_wr(gsi, index) \
78 for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
79 index = next_outstanding(gsi, index))
80
81/* Call with gsi->lock locked */ 72/* Call with gsi->lock locked */
82static void generate_completions(struct mlx5_ib_gsi_qp *gsi) 73static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
83{ 74{
@@ -85,8 +76,9 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
85 struct mlx5_ib_gsi_wr *wr; 76 struct mlx5_ib_gsi_wr *wr;
86 u32 index; 77 u32 index;
87 78
88 for_each_outstanding_wr(gsi, index) { 79 for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
89 wr = &gsi->outstanding_wrs[index]; 80 index++) {
81 wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];
90 82
91 if (!wr->completed) 83 if (!wr->completed)
92 break; 84 break;
@@ -430,8 +422,9 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
430 return -ENOMEM; 422 return -ENOMEM;
431 } 423 }
432 424
433 gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi]; 425 gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
434 gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi); 426 gsi->cap.max_send_wr];
427 gsi->outstanding_pi++;
435 428
436 if (!wc) { 429 if (!wc) {
437 memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc)); 430 memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
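The gsi.c hunks above drop the pre-wrapped next_outstanding() index in favour of free-running producer/consumer counters that are reduced modulo cap.max_send_wr only when the array is dereferenced, so a full ring (pi - ci equal to the ring size) is no longer indistinguishable from an empty one. A toy model of that pattern; the ring size, names and fullness check are illustrative, not taken from the driver:

/* Free-running indices, reduced modulo the ring size only at access time. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_SEND_WR 4

struct wr_slot {
	bool completed;
	int wr_id;
};

static struct wr_slot outstanding[MAX_SEND_WR];
static unsigned int outstanding_pi, outstanding_ci;

static int add_outstanding(int wr_id)
{
	if (outstanding_pi - outstanding_ci >= MAX_SEND_WR)
		return -1;	/* ring full */

	outstanding[outstanding_pi % MAX_SEND_WR] =
		(struct wr_slot){ .completed = false, .wr_id = wr_id };
	outstanding_pi++;
	return 0;
}

static void generate_completions(void)
{
	unsigned int index;

	for (index = outstanding_ci; index != outstanding_pi; index++) {
		struct wr_slot *wr = &outstanding[index % MAX_SEND_WR];

		if (!wr->completed)
			break;
		printf("completed wr_id %d\n", wr->wr_id);
	}
	outstanding_ci = index;
}

int main(void)
{
	add_outstanding(1);
	add_outstanding(2);
	outstanding[0].completed = true;
	generate_completions();	/* reports wr_id 1, then stops at the uncompleted wr_id 2 */
	return 0;
}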
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index dad63f038bb8..a84bb766fc62 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -42,11 +42,13 @@
42#include <asm/pat.h> 42#include <asm/pat.h>
43#endif 43#endif
44#include <linux/sched.h> 44#include <linux/sched.h>
45#include <linux/delay.h>
45#include <rdma/ib_user_verbs.h> 46#include <rdma/ib_user_verbs.h>
46#include <rdma/ib_addr.h> 47#include <rdma/ib_addr.h>
47#include <rdma/ib_cache.h> 48#include <rdma/ib_cache.h>
48#include <linux/mlx5/port.h> 49#include <linux/mlx5/port.h>
49#include <linux/mlx5/vport.h> 50#include <linux/mlx5/vport.h>
51#include <linux/list.h>
50#include <rdma/ib_smi.h> 52#include <rdma/ib_smi.h>
51#include <rdma/ib_umem.h> 53#include <rdma/ib_umem.h>
52#include <linux/in.h> 54#include <linux/in.h>
@@ -457,8 +459,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
457 int max_rq_sg; 459 int max_rq_sg;
458 int max_sq_sg; 460 int max_sq_sg;
459 u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); 461 u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
462 struct mlx5_ib_query_device_resp resp = {};
463 size_t resp_len;
464 u64 max_tso;
460 465
461 if (uhw->inlen || uhw->outlen) 466 resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
467 if (uhw->outlen && uhw->outlen < resp_len)
468 return -EINVAL;
469 else
470 resp.response_length = resp_len;
471
472 if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
462 return -EINVAL; 473 return -EINVAL;
463 474
464 memset(props, 0, sizeof(*props)); 475 memset(props, 0, sizeof(*props));
@@ -511,10 +522,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
511 if (MLX5_CAP_GEN(mdev, block_lb_mc)) 522 if (MLX5_CAP_GEN(mdev, block_lb_mc))
512 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 523 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
513 524
514 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 525 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
515 (MLX5_CAP_ETH(dev->mdev, csum_cap))) 526 if (MLX5_CAP_ETH(mdev, csum_cap))
516 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; 527 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
517 528
529 if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
530 max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
531 if (max_tso) {
532 resp.tso_caps.max_tso = 1 << max_tso;
533 resp.tso_caps.supported_qpts |=
534 1 << IB_QPT_RAW_PACKET;
535 resp.response_length += sizeof(resp.tso_caps);
536 }
537 }
538 }
539
518 if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { 540 if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
519 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 541 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
520 props->device_cap_flags |= IB_DEVICE_UD_TSO; 542 props->device_cap_flags |= IB_DEVICE_UD_TSO;
@@ -576,6 +598,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
576 if (!mlx5_core_is_pf(mdev)) 598 if (!mlx5_core_is_pf(mdev))
577 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; 599 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
578 600
601 if (uhw->outlen) {
602 err = ib_copy_to_udata(uhw, &resp, resp.response_length);
603
604 if (err)
605 return err;
606 }
607
579 return 0; 608 return 0;
580} 609}
581 610
@@ -983,6 +1012,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
983 goto out_uars; 1012 goto out_uars;
984 } 1013 }
985 1014
1015 INIT_LIST_HEAD(&context->vma_private_list);
986 INIT_LIST_HEAD(&context->db_page_list); 1016 INIT_LIST_HEAD(&context->db_page_list);
987 mutex_init(&context->db_page_mutex); 1017 mutex_init(&context->db_page_mutex);
988 1018
@@ -992,6 +1022,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
992 if (field_avail(typeof(resp), cqe_version, udata->outlen)) 1022 if (field_avail(typeof(resp), cqe_version, udata->outlen))
993 resp.response_length += sizeof(resp.cqe_version); 1023 resp.response_length += sizeof(resp.cqe_version);
994 1024
1025 if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
1026 resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
1027 resp.response_length += sizeof(resp.cmds_supp_uhw);
1028 }
1029
995 /* 1030 /*
996 * We don't want to expose information from the PCI bar that is located 1031 * We don't want to expose information from the PCI bar that is located
997 * after 4096 bytes, so if the arch only supports larger pages, let's 1032 * after 4096 bytes, so if the arch only supports larger pages, let's
@@ -1006,8 +1041,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1006 offsetof(struct mlx5_init_seg, internal_timer_h) % 1041 offsetof(struct mlx5_init_seg, internal_timer_h) %
1007 PAGE_SIZE; 1042 PAGE_SIZE;
1008 resp.response_length += sizeof(resp.hca_core_clock_offset) + 1043 resp.response_length += sizeof(resp.hca_core_clock_offset) +
1009 sizeof(resp.reserved2) + 1044 sizeof(resp.reserved2);
1010 sizeof(resp.reserved3);
1011 } 1045 }
1012 1046
1013 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1047 err = ib_copy_to_udata(udata, &resp, resp.response_length);
@@ -1086,6 +1120,125 @@ static int get_index(unsigned long offset)
1086 return get_arg(offset); 1120 return get_arg(offset);
1087} 1121}
1088 1122
1123static void mlx5_ib_vma_open(struct vm_area_struct *area)
1124{
1125 /* vma_open is called when a new VMA is created on top of our VMA. This
1126 * is done through either mremap flow or split_vma (usually due to
1127 * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
1128 * as this VMA is strongly hardware related. Therefore we set the
1129 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
1130 * calling us again and trying to do incorrect actions. We assume that
1131	 * the original VMA size is exactly a single page, and therefore no
1132	 * "splitting" operation will happen to it.
1133 */
1134 area->vm_ops = NULL;
1135}
1136
1137static void mlx5_ib_vma_close(struct vm_area_struct *area)
1138{
1139 struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
1140
1141 /* It's guaranteed that all VMAs opened on a FD are closed before the
1142 * file itself is closed, therefore no sync is needed with the regular
1143 * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
1144	 * However, a sync is needed with accesses to the vma made as part of
1145	 * mlx5_ib_disassociate_ucontext().
1146 * The close operation is usually called under mm->mmap_sem except when
1147 * process is exiting.
1148 * The exiting case is handled explicitly as part of
1149 * mlx5_ib_disassociate_ucontext.
1150 */
1151 mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
1152
1153 /* setting the vma context pointer to null in the mlx5_ib driver's
1154	 * private data, to protect against a race condition in
1155 * mlx5_ib_disassociate_ucontext().
1156 */
1157 mlx5_ib_vma_priv_data->vma = NULL;
1158 list_del(&mlx5_ib_vma_priv_data->list);
1159 kfree(mlx5_ib_vma_priv_data);
1160}
1161
1162static const struct vm_operations_struct mlx5_ib_vm_ops = {
1163 .open = mlx5_ib_vma_open,
1164 .close = mlx5_ib_vma_close
1165};
1166
1167static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
1168 struct mlx5_ib_ucontext *ctx)
1169{
1170 struct mlx5_ib_vma_private_data *vma_prv;
1171 struct list_head *vma_head = &ctx->vma_private_list;
1172
1173 vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
1174 if (!vma_prv)
1175 return -ENOMEM;
1176
1177 vma_prv->vma = vma;
1178 vma->vm_private_data = vma_prv;
1179 vma->vm_ops = &mlx5_ib_vm_ops;
1180
1181 list_add(&vma_prv->list, vma_head);
1182
1183 return 0;
1184}
1185
1186static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1187{
1188 int ret;
1189 struct vm_area_struct *vma;
1190 struct mlx5_ib_vma_private_data *vma_private, *n;
1191 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1192 struct task_struct *owning_process = NULL;
1193 struct mm_struct *owning_mm = NULL;
1194
1195 owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
1196 if (!owning_process)
1197 return;
1198
1199 owning_mm = get_task_mm(owning_process);
1200 if (!owning_mm) {
1201 pr_info("no mm, disassociate ucontext is pending task termination\n");
1202 while (1) {
1203 put_task_struct(owning_process);
1204 usleep_range(1000, 2000);
1205 owning_process = get_pid_task(ibcontext->tgid,
1206 PIDTYPE_PID);
1207 if (!owning_process ||
1208 owning_process->state == TASK_DEAD) {
1209 pr_info("disassociate ucontext done, task was terminated\n");
1210				/* If the task died we still need to release
1211				 * its task_struct.
1212 */
1213 if (owning_process)
1214 put_task_struct(owning_process);
1215 return;
1216 }
1217 }
1218 }
1219
1220	/* Need to protect against a race with the vma being closed as part
1221	 * of mlx5_ib_vma_close().
1222 */
1223 down_read(&owning_mm->mmap_sem);
1224 list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
1225 list) {
1226 vma = vma_private->vma;
1227 ret = zap_vma_ptes(vma, vma->vm_start,
1228 PAGE_SIZE);
1229 WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
1230		/* The context is being destroyed, so its vm_ops must
1231		 * not be accessed any more.
1232 */
1233 vma->vm_ops = NULL;
1234 list_del(&vma_private->list);
1235 kfree(vma_private);
1236 }
1237 up_read(&owning_mm->mmap_sem);
1238 mmput(owning_mm);
1239 put_task_struct(owning_process);
1240}
1241
1089static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) 1242static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
1090{ 1243{
1091 switch (cmd) { 1244 switch (cmd) {
@@ -1101,8 +1254,10 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
1101} 1254}
1102 1255
1103static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, 1256static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
1104 struct vm_area_struct *vma, struct mlx5_uuar_info *uuari) 1257 struct vm_area_struct *vma,
1258 struct mlx5_ib_ucontext *context)
1105{ 1259{
1260 struct mlx5_uuar_info *uuari = &context->uuari;
1106 int err; 1261 int err;
1107 unsigned long idx; 1262 unsigned long idx;
1108 phys_addr_t pfn, pa; 1263 phys_addr_t pfn, pa;
@@ -1152,14 +1307,13 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
1152 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), 1307 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
1153 vma->vm_start, &pa); 1308 vma->vm_start, &pa);
1154 1309
1155 return 0; 1310 return mlx5_ib_set_vma_data(vma, context);
1156} 1311}
1157 1312
1158static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 1313static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1159{ 1314{
1160 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1315 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1161 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1316 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1162 struct mlx5_uuar_info *uuari = &context->uuari;
1163 unsigned long command; 1317 unsigned long command;
1164 phys_addr_t pfn; 1318 phys_addr_t pfn;
1165 1319
@@ -1168,7 +1322,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
1168 case MLX5_IB_MMAP_WC_PAGE: 1322 case MLX5_IB_MMAP_WC_PAGE:
1169 case MLX5_IB_MMAP_NC_PAGE: 1323 case MLX5_IB_MMAP_NC_PAGE:
1170 case MLX5_IB_MMAP_REGULAR_PAGE: 1324 case MLX5_IB_MMAP_REGULAR_PAGE:
1171 return uar_mmap(dev, command, vma, uuari); 1325 return uar_mmap(dev, command, vma, context);
1172 1326
1173 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 1327 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
1174 return -ENOSYS; 1328 return -ENOSYS;
@@ -1331,6 +1485,32 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
1331 &ib_spec->ipv4.val.dst_ip, 1485 &ib_spec->ipv4.val.dst_ip,
1332 sizeof(ib_spec->ipv4.val.dst_ip)); 1486 sizeof(ib_spec->ipv4.val.dst_ip));
1333 break; 1487 break;
1488 case IB_FLOW_SPEC_IPV6:
1489 if (ib_spec->size != sizeof(ib_spec->ipv6))
1490 return -EINVAL;
1491
1492 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
1493 ethertype, 0xffff);
1494 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
1495 ethertype, ETH_P_IPV6);
1496
1497 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
1498 src_ipv4_src_ipv6.ipv6_layout.ipv6),
1499 &ib_spec->ipv6.mask.src_ip,
1500 sizeof(ib_spec->ipv6.mask.src_ip));
1501 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
1502 src_ipv4_src_ipv6.ipv6_layout.ipv6),
1503 &ib_spec->ipv6.val.src_ip,
1504 sizeof(ib_spec->ipv6.val.src_ip));
1505 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
1506 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1507 &ib_spec->ipv6.mask.dst_ip,
1508 sizeof(ib_spec->ipv6.mask.dst_ip));
1509 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
1510 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1511 &ib_spec->ipv6.val.dst_ip,
1512 sizeof(ib_spec->ipv6.val.dst_ip));
1513 break;
1334 case IB_FLOW_SPEC_TCP: 1514 case IB_FLOW_SPEC_TCP:
1335 if (ib_spec->size != sizeof(ib_spec->tcp_udp)) 1515 if (ib_spec->size != sizeof(ib_spec->tcp_udp))
1336 return -EINVAL; 1516 return -EINVAL;
@@ -1801,15 +1981,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1801 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); 1981 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1802} 1982}
1803 1983
1804static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1805 char *buf)
1806{
1807 struct mlx5_ib_dev *dev =
1808 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1809 return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1810 fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1811}
1812
1813static ssize_t show_rev(struct device *device, struct device_attribute *attr, 1984static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1814 char *buf) 1985 char *buf)
1815{ 1986{
@@ -1828,7 +1999,6 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
1828} 1999}
1829 2000
1830static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 2001static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
1831static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1832static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 2002static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
1833static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 2003static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
1834static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); 2004static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
@@ -1836,7 +2006,6 @@ static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
1836 2006
1837static struct device_attribute *mlx5_class_attributes[] = { 2007static struct device_attribute *mlx5_class_attributes[] = {
1838 &dev_attr_hw_rev, 2008 &dev_attr_hw_rev,
1839 &dev_attr_fw_ver,
1840 &dev_attr_hca_type, 2009 &dev_attr_hca_type,
1841 &dev_attr_board_id, 2010 &dev_attr_board_id,
1842 &dev_attr_fw_pages, 2011 &dev_attr_fw_pages,
@@ -1854,6 +2023,65 @@ static void pkey_change_handler(struct work_struct *work)
1854 mutex_unlock(&ports->devr->mutex); 2023 mutex_unlock(&ports->devr->mutex);
1855} 2024}
1856 2025
2026static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
2027{
2028 struct mlx5_ib_qp *mqp;
2029 struct mlx5_ib_cq *send_mcq, *recv_mcq;
2030 struct mlx5_core_cq *mcq;
2031 struct list_head cq_armed_list;
2032 unsigned long flags_qp;
2033 unsigned long flags_cq;
2034 unsigned long flags;
2035
2036 INIT_LIST_HEAD(&cq_armed_list);
2037
2038	/* Go over the qp list residing on that ibdev, syncing with qp create/destroy. */
2039 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
2040 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
2041 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
2042 if (mqp->sq.tail != mqp->sq.head) {
2043 send_mcq = to_mcq(mqp->ibqp.send_cq);
2044 spin_lock_irqsave(&send_mcq->lock, flags_cq);
2045 if (send_mcq->mcq.comp &&
2046 mqp->ibqp.send_cq->comp_handler) {
2047 if (!send_mcq->mcq.reset_notify_added) {
2048 send_mcq->mcq.reset_notify_added = 1;
2049 list_add_tail(&send_mcq->mcq.reset_notify,
2050 &cq_armed_list);
2051 }
2052 }
2053 spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
2054 }
2055 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
2056 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
2057 /* no handling is needed for SRQ */
2058 if (!mqp->ibqp.srq) {
2059 if (mqp->rq.tail != mqp->rq.head) {
2060 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
2061 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
2062 if (recv_mcq->mcq.comp &&
2063 mqp->ibqp.recv_cq->comp_handler) {
2064 if (!recv_mcq->mcq.reset_notify_added) {
2065 recv_mcq->mcq.reset_notify_added = 1;
2066 list_add_tail(&recv_mcq->mcq.reset_notify,
2067 &cq_armed_list);
2068 }
2069 }
2070 spin_unlock_irqrestore(&recv_mcq->lock,
2071 flags_cq);
2072 }
2073 }
2074 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
2075 }
2076	/* At this point all in-flight post_send operations are visible, since
2077	 * we took and released the locks above.  Now arm all involved CQs.
2078	 */
2079 list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
2080 mcq->comp(mcq);
2081 }
2082 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
2083}
2084
1857static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, 2085static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1858 enum mlx5_dev_event event, unsigned long param) 2086 enum mlx5_dev_event event, unsigned long param)
1859{ 2087{
@@ -1866,6 +2094,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1866 case MLX5_DEV_EVENT_SYS_ERROR: 2094 case MLX5_DEV_EVENT_SYS_ERROR:
1867 ibdev->ib_active = false; 2095 ibdev->ib_active = false;
1868 ibev.event = IB_EVENT_DEVICE_FATAL; 2096 ibev.event = IB_EVENT_DEVICE_FATAL;
2097 mlx5_ib_handle_internal_error(ibdev);
1869 break; 2098 break;
1870 2099
1871 case MLX5_DEV_EVENT_PORT_UP: 2100 case MLX5_DEV_EVENT_PORT_UP:
@@ -2272,6 +2501,15 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
2272 return 0; 2501 return 0;
2273} 2502}
2274 2503
2504static void get_dev_fw_str(struct ib_device *ibdev, char *str,
2505 size_t str_len)
2506{
2507 struct mlx5_ib_dev *dev =
2508 container_of(ibdev, struct mlx5_ib_dev, ib_dev);
2509 snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev),
2510 fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
2511}
2512
2275static int mlx5_enable_roce(struct mlx5_ib_dev *dev) 2513static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
2276{ 2514{
2277 int err; 2515 int err;
@@ -2298,6 +2536,113 @@ static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
2298 unregister_netdevice_notifier(&dev->roce.nb); 2536 unregister_netdevice_notifier(&dev->roce.nb);
2299} 2537}
2300 2538
2539static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
2540{
2541 unsigned int i;
2542
2543 for (i = 0; i < dev->num_ports; i++)
2544 mlx5_core_dealloc_q_counter(dev->mdev,
2545 dev->port[i].q_cnt_id);
2546}
2547
2548static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
2549{
2550 int i;
2551 int ret;
2552
2553 for (i = 0; i < dev->num_ports; i++) {
2554 ret = mlx5_core_alloc_q_counter(dev->mdev,
2555 &dev->port[i].q_cnt_id);
2556 if (ret) {
2557 mlx5_ib_warn(dev,
2558 "couldn't allocate queue counter for port %d, err %d\n",
2559 i + 1, ret);
2560 goto dealloc_counters;
2561 }
2562 }
2563
2564 return 0;
2565
2566dealloc_counters:
2567 while (--i >= 0)
2568 mlx5_core_dealloc_q_counter(dev->mdev,
2569 dev->port[i].q_cnt_id);
2570
2571 return ret;
2572}
2573
2574static const char * const names[] = {
2575 "rx_write_requests",
2576 "rx_read_requests",
2577 "rx_atomic_requests",
2578 "out_of_buffer",
2579 "out_of_sequence",
2580 "duplicate_request",
2581 "rnr_nak_retry_err",
2582 "packet_seq_err",
2583 "implied_nak_seq_err",
2584 "local_ack_timeout_err",
2585};
2586
2587static const size_t stats_offsets[] = {
2588 MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
2589 MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
2590 MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
2591 MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
2592 MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
2593 MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
2594 MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
2595 MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
2596 MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
2597 MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
2598};
2599
2600static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
2601 u8 port_num)
2602{
2603 BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
2604
2605 /* We support only per port stats */
2606 if (port_num == 0)
2607 return NULL;
2608
2609 return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
2610 RDMA_HW_STATS_DEFAULT_LIFESPAN);
2611}
2612
2613static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
2614 struct rdma_hw_stats *stats,
2615 u8 port, int index)
2616{
2617 struct mlx5_ib_dev *dev = to_mdev(ibdev);
2618 int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
2619 void *out;
2620 __be32 val;
2621 int ret;
2622 int i;
2623
2624 if (!port || !stats)
2625 return -ENOSYS;
2626
2627 out = mlx5_vzalloc(outlen);
2628 if (!out)
2629 return -ENOMEM;
2630
2631 ret = mlx5_core_query_q_counter(dev->mdev,
2632 dev->port[port - 1].q_cnt_id, 0,
2633 out, outlen);
2634 if (ret)
2635 goto free;
2636
2637 for (i = 0; i < ARRAY_SIZE(names); i++) {
2638 val = *(__be32 *)(out + stats_offsets[i]);
2639 stats->value[i] = (u64)be32_to_cpu(val);
2640 }
2641free:
2642 kvfree(out);
2643 return ARRAY_SIZE(names);
2644}
2645
2301static void *mlx5_ib_add(struct mlx5_core_dev *mdev) 2646static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2302{ 2647{
2303 struct mlx5_ib_dev *dev; 2648 struct mlx5_ib_dev *dev;
@@ -2320,10 +2665,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2320 2665
2321 dev->mdev = mdev; 2666 dev->mdev = mdev;
2322 2667
2668 dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2669 GFP_KERNEL);
2670 if (!dev->port)
2671 goto err_dealloc;
2672
2323 rwlock_init(&dev->roce.netdev_lock); 2673 rwlock_init(&dev->roce.netdev_lock);
2324 err = get_port_caps(dev); 2674 err = get_port_caps(dev);
2325 if (err) 2675 if (err)
2326 goto err_dealloc; 2676 goto err_free_port;
2327 2677
2328 if (mlx5_use_mad_ifc(dev)) 2678 if (mlx5_use_mad_ifc(dev))
2329 get_ext_port_caps(dev); 2679 get_ext_port_caps(dev);
@@ -2418,6 +2768,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2418 dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; 2768 dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
2419 dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; 2769 dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
2420 dev->ib_dev.get_port_immutable = mlx5_port_immutable; 2770 dev->ib_dev.get_port_immutable = mlx5_port_immutable;
2771 dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
2421 if (mlx5_core_is_pf(mdev)) { 2772 if (mlx5_core_is_pf(mdev)) {
2422 dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; 2773 dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
2423 dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; 2774 dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
@@ -2425,6 +2776,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2425 dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; 2776 dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
2426 } 2777 }
2427 2778
2779 dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
2780
2428 mlx5_ib_internal_fill_odp_caps(dev); 2781 mlx5_ib_internal_fill_odp_caps(dev);
2429 2782
2430 if (MLX5_CAP_GEN(mdev, imaicl)) { 2783 if (MLX5_CAP_GEN(mdev, imaicl)) {
@@ -2435,6 +2788,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2435 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 2788 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
2436 } 2789 }
2437 2790
2791 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
2792 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
2793 dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
2794 dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
2795 }
2796
2438 if (MLX5_CAP_GEN(mdev, xrc)) { 2797 if (MLX5_CAP_GEN(mdev, xrc)) {
2439 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; 2798 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2440 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; 2799 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@@ -2447,9 +2806,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2447 IB_LINK_LAYER_ETHERNET) { 2806 IB_LINK_LAYER_ETHERNET) {
2448 dev->ib_dev.create_flow = mlx5_ib_create_flow; 2807 dev->ib_dev.create_flow = mlx5_ib_create_flow;
2449 dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; 2808 dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
2809 dev->ib_dev.create_wq = mlx5_ib_create_wq;
2810 dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
2811 dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
2812 dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
2813 dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
2450 dev->ib_dev.uverbs_ex_cmd_mask |= 2814 dev->ib_dev.uverbs_ex_cmd_mask |=
2451 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | 2815 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
2452 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 2816 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
2817 (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
2818 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
2819 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
2820 (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
2821 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
2453 } 2822 }
2454 err = init_node_data(dev); 2823 err = init_node_data(dev);
2455 if (err) 2824 if (err)
@@ -2457,6 +2826,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2457 2826
2458 mutex_init(&dev->flow_db.lock); 2827 mutex_init(&dev->flow_db.lock);
2459 mutex_init(&dev->cap_mask_mutex); 2828 mutex_init(&dev->cap_mask_mutex);
2829 INIT_LIST_HEAD(&dev->qp_list);
2830 spin_lock_init(&dev->reset_flow_resource_lock);
2460 2831
2461 if (ll == IB_LINK_LAYER_ETHERNET) { 2832 if (ll == IB_LINK_LAYER_ETHERNET) {
2462 err = mlx5_enable_roce(dev); 2833 err = mlx5_enable_roce(dev);
@@ -2472,10 +2843,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2472 if (err) 2843 if (err)
2473 goto err_rsrc; 2844 goto err_rsrc;
2474 2845
2475 err = ib_register_device(&dev->ib_dev, NULL); 2846 err = mlx5_ib_alloc_q_counters(dev);
2476 if (err) 2847 if (err)
2477 goto err_odp; 2848 goto err_odp;
2478 2849
2850 err = ib_register_device(&dev->ib_dev, NULL);
2851 if (err)
2852 goto err_q_cnt;
2853
2479 err = create_umr_res(dev); 2854 err = create_umr_res(dev);
2480 if (err) 2855 if (err)
2481 goto err_dev; 2856 goto err_dev;
@@ -2497,6 +2872,9 @@ err_umrc:
2497err_dev: 2872err_dev:
2498 ib_unregister_device(&dev->ib_dev); 2873 ib_unregister_device(&dev->ib_dev);
2499 2874
2875err_q_cnt:
2876 mlx5_ib_dealloc_q_counters(dev);
2877
2500err_odp: 2878err_odp:
2501 mlx5_ib_odp_remove_one(dev); 2879 mlx5_ib_odp_remove_one(dev);
2502 2880
@@ -2507,6 +2885,9 @@ err_disable_roce:
2507 if (ll == IB_LINK_LAYER_ETHERNET) 2885 if (ll == IB_LINK_LAYER_ETHERNET)
2508 mlx5_disable_roce(dev); 2886 mlx5_disable_roce(dev);
2509 2887
2888err_free_port:
2889 kfree(dev->port);
2890
2510err_dealloc: 2891err_dealloc:
2511 ib_dealloc_device((struct ib_device *)dev); 2892 ib_dealloc_device((struct ib_device *)dev);
2512 2893
@@ -2519,11 +2900,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
2519 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); 2900 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
2520 2901
2521 ib_unregister_device(&dev->ib_dev); 2902 ib_unregister_device(&dev->ib_dev);
2903 mlx5_ib_dealloc_q_counters(dev);
2522 destroy_umrc_res(dev); 2904 destroy_umrc_res(dev);
2523 mlx5_ib_odp_remove_one(dev); 2905 mlx5_ib_odp_remove_one(dev);
2524 destroy_dev_resources(&dev->devr); 2906 destroy_dev_resources(&dev->devr);
2525 if (ll == IB_LINK_LAYER_ETHERNET) 2907 if (ll == IB_LINK_LAYER_ETHERNET)
2526 mlx5_disable_roce(dev); 2908 mlx5_disable_roce(dev);
2909 kfree(dev->port);
2527 ib_dealloc_device(&dev->ib_dev); 2910 ib_dealloc_device(&dev->ib_dev);
2528} 2911}
2529 2912
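In mlx5_ib_get_hw_stats() above, each counter is pulled out of the raw query_q_counter output buffer at a fixed byte offset (the stats_offsets[] table) as a big-endian 32-bit value and widened to 64 bits. A rough user-space model of that readout; the offsets, counter subset and buffer contents below are invented purely for illustration:

/* Parse big-endian 32-bit counters at fixed offsets in a raw buffer. */
#include <arpa/inet.h>	/* htonl()/ntohl() */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static const char * const names[] = { "out_of_buffer", "duplicate_request" };
static const size_t offsets[]     = { 0x10,            0x14 };

int main(void)
{
	unsigned char out[0x20] = { 0 };
	uint32_t be;
	size_t i;

	/* pretend the device reported out_of_buffer = 7 */
	be = htonl(7);
	memcpy(out + offsets[0], &be, sizeof(be));

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		memcpy(&be, out + offsets[i], sizeof(be));
		printf("%s = %llu\n", names[i], (unsigned long long)ntohl(be));
	}
	return 0;
}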
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c4a9825828bc..372385d0f993 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -105,6 +105,11 @@ enum {
105 MLX5_CQE_VERSION_V1, 105 MLX5_CQE_VERSION_V1,
106}; 106};
107 107
108struct mlx5_ib_vma_private_data {
109 struct list_head list;
110 struct vm_area_struct *vma;
111};
112
108struct mlx5_ib_ucontext { 113struct mlx5_ib_ucontext {
109 struct ib_ucontext ibucontext; 114 struct ib_ucontext ibucontext;
110 struct list_head db_page_list; 115 struct list_head db_page_list;
@@ -116,6 +121,7 @@ struct mlx5_ib_ucontext {
116 u8 cqe_version; 121 u8 cqe_version;
117 /* Transport Domain number */ 122 /* Transport Domain number */
118 u32 tdn; 123 u32 tdn;
124 struct list_head vma_private_list;
119}; 125};
120 126
121static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) 127static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -217,12 +223,41 @@ struct mlx5_ib_wq {
217 void *qend; 223 void *qend;
218}; 224};
219 225
226struct mlx5_ib_rwq {
227 struct ib_wq ibwq;
228 u32 rqn;
229 u32 rq_num_pas;
230 u32 log_rq_stride;
231 u32 log_rq_size;
232 u32 rq_page_offset;
233 u32 log_page_size;
234 struct ib_umem *umem;
235 size_t buf_size;
236 unsigned int page_shift;
237 int create_type;
238 struct mlx5_db db;
239 u32 user_index;
240 u32 wqe_count;
241 u32 wqe_shift;
242 int wq_sig;
243};
244
220enum { 245enum {
221 MLX5_QP_USER, 246 MLX5_QP_USER,
222 MLX5_QP_KERNEL, 247 MLX5_QP_KERNEL,
223 MLX5_QP_EMPTY 248 MLX5_QP_EMPTY
224}; 249};
225 250
251enum {
252 MLX5_WQ_USER,
253 MLX5_WQ_KERNEL
254};
255
256struct mlx5_ib_rwq_ind_table {
257 struct ib_rwq_ind_table ib_rwq_ind_tbl;
258 u32 rqtn;
259};
260
226/* 261/*
227 * Connect-IB can trigger up to four concurrent pagefaults 262 * Connect-IB can trigger up to four concurrent pagefaults
228 * per-QP. 263 * per-QP.
@@ -266,6 +301,10 @@ struct mlx5_ib_qp_trans {
266 u8 resp_depth; 301 u8 resp_depth;
267}; 302};
268 303
304struct mlx5_ib_rss_qp {
305 u32 tirn;
306};
307
269struct mlx5_ib_rq { 308struct mlx5_ib_rq {
270 struct mlx5_ib_qp_base base; 309 struct mlx5_ib_qp_base base;
271 struct mlx5_ib_wq *rq; 310 struct mlx5_ib_wq *rq;
@@ -294,6 +333,7 @@ struct mlx5_ib_qp {
294 union { 333 union {
295 struct mlx5_ib_qp_trans trans_qp; 334 struct mlx5_ib_qp_trans trans_qp;
296 struct mlx5_ib_raw_packet_qp raw_packet_qp; 335 struct mlx5_ib_raw_packet_qp raw_packet_qp;
336 struct mlx5_ib_rss_qp rss_qp;
297 }; 337 };
298 struct mlx5_buf buf; 338 struct mlx5_buf buf;
299 339
@@ -340,6 +380,9 @@ struct mlx5_ib_qp {
340 spinlock_t disable_page_faults_lock; 380 spinlock_t disable_page_faults_lock;
341 struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS]; 381 struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
342#endif 382#endif
383 struct list_head qps_list;
384 struct list_head cq_recv_list;
385 struct list_head cq_send_list;
343}; 386};
344 387
345struct mlx5_ib_cq_buf { 388struct mlx5_ib_cq_buf {
@@ -401,6 +444,8 @@ struct mlx5_ib_cq {
401 struct mlx5_ib_cq_buf *resize_buf; 444 struct mlx5_ib_cq_buf *resize_buf;
402 struct ib_umem *resize_umem; 445 struct ib_umem *resize_umem;
403 int cqe_size; 446 int cqe_size;
447 struct list_head list_send_qp;
448 struct list_head list_recv_qp;
404 u32 create_flags; 449 u32 create_flags;
405 struct list_head wc_list; 450 struct list_head wc_list;
406 enum ib_cq_notify_flags notify_flags; 451 enum ib_cq_notify_flags notify_flags;
@@ -546,6 +591,10 @@ struct mlx5_ib_resources {
546 struct mutex mutex; 591 struct mutex mutex;
547}; 592};
548 593
594struct mlx5_ib_port {
595 u16 q_cnt_id;
596};
597
549struct mlx5_roce { 598struct mlx5_roce {
550 /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL 599 /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
551 * netdev pointer 600 * netdev pointer
@@ -581,6 +630,11 @@ struct mlx5_ib_dev {
581 struct srcu_struct mr_srcu; 630 struct srcu_struct mr_srcu;
582#endif 631#endif
583 struct mlx5_ib_flow_db flow_db; 632 struct mlx5_ib_flow_db flow_db;
633 /* protect resources needed as part of reset flow */
634 spinlock_t reset_flow_resource_lock;
635 struct list_head qp_list;
636 /* Array with num_ports elements */
637 struct mlx5_ib_port *port;
584}; 638};
585 639
586static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) 640static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -628,6 +682,16 @@ static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
628 return container_of(ibqp, struct mlx5_ib_qp, ibqp); 682 return container_of(ibqp, struct mlx5_ib_qp, ibqp);
629} 683}
630 684
685static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq)
686{
687 return container_of(ibwq, struct mlx5_ib_rwq, ibwq);
688}
689
690static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
691{
692 return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl);
693}
694
631static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) 695static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
632{ 696{
633 return container_of(msrq, struct mlx5_ib_srq, msrq); 697 return container_of(msrq, struct mlx5_ib_srq, msrq);
@@ -762,6 +826,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
762int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); 826int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
763int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 827int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
764 struct ib_mr_status *mr_status); 828 struct ib_mr_status *mr_status);
829struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
830 struct ib_wq_init_attr *init_attr,
831 struct ib_udata *udata);
832int mlx5_ib_destroy_wq(struct ib_wq *wq);
833int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
834 u32 wq_attr_mask, struct ib_udata *udata);
835struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
836 struct ib_rwq_ind_table_init_attr *init_attr,
837 struct ib_udata *udata);
838int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
765 839
766#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 840#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
767extern struct workqueue_struct *mlx5_ib_page_fault_wq; 841extern struct workqueue_struct *mlx5_ib_page_fault_wq;
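The to_mrwq() and to_mrwq_ind_table() helpers added above are the usual container_of() converters: given a pointer to the embedded core object, they recover the enclosing driver structure by subtracting the member's offset. A stand-alone illustration with stand-in types (not the real ib_wq/mlx5_ib_rwq definitions):

/* container_of() idiom with stand-in types. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_wq_stub { int wq_num; };

struct mlx5_rwq_stub {
	unsigned int rqn;
	struct ib_wq_stub ibwq;		/* embedded core object */
};

static struct mlx5_rwq_stub *to_mrwq_stub(struct ib_wq_stub *ibwq)
{
	return container_of(ibwq, struct mlx5_rwq_stub, ibwq);
}

int main(void)
{
	struct mlx5_rwq_stub rwq = { .rqn = 42 };
	struct ib_wq_stub *core = &rwq.ibwq;	/* what the core layer hands back */

	printf("rqn = %u\n", to_mrwq_stub(core)->rqn);	/* prints 42 */
	return 0;
}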
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8cf2ce50511f..4b021305c321 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1193,12 +1193,16 @@ error:
1193 1193
1194static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1194static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1195{ 1195{
1196 struct mlx5_core_dev *mdev = dev->mdev;
1196 struct umr_common *umrc = &dev->umrc; 1197 struct umr_common *umrc = &dev->umrc;
1197 struct mlx5_ib_umr_context umr_context; 1198 struct mlx5_ib_umr_context umr_context;
1198 struct mlx5_umr_wr umrwr = {}; 1199 struct mlx5_umr_wr umrwr = {};
1199 struct ib_send_wr *bad; 1200 struct ib_send_wr *bad;
1200 int err; 1201 int err;
1201 1202
1203 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1204 return 0;
1205
1202 mlx5_ib_init_umr_context(&umr_context); 1206 mlx5_ib_init_umr_context(&umr_context);
1203 1207
1204 umrwr.wr.wr_cqe = &umr_context.cqe; 1208 umrwr.wr.wr_cqe = &umr_context.cqe;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ce0a7ab35a22..0dd7d93cac95 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -77,6 +77,10 @@ struct mlx5_wqe_eth_pad {
77 u8 rsvd0[16]; 77 u8 rsvd0[16];
78}; 78};
79 79
80static void get_cqs(enum ib_qp_type qp_type,
81 struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
82 struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
83
80static int is_qp0(enum ib_qp_type qp_type) 84static int is_qp0(enum ib_qp_type qp_type)
81{ 85{
82 return qp_type == IB_QPT_SMI; 86 return qp_type == IB_QPT_SMI;
@@ -609,6 +613,11 @@ static int to_mlx5_st(enum ib_qp_type type)
609 } 613 }
610} 614}
611 615
616static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
617 struct mlx5_ib_cq *recv_cq);
618static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
619 struct mlx5_ib_cq *recv_cq);
620
612static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) 621static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
613{ 622{
614 return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; 623 return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
@@ -649,6 +658,71 @@ err_umem:
649 return err; 658 return err;
650} 659}
651 660
661static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
662{
663 struct mlx5_ib_ucontext *context;
664
665 context = to_mucontext(pd->uobject->context);
666 mlx5_ib_db_unmap_user(context, &rwq->db);
667 if (rwq->umem)
668 ib_umem_release(rwq->umem);
669}
670
671static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
672 struct mlx5_ib_rwq *rwq,
673 struct mlx5_ib_create_wq *ucmd)
674{
675 struct mlx5_ib_ucontext *context;
676 int page_shift = 0;
677 int npages;
678 u32 offset = 0;
679 int ncont = 0;
680 int err;
681
682 if (!ucmd->buf_addr)
683 return -EINVAL;
684
685 context = to_mucontext(pd->uobject->context);
686 rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
687 rwq->buf_size, 0, 0);
688 if (IS_ERR(rwq->umem)) {
689 mlx5_ib_dbg(dev, "umem_get failed\n");
690 err = PTR_ERR(rwq->umem);
691 return err;
692 }
693
694 mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift,
695 &ncont, NULL);
696 err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
697 &rwq->rq_page_offset);
698 if (err) {
699 mlx5_ib_warn(dev, "bad offset\n");
700 goto err_umem;
701 }
702
703 rwq->rq_num_pas = ncont;
704 rwq->page_shift = page_shift;
705 rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
706 rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
707
708 mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
709 (unsigned long long)ucmd->buf_addr, rwq->buf_size,
710 npages, page_shift, ncont, offset);
711
712 err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
713 if (err) {
714 mlx5_ib_dbg(dev, "map failed\n");
715 goto err_umem;
716 }
717
718 rwq->create_type = MLX5_WQ_USER;
719 return 0;
720
721err_umem:
722 ib_umem_release(rwq->umem);
723 return err;
724}
725
652static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, 726static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
653 struct mlx5_ib_qp *qp, struct ib_udata *udata, 727 struct mlx5_ib_qp *qp, struct ib_udata *udata,
654 struct ib_qp_init_attr *attr, 728 struct ib_qp_init_attr *attr,
@@ -1201,6 +1275,187 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
1201 rq->doorbell = &qp->db; 1275 rq->doorbell = &qp->db;
1202} 1276}
1203 1277
1278static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1279{
1280 mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
1281}
1282
1283static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1284 struct ib_pd *pd,
1285 struct ib_qp_init_attr *init_attr,
1286 struct ib_udata *udata)
1287{
1288 struct ib_uobject *uobj = pd->uobject;
1289 struct ib_ucontext *ucontext = uobj->context;
1290 struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
1291 struct mlx5_ib_create_qp_resp resp = {};
1292 int inlen;
1293 int err;
1294 u32 *in;
1295 void *tirc;
1296 void *hfso;
1297 u32 selected_fields = 0;
1298 size_t min_resp_len;
1299 u32 tdn = mucontext->tdn;
1300 struct mlx5_ib_create_qp_rss ucmd = {};
1301 size_t required_cmd_sz;
1302
1303 if (init_attr->qp_type != IB_QPT_RAW_PACKET)
1304 return -EOPNOTSUPP;
1305
1306 if (init_attr->create_flags || init_attr->send_cq)
1307 return -EINVAL;
1308
1309 min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
1310 if (udata->outlen < min_resp_len)
1311 return -EINVAL;
1312
1313 required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1);
1314 if (udata->inlen < required_cmd_sz) {
1315 mlx5_ib_dbg(dev, "invalid inlen\n");
1316 return -EINVAL;
1317 }
1318
1319 if (udata->inlen > sizeof(ucmd) &&
1320 !ib_is_udata_cleared(udata, sizeof(ucmd),
1321 udata->inlen - sizeof(ucmd))) {
1322 mlx5_ib_dbg(dev, "inlen is not supported\n");
1323 return -EOPNOTSUPP;
1324 }
1325
1326 if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
1327 mlx5_ib_dbg(dev, "copy failed\n");
1328 return -EFAULT;
1329 }
1330
1331 if (ucmd.comp_mask) {
1332 mlx5_ib_dbg(dev, "invalid comp mask\n");
1333 return -EOPNOTSUPP;
1334 }
1335
1336 if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) {
1337 mlx5_ib_dbg(dev, "invalid reserved\n");
1338 return -EOPNOTSUPP;
1339 }
1340
1341 err = ib_copy_to_udata(udata, &resp, min_resp_len);
1342 if (err) {
1343 mlx5_ib_dbg(dev, "copy failed\n");
1344 return -EINVAL;
1345 }
1346
1347 inlen = MLX5_ST_SZ_BYTES(create_tir_in);
1348 in = mlx5_vzalloc(inlen);
1349 if (!in)
1350 return -ENOMEM;
1351
1352 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1353 MLX5_SET(tirc, tirc, disp_type,
1354 MLX5_TIRC_DISP_TYPE_INDIRECT);
1355 MLX5_SET(tirc, tirc, indirect_table,
1356 init_attr->rwq_ind_tbl->ind_tbl_num);
1357 MLX5_SET(tirc, tirc, transport_domain, tdn);
1358
1359 hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1360 switch (ucmd.rx_hash_function) {
1361 case MLX5_RX_HASH_FUNC_TOEPLITZ:
1362 {
1363 void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1364 size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
1365
1366 if (len != ucmd.rx_key_len) {
1367 err = -EINVAL;
1368 goto err;
1369 }
1370
1371 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1372 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1373 memcpy(rss_key, ucmd.rx_hash_key, len);
1374 break;
1375 }
1376 default:
1377 err = -EOPNOTSUPP;
1378 goto err;
1379 }
1380
1381 if (!ucmd.rx_hash_fields_mask) {
1382 /* special case when this TIR serves as steering entry without hashing */
1383		/* Special case: this TIR serves as a steering entry without hashing */
1384 goto create_tir;
1385 err = -EINVAL;
1386 goto err;
1387 }
1388
1389 if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1390 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
1391 ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
1392 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
1393 err = -EINVAL;
1394 goto err;
1395 }
1396
1397 /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
1398 if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1399 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
1400 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1401 MLX5_L3_PROT_TYPE_IPV4);
1402 else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
1403 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
1404 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1405 MLX5_L3_PROT_TYPE_IPV6);
1406
1407 if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1408 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) &&
1409 ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
1410 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) {
1411 err = -EINVAL;
1412 goto err;
1413 }
1414
1415 /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
1416 if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1417 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
1418 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1419 MLX5_L4_PROT_TYPE_TCP);
1420 else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
1421 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1422 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1423 MLX5_L4_PROT_TYPE_UDP);
1424
1425 if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1426 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
1427 selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
1428
1429 if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
1430 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
1431 selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
1432
1433 if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1434 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
1435 selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
1436
1437 if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
1438 (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1439 selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
1440
1441 MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
1442
1443create_tir:
1444 err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
1445
1446 if (err)
1447 goto err;
1448
1449 kvfree(in);
1450 /* qpn is reserved for that QP */
1451 qp->trans_qp.base.mqp.qpn = 0;
1452 return 0;
1453
1454err:
1455 kvfree(in);
1456 return err;
1457}
1458
1204static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, 1459static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1205 struct ib_qp_init_attr *init_attr, 1460 struct ib_qp_init_attr *init_attr,
1206 struct ib_udata *udata, struct mlx5_ib_qp *qp) 1461 struct ib_udata *udata, struct mlx5_ib_qp *qp)
@@ -1211,6 +1466,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1211 struct mlx5_ib_create_qp_resp resp; 1466 struct mlx5_ib_create_qp_resp resp;
1212 struct mlx5_create_qp_mbox_in *in; 1467 struct mlx5_create_qp_mbox_in *in;
1213 struct mlx5_ib_create_qp ucmd; 1468 struct mlx5_ib_create_qp ucmd;
1469 struct mlx5_ib_cq *send_cq;
1470 struct mlx5_ib_cq *recv_cq;
1471 unsigned long flags;
1214 int inlen = sizeof(*in); 1472 int inlen = sizeof(*in);
1215 int err; 1473 int err;
1216 u32 uidx = MLX5_IB_DEFAULT_UIDX; 1474 u32 uidx = MLX5_IB_DEFAULT_UIDX;
@@ -1227,6 +1485,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1227 spin_lock_init(&qp->sq.lock); 1485 spin_lock_init(&qp->sq.lock);
1228 spin_lock_init(&qp->rq.lock); 1486 spin_lock_init(&qp->rq.lock);
1229 1487
1488 if (init_attr->rwq_ind_tbl) {
1489 if (!udata)
1490 return -ENOSYS;
1491
1492 err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata);
1493 return err;
1494 }
1495
1230 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { 1496 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1231 if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { 1497 if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
1232 mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); 1498 mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
@@ -1460,6 +1726,23 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1460 base->container_mibqp = qp; 1726 base->container_mibqp = qp;
1461 base->mqp.event = mlx5_ib_qp_event; 1727 base->mqp.event = mlx5_ib_qp_event;
1462 1728
1729 get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
1730 &send_cq, &recv_cq);
1731 spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1732 mlx5_ib_lock_cqs(send_cq, recv_cq);
1733	/* Maintain device-to-QP access; needed for later handling via the
1734	 * reset flow.
1735 */
1736 list_add_tail(&qp->qps_list, &dev->qp_list);
1737	/* Maintain CQ-to-QP access; also needed for handling via the reset flow
1738 */
1739 if (send_cq)
1740 list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
1741 if (recv_cq)
1742 list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
1743 mlx5_ib_unlock_cqs(send_cq, recv_cq);
1744 spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1745
1463 return 0; 1746 return 0;
1464 1747
1465err_create: 1748err_create:
@@ -1478,23 +1761,23 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv
1478 if (send_cq) { 1761 if (send_cq) {
1479 if (recv_cq) { 1762 if (recv_cq) {
1480 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { 1763 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
1481 spin_lock_irq(&send_cq->lock); 1764 spin_lock(&send_cq->lock);
1482 spin_lock_nested(&recv_cq->lock, 1765 spin_lock_nested(&recv_cq->lock,
1483 SINGLE_DEPTH_NESTING); 1766 SINGLE_DEPTH_NESTING);
1484 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { 1767 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
1485 spin_lock_irq(&send_cq->lock); 1768 spin_lock(&send_cq->lock);
1486 __acquire(&recv_cq->lock); 1769 __acquire(&recv_cq->lock);
1487 } else { 1770 } else {
1488 spin_lock_irq(&recv_cq->lock); 1771 spin_lock(&recv_cq->lock);
1489 spin_lock_nested(&send_cq->lock, 1772 spin_lock_nested(&send_cq->lock,
1490 SINGLE_DEPTH_NESTING); 1773 SINGLE_DEPTH_NESTING);
1491 } 1774 }
1492 } else { 1775 } else {
1493 spin_lock_irq(&send_cq->lock); 1776 spin_lock(&send_cq->lock);
1494 __acquire(&recv_cq->lock); 1777 __acquire(&recv_cq->lock);
1495 } 1778 }
1496 } else if (recv_cq) { 1779 } else if (recv_cq) {
1497 spin_lock_irq(&recv_cq->lock); 1780 spin_lock(&recv_cq->lock);
1498 __acquire(&send_cq->lock); 1781 __acquire(&send_cq->lock);
1499 } else { 1782 } else {
1500 __acquire(&send_cq->lock); 1783 __acquire(&send_cq->lock);
@@ -1509,21 +1792,21 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re
1509 if (recv_cq) { 1792 if (recv_cq) {
1510 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { 1793 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
1511 spin_unlock(&recv_cq->lock); 1794 spin_unlock(&recv_cq->lock);
1512 spin_unlock_irq(&send_cq->lock); 1795 spin_unlock(&send_cq->lock);
1513 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { 1796 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
1514 __release(&recv_cq->lock); 1797 __release(&recv_cq->lock);
1515 spin_unlock_irq(&send_cq->lock); 1798 spin_unlock(&send_cq->lock);
1516 } else { 1799 } else {
1517 spin_unlock(&send_cq->lock); 1800 spin_unlock(&send_cq->lock);
1518 spin_unlock_irq(&recv_cq->lock); 1801 spin_unlock(&recv_cq->lock);
1519 } 1802 }
1520 } else { 1803 } else {
1521 __release(&recv_cq->lock); 1804 __release(&recv_cq->lock);
1522 spin_unlock_irq(&send_cq->lock); 1805 spin_unlock(&send_cq->lock);
1523 } 1806 }
1524 } else if (recv_cq) { 1807 } else if (recv_cq) {
1525 __release(&send_cq->lock); 1808 __release(&send_cq->lock);
1526 spin_unlock_irq(&recv_cq->lock); 1809 spin_unlock(&recv_cq->lock);
1527 } else { 1810 } else {
1528 __release(&recv_cq->lock); 1811 __release(&recv_cq->lock);
1529 __release(&send_cq->lock); 1812 __release(&send_cq->lock);
@@ -1535,17 +1818,18 @@ static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
1535 return to_mpd(qp->ibqp.pd); 1818 return to_mpd(qp->ibqp.pd);
1536} 1819}
1537 1820
1538static void get_cqs(struct mlx5_ib_qp *qp, 1821static void get_cqs(enum ib_qp_type qp_type,
1822 struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
1539 struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) 1823 struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
1540{ 1824{
1541 switch (qp->ibqp.qp_type) { 1825 switch (qp_type) {
1542 case IB_QPT_XRC_TGT: 1826 case IB_QPT_XRC_TGT:
1543 *send_cq = NULL; 1827 *send_cq = NULL;
1544 *recv_cq = NULL; 1828 *recv_cq = NULL;
1545 break; 1829 break;
1546 case MLX5_IB_QPT_REG_UMR: 1830 case MLX5_IB_QPT_REG_UMR:
1547 case IB_QPT_XRC_INI: 1831 case IB_QPT_XRC_INI:
1548 *send_cq = to_mcq(qp->ibqp.send_cq); 1832 *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
1549 *recv_cq = NULL; 1833 *recv_cq = NULL;
1550 break; 1834 break;
1551 1835
@@ -1557,8 +1841,8 @@ static void get_cqs(struct mlx5_ib_qp *qp,
1557 case IB_QPT_RAW_IPV6: 1841 case IB_QPT_RAW_IPV6:
1558 case IB_QPT_RAW_ETHERTYPE: 1842 case IB_QPT_RAW_ETHERTYPE:
1559 case IB_QPT_RAW_PACKET: 1843 case IB_QPT_RAW_PACKET:
1560 *send_cq = to_mcq(qp->ibqp.send_cq); 1844 *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
1561 *recv_cq = to_mcq(qp->ibqp.recv_cq); 1845 *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
1562 break; 1846 break;
1563 1847
1564 case IB_QPT_MAX: 1848 case IB_QPT_MAX:
@@ -1577,8 +1861,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1577 struct mlx5_ib_cq *send_cq, *recv_cq; 1861 struct mlx5_ib_cq *send_cq, *recv_cq;
1578 struct mlx5_ib_qp_base *base = &qp->trans_qp.base; 1862 struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
1579 struct mlx5_modify_qp_mbox_in *in; 1863 struct mlx5_modify_qp_mbox_in *in;
1864 unsigned long flags;
1580 int err; 1865 int err;
1581 1866
1867 if (qp->ibqp.rwq_ind_tbl) {
1868 destroy_rss_raw_qp_tir(dev, qp);
1869 return;
1870 }
1871
1582 base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? 1872 base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
1583 &qp->raw_packet_qp.rq.base : 1873 &qp->raw_packet_qp.rq.base :
1584 &qp->trans_qp.base; 1874 &qp->trans_qp.base;
@@ -1602,17 +1892,28 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1602 base->mqp.qpn); 1892 base->mqp.qpn);
1603 } 1893 }
1604 1894
1605 get_cqs(qp, &send_cq, &recv_cq); 1895 get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
1896 &send_cq, &recv_cq);
1897
1898 spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1899 mlx5_ib_lock_cqs(send_cq, recv_cq);
1900 /* del from lists under both locks above to protect reset flow paths */
1901 list_del(&qp->qps_list);
1902 if (send_cq)
1903 list_del(&qp->cq_send_list);
1904
1905 if (recv_cq)
1906 list_del(&qp->cq_recv_list);
1606 1907
1607 if (qp->create_type == MLX5_QP_KERNEL) { 1908 if (qp->create_type == MLX5_QP_KERNEL) {
1608 mlx5_ib_lock_cqs(send_cq, recv_cq);
1609 __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, 1909 __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
1610 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); 1910 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1611 if (send_cq != recv_cq) 1911 if (send_cq != recv_cq)
1612 __mlx5_ib_cq_clean(send_cq, base->mqp.qpn, 1912 __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
1613 NULL); 1913 NULL);
1614 mlx5_ib_unlock_cqs(send_cq, recv_cq);
1615 } 1914 }
1915 mlx5_ib_unlock_cqs(send_cq, recv_cq);
1916 spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1616 1917
1617 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { 1918 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
1618 destroy_raw_packet_qp(dev, qp); 1919 destroy_raw_packet_qp(dev, qp);
@@ -2300,7 +2601,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2300 } 2601 }
2301 2602
2302 pd = get_pd(qp); 2603 pd = get_pd(qp);
2303 get_cqs(qp, &send_cq, &recv_cq); 2604 get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
2605 &send_cq, &recv_cq);
2304 2606
2305 context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); 2607 context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
2306 context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; 2608 context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
@@ -2349,6 +2651,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2349 else 2651 else
2350 sqd_event = 0; 2652 sqd_event = 0;
2351 2653
2654 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
2655 u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
2656 qp->port) - 1;
2657 struct mlx5_ib_port *mibport = &dev->port[port_num];
2658
2659 context->qp_counter_set_usr_page |=
2660 cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
2661 }
2662
2352 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 2663 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
2353 context->sq_crq_size |= cpu_to_be16(1 << 4); 2664 context->sq_crq_size |= cpu_to_be16(1 << 4);
2354 2665
@@ -2439,6 +2750,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2439 int port; 2750 int port;
2440 enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; 2751 enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
2441 2752
2753 if (ibqp->rwq_ind_tbl)
2754 return -ENOSYS;
2755
2442 if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 2756 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
2443 return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); 2757 return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
2444 2758
@@ -3397,6 +3711,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3397{ 3711{
3398 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ 3712 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
3399 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 3713 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
3714 struct mlx5_core_dev *mdev = dev->mdev;
3400 struct mlx5_ib_qp *qp; 3715 struct mlx5_ib_qp *qp;
3401 struct mlx5_ib_mr *mr; 3716 struct mlx5_ib_mr *mr;
3402 struct mlx5_wqe_data_seg *dpseg; 3717 struct mlx5_wqe_data_seg *dpseg;
@@ -3424,6 +3739,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3424 3739
3425 spin_lock_irqsave(&qp->sq.lock, flags); 3740 spin_lock_irqsave(&qp->sq.lock, flags);
3426 3741
3742 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
3743 err = -EIO;
3744 *bad_wr = wr;
3745 nreq = 0;
3746 goto out;
3747 }
3748
3427 for (nreq = 0; wr; nreq++, wr = wr->next) { 3749 for (nreq = 0; wr; nreq++, wr = wr->next) {
3428 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { 3750 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
3429 mlx5_ib_warn(dev, "\n"); 3751 mlx5_ib_warn(dev, "\n");
@@ -3725,6 +4047,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3725 struct mlx5_ib_qp *qp = to_mqp(ibqp); 4047 struct mlx5_ib_qp *qp = to_mqp(ibqp);
3726 struct mlx5_wqe_data_seg *scat; 4048 struct mlx5_wqe_data_seg *scat;
3727 struct mlx5_rwqe_sig *sig; 4049 struct mlx5_rwqe_sig *sig;
4050 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4051 struct mlx5_core_dev *mdev = dev->mdev;
3728 unsigned long flags; 4052 unsigned long flags;
3729 int err = 0; 4053 int err = 0;
3730 int nreq; 4054 int nreq;
@@ -3736,6 +4060,13 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3736 4060
3737 spin_lock_irqsave(&qp->rq.lock, flags); 4061 spin_lock_irqsave(&qp->rq.lock, flags);
3738 4062
4063 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
4064 err = -EIO;
4065 *bad_wr = wr;
4066 nreq = 0;
4067 goto out;
4068 }
4069
3739 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 4070 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
3740 4071
3741 for (nreq = 0; wr; nreq++, wr = wr->next) { 4072 for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -4055,6 +4386,9 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
4055 int err = 0; 4386 int err = 0;
4056 u8 raw_packet_qp_state; 4387 u8 raw_packet_qp_state;
4057 4388
4389 if (ibqp->rwq_ind_tbl)
4390 return -ENOSYS;
4391
4058 if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 4392 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
4059 return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, 4393 return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
4060 qp_init_attr); 4394 qp_init_attr);
@@ -4164,3 +4498,322 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
4164 4498
4165 return 0; 4499 return 0;
4166} 4500}
4501
4502static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
4503 struct ib_wq_init_attr *init_attr)
4504{
4505 struct mlx5_ib_dev *dev;
4506 __be64 *rq_pas0;
4507 void *in;
4508 void *rqc;
4509 void *wq;
4510 int inlen;
4511 int err;
4512
4513 dev = to_mdev(pd->device);
4514
4515 inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
4516 in = mlx5_vzalloc(inlen);
4517 if (!in)
4518 return -ENOMEM;
4519
4520 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
4521 MLX5_SET(rqc, rqc, mem_rq_type,
4522 MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
4523 MLX5_SET(rqc, rqc, user_index, rwq->user_index);
4524 MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
4525 MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
4526 MLX5_SET(rqc, rqc, flush_in_error_en, 1);
4527 wq = MLX5_ADDR_OF(rqc, rqc, wq);
4528 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
4529 MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
4530 MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
4531 MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
4532 MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
4533 MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
4534 MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
4535 MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
4536 MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
4537 rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
4538 mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
4539 err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn);
4540 kvfree(in);
4541 return err;
4542}
4543
4544static int set_user_rq_size(struct mlx5_ib_dev *dev,
4545 struct ib_wq_init_attr *wq_init_attr,
4546 struct mlx5_ib_create_wq *ucmd,
4547 struct mlx5_ib_rwq *rwq)
4548{
4549 /* Sanity check RQ size before proceeding */
4550 if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz)))
4551 return -EINVAL;
4552
4553 if (!ucmd->rq_wqe_count)
4554 return -EINVAL;
4555
4556 rwq->wqe_count = ucmd->rq_wqe_count;
4557 rwq->wqe_shift = ucmd->rq_wqe_shift;
4558 rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
4559 rwq->log_rq_stride = rwq->wqe_shift;
4560 rwq->log_rq_size = ilog2(rwq->wqe_count);
4561 return 0;
4562}
4563
4564static int prepare_user_rq(struct ib_pd *pd,
4565 struct ib_wq_init_attr *init_attr,
4566 struct ib_udata *udata,
4567 struct mlx5_ib_rwq *rwq)
4568{
4569 struct mlx5_ib_dev *dev = to_mdev(pd->device);
4570 struct mlx5_ib_create_wq ucmd = {};
4571 int err;
4572 size_t required_cmd_sz;
4573
4574 required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
4575 if (udata->inlen < required_cmd_sz) {
4576 mlx5_ib_dbg(dev, "invalid inlen\n");
4577 return -EINVAL;
4578 }
4579
4580 if (udata->inlen > sizeof(ucmd) &&
4581 !ib_is_udata_cleared(udata, sizeof(ucmd),
4582 udata->inlen - sizeof(ucmd))) {
4583 mlx5_ib_dbg(dev, "inlen is not supported\n");
4584 return -EOPNOTSUPP;
4585 }
4586
4587 if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
4588 mlx5_ib_dbg(dev, "copy failed\n");
4589 return -EFAULT;
4590 }
4591
4592 if (ucmd.comp_mask) {
4593 mlx5_ib_dbg(dev, "invalid comp mask\n");
4594 return -EOPNOTSUPP;
4595 }
4596
4597 if (ucmd.reserved) {
4598 mlx5_ib_dbg(dev, "invalid reserved\n");
4599 return -EOPNOTSUPP;
4600 }
4601
4602 err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
4603 if (err) {
4604 mlx5_ib_dbg(dev, "err %d\n", err);
4605 return err;
4606 }
4607
4608 err = create_user_rq(dev, pd, rwq, &ucmd);
4609 if (err) {
4610 mlx5_ib_dbg(dev, "err %d\n", err);
4611 if (err)
4612 return err;
4613 }
4614
4615 rwq->user_index = ucmd.user_index;
4616 return 0;
4617}
4618
4619struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
4620 struct ib_wq_init_attr *init_attr,
4621 struct ib_udata *udata)
4622{
4623 struct mlx5_ib_dev *dev;
4624 struct mlx5_ib_rwq *rwq;
4625 struct mlx5_ib_create_wq_resp resp = {};
4626 size_t min_resp_len;
4627 int err;
4628
4629 if (!udata)
4630 return ERR_PTR(-ENOSYS);
4631
4632 min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
4633 if (udata->outlen && udata->outlen < min_resp_len)
4634 return ERR_PTR(-EINVAL);
4635
4636 dev = to_mdev(pd->device);
4637 switch (init_attr->wq_type) {
4638 case IB_WQT_RQ:
4639 rwq = kzalloc(sizeof(*rwq), GFP_KERNEL);
4640 if (!rwq)
4641 return ERR_PTR(-ENOMEM);
4642 err = prepare_user_rq(pd, init_attr, udata, rwq);
4643 if (err)
4644 goto err;
4645 err = create_rq(rwq, pd, init_attr);
4646 if (err)
4647 goto err_user_rq;
4648 break;
4649 default:
4650 mlx5_ib_dbg(dev, "unsupported wq type %d\n",
4651 init_attr->wq_type);
4652 return ERR_PTR(-EINVAL);
4653 }
4654
4655 rwq->ibwq.wq_num = rwq->rqn;
4656 rwq->ibwq.state = IB_WQS_RESET;
4657 if (udata->outlen) {
4658 resp.response_length = offsetof(typeof(resp), response_length) +
4659 sizeof(resp.response_length);
4660 err = ib_copy_to_udata(udata, &resp, resp.response_length);
4661 if (err)
4662 goto err_copy;
4663 }
4664
4665 return &rwq->ibwq;
4666
4667err_copy:
4668 mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
4669err_user_rq:
4670 destroy_user_rq(pd, rwq);
4671err:
4672 kfree(rwq);
4673 return ERR_PTR(err);
4674}
4675
4676int mlx5_ib_destroy_wq(struct ib_wq *wq)
4677{
4678 struct mlx5_ib_dev *dev = to_mdev(wq->device);
4679 struct mlx5_ib_rwq *rwq = to_mrwq(wq);
4680
4681 mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
4682 destroy_user_rq(wq->pd, rwq);
4683 kfree(rwq);
4684
4685 return 0;
4686}
4687
4688struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
4689 struct ib_rwq_ind_table_init_attr *init_attr,
4690 struct ib_udata *udata)
4691{
4692 struct mlx5_ib_dev *dev = to_mdev(device);
4693 struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
4694 int sz = 1 << init_attr->log_ind_tbl_size;
4695 struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
4696 size_t min_resp_len;
4697 int inlen;
4698 int err;
4699 int i;
4700 u32 *in;
4701 void *rqtc;
4702
4703 if (udata->inlen > 0 &&
4704 !ib_is_udata_cleared(udata, 0,
4705 udata->inlen))
4706 return ERR_PTR(-EOPNOTSUPP);
4707
4708 min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
4709 if (udata->outlen && udata->outlen < min_resp_len)
4710 return ERR_PTR(-EINVAL);
4711
4712 rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
4713 if (!rwq_ind_tbl)
4714 return ERR_PTR(-ENOMEM);
4715
4716 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
4717 in = mlx5_vzalloc(inlen);
4718 if (!in) {
4719 err = -ENOMEM;
4720 goto err;
4721 }
4722
4723 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
4724
4725 MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
4726 MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
4727
4728 for (i = 0; i < sz; i++)
4729 MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
4730
4731 err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
4732 kvfree(in);
4733
4734 if (err)
4735 goto err;
4736
4737 rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
4738 if (udata->outlen) {
4739 resp.response_length = offsetof(typeof(resp), response_length) +
4740 sizeof(resp.response_length);
4741 err = ib_copy_to_udata(udata, &resp, resp.response_length);
4742 if (err)
4743 goto err_copy;
4744 }
4745
4746 return &rwq_ind_tbl->ib_rwq_ind_tbl;
4747
4748err_copy:
4749 mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
4750err:
4751 kfree(rwq_ind_tbl);
4752 return ERR_PTR(err);
4753}
4754
4755int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
4756{
4757 struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
4758 struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
4759
4760 mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
4761
4762 kfree(rwq_ind_tbl);
4763 return 0;
4764}
4765
4766int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
4767 u32 wq_attr_mask, struct ib_udata *udata)
4768{
4769 struct mlx5_ib_dev *dev = to_mdev(wq->device);
4770 struct mlx5_ib_rwq *rwq = to_mrwq(wq);
4771 struct mlx5_ib_modify_wq ucmd = {};
4772 size_t required_cmd_sz;
4773 int curr_wq_state;
4774 int wq_state;
4775 int inlen;
4776 int err;
4777 void *rqc;
4778 void *in;
4779
4780 required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
4781 if (udata->inlen < required_cmd_sz)
4782 return -EINVAL;
4783
4784 if (udata->inlen > sizeof(ucmd) &&
4785 !ib_is_udata_cleared(udata, sizeof(ucmd),
4786 udata->inlen - sizeof(ucmd)))
4787 return -EOPNOTSUPP;
4788
4789 if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
4790 return -EFAULT;
4791
4792 if (ucmd.comp_mask || ucmd.reserved)
4793 return -EOPNOTSUPP;
4794
4795 inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
4796 in = mlx5_vzalloc(inlen);
4797 if (!in)
4798 return -ENOMEM;
4799
4800 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
4801
4802 curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ?
4803 wq_attr->curr_wq_state : wq->state;
4804 wq_state = (wq_attr_mask & IB_WQ_STATE) ?
4805 wq_attr->wq_state : curr_wq_state;
4806 if (curr_wq_state == IB_WQS_ERR)
4807 curr_wq_state = MLX5_RQC_STATE_ERR;
4808 if (wq_state == IB_WQS_ERR)
4809 wq_state = MLX5_RQC_STATE_ERR;
4810 MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
4811 MLX5_SET(rqc, rqc, state, wq_state);
4812
4813 err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen);
4814 kvfree(in);
4815 if (!err)
4816 rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
4817
4818 return err;
4819}
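
The create_qp_common() and destroy_qp_common() hunks above now thread every QP onto a device-wide list (dev->qp_list) and onto its CQs' lists (cq_send_list, cq_recv_list), taking reset_flow_resource_lock plus the CQ locks, precisely so that a later reset ("internal error") flow can reach each QP. A minimal sketch of what such a consumer could look like follows; the handler and flush-helper names are hypothetical, only the list, member, and lock names come from the hunks above, and the usual mlx5_ib driver headers are assumed.

/* Hypothetical names: example_flush_qp() and mlx5_ib_example_internal_err();
 * qp_list, qps_list and reset_flow_resource_lock are from the hunks above.
 */
static void example_flush_qp(struct mlx5_ib_qp *qp)
{
	/* e.g. move the QP to the error state and generate flush CQEs */
}

static void mlx5_ib_example_internal_err(struct mlx5_ib_dev *dev)
{
	struct mlx5_ib_qp *qp;
	unsigned long flags;

	/* Same lock the create/destroy paths take before touching
	 * dev->qp_list, so the walk cannot race with QP teardown.
	 */
	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
	list_for_each_entry(qp, &dev->qp_list, qps_list)
		example_flush_qp(qp);
	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
}
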
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 3b2ddd64a371..ed6ac52355f1 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -74,14 +74,12 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
74} 74}
75 75
76static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, 76static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
77 struct mlx5_create_srq_mbox_in **in, 77 struct mlx5_srq_attr *in,
78 struct ib_udata *udata, int buf_size, int *inlen, 78 struct ib_udata *udata, int buf_size)
79 int is_xrc)
80{ 79{
81 struct mlx5_ib_dev *dev = to_mdev(pd->device); 80 struct mlx5_ib_dev *dev = to_mdev(pd->device);
82 struct mlx5_ib_create_srq ucmd = {}; 81 struct mlx5_ib_create_srq ucmd = {};
83 size_t ucmdlen; 82 size_t ucmdlen;
84 void *xsrqc;
85 int err; 83 int err;
86 int npages; 84 int npages;
87 int page_shift; 85 int page_shift;
@@ -104,7 +102,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
104 udata->inlen - sizeof(ucmd))) 102 udata->inlen - sizeof(ucmd)))
105 return -EINVAL; 103 return -EINVAL;
106 104
107 if (is_xrc) { 105 if (in->type == IB_SRQT_XRC) {
108 err = get_srq_user_index(to_mucontext(pd->uobject->context), 106 err = get_srq_user_index(to_mucontext(pd->uobject->context),
109 &ucmd, udata->inlen, &uidx); 107 &ucmd, udata->inlen, &uidx);
110 if (err) 108 if (err)
@@ -130,14 +128,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
130 goto err_umem; 128 goto err_umem;
131 } 129 }
132 130
133 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; 131 in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont);
134 *in = mlx5_vzalloc(*inlen); 132 if (!in->pas) {
135 if (!(*in)) {
136 err = -ENOMEM; 133 err = -ENOMEM;
137 goto err_umem; 134 goto err_umem;
138 } 135 }
139 136
140 mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0); 137 mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
141 138
142 err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), 139 err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
143 ucmd.db_addr, &srq->db); 140 ucmd.db_addr, &srq->db);
@@ -146,20 +143,16 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
146 goto err_in; 143 goto err_in;
147 } 144 }
148 145
149 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 146 in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
150 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); 147 in->page_offset = offset;
151 148 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
152 if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && 149 in->type == IB_SRQT_XRC)
153 is_xrc){ 150 in->user_index = uidx;
154 xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
155 xrc_srq_context_entry);
156 MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
157 }
158 151
159 return 0; 152 return 0;
160 153
161err_in: 154err_in:
162 kvfree(*in); 155 kvfree(in->pas);
163 156
164err_umem: 157err_umem:
165 ib_umem_release(srq->umem); 158 ib_umem_release(srq->umem);
@@ -168,15 +161,13 @@ err_umem:
168} 161}
169 162
170static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, 163static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
171 struct mlx5_create_srq_mbox_in **in, int buf_size, 164 struct mlx5_srq_attr *in, int buf_size)
172 int *inlen, int is_xrc)
173{ 165{
174 int err; 166 int err;
175 int i; 167 int i;
176 struct mlx5_wqe_srq_next_seg *next; 168 struct mlx5_wqe_srq_next_seg *next;
177 int page_shift; 169 int page_shift;
178 int npages; 170 int npages;
179 void *xsrqc;
180 171
181 err = mlx5_db_alloc(dev->mdev, &srq->db); 172 err = mlx5_db_alloc(dev->mdev, &srq->db);
182 if (err) { 173 if (err) {
@@ -204,13 +195,12 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
204 npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); 195 npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
205 mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", 196 mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
206 buf_size, page_shift, srq->buf.npages, npages); 197 buf_size, page_shift, srq->buf.npages, npages);
207 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages; 198 in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
208 *in = mlx5_vzalloc(*inlen); 199 if (!in->pas) {
209 if (!*in) {
210 err = -ENOMEM; 200 err = -ENOMEM;
211 goto err_buf; 201 goto err_buf;
212 } 202 }
213 mlx5_fill_page_array(&srq->buf, (*in)->pas); 203 mlx5_fill_page_array(&srq->buf, in->pas);
214 204
215 srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); 205 srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
216 if (!srq->wrid) { 206 if (!srq->wrid) {
@@ -221,20 +211,15 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
221 } 211 }
222 srq->wq_sig = !!srq_signature; 212 srq->wq_sig = !!srq_signature;
223 213
224 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 214 in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
225 215 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
226 if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && 216 in->type == IB_SRQT_XRC)
227 is_xrc){ 217 in->user_index = MLX5_IB_DEFAULT_UIDX;
228 xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
229 xrc_srq_context_entry);
230 /* 0xffffff means we ask to work with cqe version 0 */
231 MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
232 }
233 218
234 return 0; 219 return 0;
235 220
236err_in: 221err_in:
237 kvfree(*in); 222 kvfree(in->pas);
238 223
239err_buf: 224err_buf:
240 mlx5_buf_free(dev->mdev, &srq->buf); 225 mlx5_buf_free(dev->mdev, &srq->buf);
@@ -267,10 +252,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
267 int desc_size; 252 int desc_size;
268 int buf_size; 253 int buf_size;
269 int err; 254 int err;
270 struct mlx5_create_srq_mbox_in *uninitialized_var(in); 255 struct mlx5_srq_attr in = {0};
271 int uninitialized_var(inlen);
272 int is_xrc;
273 u32 flgs, xrcdn;
274 __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); 256 __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
275 257
276 /* Sanity check SRQ size before proceeding */ 258 /* Sanity check SRQ size before proceeding */
@@ -302,14 +284,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
302 desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, 284 desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
303 srq->msrq.max_avail_gather); 285 srq->msrq.max_avail_gather);
304 286
305 is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
306
307 if (pd->uobject) 287 if (pd->uobject)
308 err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen, 288 err = create_srq_user(pd, srq, &in, udata, buf_size);
309 is_xrc);
310 else 289 else
311 err = create_srq_kernel(dev, srq, &in, buf_size, &inlen, 290 err = create_srq_kernel(dev, srq, &in, buf_size);
312 is_xrc);
313 291
314 if (err) { 292 if (err) {
315 mlx5_ib_warn(dev, "create srq %s failed, err %d\n", 293 mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -317,23 +295,23 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
317 goto err_srq; 295 goto err_srq;
318 } 296 }
319 297
320 in->ctx.state_log_sz = ilog2(srq->msrq.max); 298 in.type = init_attr->srq_type;
321 flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; 299 in.log_size = ilog2(srq->msrq.max);
322 xrcdn = 0; 300 in.wqe_shift = srq->msrq.wqe_shift - 4;
323 if (is_xrc) { 301 if (srq->wq_sig)
324 xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; 302 in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
325 in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn); 303 if (init_attr->srq_type == IB_SRQT_XRC) {
304 in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
305 in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn;
326 } else if (init_attr->srq_type == IB_SRQT_BASIC) { 306 } else if (init_attr->srq_type == IB_SRQT_BASIC) {
327 xrcdn = to_mxrcd(dev->devr.x0)->xrcdn; 307 in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
328 in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn); 308 in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
329 } 309 }
330 310
331 in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF)); 311 in.pd = to_mpd(pd)->pdn;
332 312 in.db_record = srq->db.dma;
333 in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); 313 err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
334 in->ctx.db_record = cpu_to_be64(srq->db.dma); 314 kvfree(in.pas);
335 err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc);
336 kvfree(in);
337 if (err) { 315 if (err) {
338 mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); 316 mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
339 goto err_usr_kern_srq; 317 goto err_usr_kern_srq;
@@ -401,7 +379,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
401 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); 379 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
402 struct mlx5_ib_srq *srq = to_msrq(ibsrq); 380 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
403 int ret; 381 int ret;
404 struct mlx5_query_srq_mbox_out *out; 382 struct mlx5_srq_attr *out;
405 383
406 out = kzalloc(sizeof(*out), GFP_KERNEL); 384 out = kzalloc(sizeof(*out), GFP_KERNEL);
407 if (!out) 385 if (!out)
@@ -411,7 +389,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
411 if (ret) 389 if (ret)
412 goto out_box; 390 goto out_box;
413 391
414 srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm); 392 srq_attr->srq_limit = out->lwm;
415 srq_attr->max_wr = srq->msrq.max - 1; 393 srq_attr->max_wr = srq->msrq.max - 1;
416 srq_attr->max_sge = srq->msrq.max_gs; 394 srq_attr->max_sge = srq->msrq.max_gs;
417 395
@@ -458,6 +436,8 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
458 struct mlx5_ib_srq *srq = to_msrq(ibsrq); 436 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
459 struct mlx5_wqe_srq_next_seg *next; 437 struct mlx5_wqe_srq_next_seg *next;
460 struct mlx5_wqe_data_seg *scat; 438 struct mlx5_wqe_data_seg *scat;
439 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
440 struct mlx5_core_dev *mdev = dev->mdev;
461 unsigned long flags; 441 unsigned long flags;
462 int err = 0; 442 int err = 0;
463 int nreq; 443 int nreq;
@@ -465,6 +445,12 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
465 445
466 spin_lock_irqsave(&srq->lock, flags); 446 spin_lock_irqsave(&srq->lock, flags);
467 447
448 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
449 err = -EIO;
450 *bad_wr = wr;
451 goto out;
452 }
453
468 for (nreq = 0; wr; nreq++, wr = wr->next) { 454 for (nreq = 0; wr; nreq++, wr = wr->next) {
469 if (unlikely(wr->num_sge > srq->msrq.max_gs)) { 455 if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
470 err = -EINVAL; 456 err = -EINVAL;
@@ -507,7 +493,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
507 493
508 *srq->db.db = cpu_to_be32(srq->wqe_ctr); 494 *srq->db.db = cpu_to_be32(srq->wqe_ctr);
509 } 495 }
510 496out:
511 spin_unlock_irqrestore(&srq->lock, flags); 497 spin_unlock_irqrestore(&srq->lock, flags);
512 498
513 return err; 499 return err;
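
The srq.c hunks above drop the raw mlx5_create_srq_mbox_in mailbox layout in favour of the more abstract struct mlx5_srq_attr, which mlx5_core now translates into the firmware command itself. Pulling the pieces of mlx5_ib_create_srq() together, the kernel-side flow for a basic SRQ looks roughly like the sketch below; the field names are the ones used above, the full struct definition lives in the mlx5 core headers, and the wrapper function itself is purely illustrative.

/* Illustrative consolidation of the create path shown above, assuming the
 * usual mlx5_ib headers; not a drop-in replacement for mlx5_ib_create_srq().
 */
static int example_create_basic_srq(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_srq *srq,
				    struct ib_pd *pd, int npages)
{
	struct mlx5_srq_attr in = {0};
	int err;

	in.pas = mlx5_vzalloc(sizeof(*in.pas) * npages);
	if (!in.pas)
		return -ENOMEM;
	mlx5_fill_page_array(&srq->buf, in.pas);
	/* log_page_size, db allocation etc. as in create_srq_kernel() above */

	in.type      = IB_SRQT_BASIC;
	in.log_size  = ilog2(srq->msrq.max);
	in.wqe_shift = srq->msrq.wqe_shift - 4;
	if (srq->wq_sig)
		in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
	in.xrcd      = to_mxrcd(dev->devr.x0)->xrcdn; /* reserved placeholder */
	in.cqn       = to_mcq(dev->devr.c0)->mcq.cqn; /* reserved placeholder */
	in.pd        = to_mpd(pd)->pdn;
	in.db_record = srq->db.dma;

	err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
	kvfree(in.pas);
	return err;
}
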
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 61bc308bb802..188dac4301b5 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -46,6 +46,10 @@ enum {
46 MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, 46 MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
47}; 47};
48 48
49enum {
50 MLX5_WQ_FLAG_SIGNATURE = 1 << 0,
51};
52
49 53
 50/* Increment this value if any changes that break userspace ABI 54/* Increment this value if any changes that break userspace ABI
 51 * compatibility are made. 55 * compatibility are made.
@@ -79,6 +83,10 @@ enum mlx5_ib_alloc_ucontext_resp_mask {
79 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, 83 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
80}; 84};
81 85
86enum mlx5_user_cmds_supp_uhw {
87 MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
88};
89
82struct mlx5_ib_alloc_ucontext_resp { 90struct mlx5_ib_alloc_ucontext_resp {
83 __u32 qp_tab_size; 91 __u32 qp_tab_size;
84 __u32 bf_reg_size; 92 __u32 bf_reg_size;
@@ -94,8 +102,8 @@ struct mlx5_ib_alloc_ucontext_resp {
94 __u32 comp_mask; 102 __u32 comp_mask;
95 __u32 response_length; 103 __u32 response_length;
96 __u8 cqe_version; 104 __u8 cqe_version;
97 __u8 reserved2; 105 __u8 cmds_supp_uhw;
98 __u16 reserved3; 106 __u16 reserved2;
99 __u64 hca_core_clock_offset; 107 __u64 hca_core_clock_offset;
100}; 108};
101 109
@@ -103,6 +111,22 @@ struct mlx5_ib_alloc_pd_resp {
103 __u32 pdn; 111 __u32 pdn;
104}; 112};
105 113
114struct mlx5_ib_tso_caps {
115 __u32 max_tso; /* Maximum tso payload size in bytes */
116
117 /* Corresponding bit will be set if qp type from
118 * 'enum ib_qp_type' is supported, e.g.
119 * supported_qpts |= 1 << IB_QPT_UD
120 */
121 __u32 supported_qpts;
122};
123
124struct mlx5_ib_query_device_resp {
125 __u32 comp_mask;
126 __u32 response_length;
127 struct mlx5_ib_tso_caps tso_caps;
128};
129
106struct mlx5_ib_create_cq { 130struct mlx5_ib_create_cq {
107 __u64 buf_addr; 131 __u64 buf_addr;
108 __u64 db_addr; 132 __u64 db_addr;
@@ -148,6 +172,40 @@ struct mlx5_ib_create_qp {
148 __u64 sq_buf_addr; 172 __u64 sq_buf_addr;
149}; 173};
150 174
175/* RX Hash function flags */
176enum mlx5_rx_hash_function_flags {
177 MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
178};
179
180/*
 181 * RX Hash flags: these flags select which fields of an incoming packet
 182 * participate in the RX Hash. Each flag represents a certain packet field;
 183 * when a flag is set, the field it represents is included in the
 184 * RX Hash calculation.
185 * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP
186 * and *TCP and *UDP flags can't be enabled together on the same QP.
187*/
188enum mlx5_rx_hash_fields {
189 MLX5_RX_HASH_SRC_IPV4 = 1 << 0,
190 MLX5_RX_HASH_DST_IPV4 = 1 << 1,
191 MLX5_RX_HASH_SRC_IPV6 = 1 << 2,
192 MLX5_RX_HASH_DST_IPV6 = 1 << 3,
193 MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4,
194 MLX5_RX_HASH_DST_PORT_TCP = 1 << 5,
195 MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6,
196 MLX5_RX_HASH_DST_PORT_UDP = 1 << 7
197};
198
199struct mlx5_ib_create_qp_rss {
200 __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
201 __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
202 __u8 rx_key_len; /* valid only for Toeplitz */
203 __u8 reserved[6];
204 __u8 rx_hash_key[128]; /* valid only for Toeplitz */
205 __u32 comp_mask;
206 __u32 reserved1;
207};
208
151struct mlx5_ib_create_qp_resp { 209struct mlx5_ib_create_qp_resp {
152 __u32 uuar_index; 210 __u32 uuar_index;
153}; 211};
@@ -159,6 +217,32 @@ struct mlx5_ib_alloc_mw {
159 __u16 reserved2; 217 __u16 reserved2;
160}; 218};
161 219
220struct mlx5_ib_create_wq {
221 __u64 buf_addr;
222 __u64 db_addr;
223 __u32 rq_wqe_count;
224 __u32 rq_wqe_shift;
225 __u32 user_index;
226 __u32 flags;
227 __u32 comp_mask;
228 __u32 reserved;
229};
230
231struct mlx5_ib_create_wq_resp {
232 __u32 response_length;
233 __u32 reserved;
234};
235
236struct mlx5_ib_create_rwq_ind_tbl_resp {
237 __u32 response_length;
238 __u32 reserved;
239};
240
241struct mlx5_ib_modify_wq {
242 __u32 comp_mask;
243 __u32 reserved;
244};
245
162static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, 246static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
163 struct mlx5_ib_create_qp *ucmd, 247 struct mlx5_ib_create_qp *ucmd,
164 int inlen, 248 int inlen,
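
The new mlx5_ib_create_qp_rss command above is what user space passes through the create-QP udata when asking for an RSS raw-packet QP built on a receive-WQ indirection table. A hedged sketch of filling it for Toeplitz hashing over IPv4 addresses and TCP ports follows; the struct, enum, and flag names are taken from the hunk above, while the helper itself and the key length are illustrative (the header only bounds rx_hash_key at 128 bytes; a typical Toeplitz key is 40 bytes).

#include <stdint.h>
#include <string.h>

/* Assumes the mlx5_ib_create_qp_rss definition from the hunk above is
 * visible; fill_rss_cmd() is an illustrative helper, not part of the ABI.
 */
static void fill_rss_cmd(struct mlx5_ib_create_qp_rss *cmd,
			 const uint8_t *toeplitz_key, uint8_t key_len)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ;
	/* IPv4 + TCP ports; *IPV4/*IPV6 and *TCP/*UDP must not be mixed */
	cmd->rx_hash_fields_mask = MLX5_RX_HASH_SRC_IPV4 |
				   MLX5_RX_HASH_DST_IPV4 |
				   MLX5_RX_HASH_SRC_PORT_TCP |
				   MLX5_RX_HASH_DST_PORT_TCP;
	cmd->rx_key_len = key_len;		/* valid only for Toeplitz */
	memcpy(cmd->rx_hash_key, toeplitz_key, key_len); /* key_len <= 128 */
}
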
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 9866c35cc977..da2335f7f7c3 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1081,16 +1081,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1081 return sprintf(buf, "%x\n", dev->rev_id); 1081 return sprintf(buf, "%x\n", dev->rev_id);
1082} 1082}
1083 1083
1084static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1085 char *buf)
1086{
1087 struct mthca_dev *dev =
1088 container_of(device, struct mthca_dev, ib_dev.dev);
1089 return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
1090 (int) (dev->fw_ver >> 16) & 0xffff,
1091 (int) dev->fw_ver & 0xffff);
1092}
1093
1094static ssize_t show_hca(struct device *device, struct device_attribute *attr, 1084static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1095 char *buf) 1085 char *buf)
1096{ 1086{
@@ -1120,13 +1110,11 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
1120} 1110}
1121 1111
1122static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1112static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
1123static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1124static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1113static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
1125static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1114static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
1126 1115
1127static struct device_attribute *mthca_dev_attributes[] = { 1116static struct device_attribute *mthca_dev_attributes[] = {
1128 &dev_attr_hw_rev, 1117 &dev_attr_hw_rev,
1129 &dev_attr_fw_ver,
1130 &dev_attr_hca_type, 1118 &dev_attr_hca_type,
1131 &dev_attr_board_id 1119 &dev_attr_board_id
1132}; 1120};
@@ -1187,6 +1175,17 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
1187 return 0; 1175 return 0;
1188} 1176}
1189 1177
1178static void get_dev_fw_str(struct ib_device *device, char *str,
1179 size_t str_len)
1180{
1181 struct mthca_dev *dev =
1182 container_of(device, struct mthca_dev, ib_dev);
1183 snprintf(str, str_len, "%d.%d.%d",
1184 (int) (dev->fw_ver >> 32),
1185 (int) (dev->fw_ver >> 16) & 0xffff,
1186 (int) dev->fw_ver & 0xffff);
1187}
1188
1190int mthca_register_device(struct mthca_dev *dev) 1189int mthca_register_device(struct mthca_dev *dev)
1191{ 1190{
1192 int ret; 1191 int ret;
@@ -1266,6 +1265,7 @@ int mthca_register_device(struct mthca_dev *dev)
1266 dev->ib_dev.reg_user_mr = mthca_reg_user_mr; 1265 dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
1267 dev->ib_dev.dereg_mr = mthca_dereg_mr; 1266 dev->ib_dev.dereg_mr = mthca_dereg_mr;
1268 dev->ib_dev.get_port_immutable = mthca_port_immutable; 1267 dev->ib_dev.get_port_immutable = mthca_port_immutable;
1268 dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
1269 1269
1270 if (dev->mthca_flags & MTHCA_FLAG_FMR) { 1270 if (dev->mthca_flags & MTHCA_FLAG_FMR) {
1271 dev->ib_dev.alloc_fmr = mthca_alloc_fmr; 1271 dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
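
mthca above (and nes, ocrdma, and usnic further below) drops its private fw_ver sysfs attribute and instead formats the firmware string through the new get_dev_fw_str callback on struct ib_device, so the attribute can be exposed once by the IB core rather than once per driver. A sketch of what a core-side consumer of the callback might look like is below; only the callback field itself comes from the hunks, the helper name is illustrative.

/* Illustrative core-side consumer; the helper name is not the core's own. */
static void example_get_fw_str(struct ib_device *dev, char *str, size_t len)
{
	if (dev->get_dev_fw_str)
		dev->get_dev_fw_str(dev, str, len);
	else if (len)
		str[0] = '\0';
}
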
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 74c6a9426047..6727af27c017 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -98,7 +98,7 @@ int mthca_reset(struct mthca_dev *mdev)
98 err = -ENOMEM; 98 err = -ENOMEM;
99 mthca_err(mdev, "Couldn't allocate memory to save HCA " 99 mthca_err(mdev, "Couldn't allocate memory to save HCA "
100 "PCI header, aborting.\n"); 100 "PCI header, aborting.\n");
101 goto out; 101 goto put_dev;
102 } 102 }
103 103
104 for (i = 0; i < 64; ++i) { 104 for (i = 0; i < 64; ++i) {
@@ -108,7 +108,7 @@ int mthca_reset(struct mthca_dev *mdev)
108 err = -ENODEV; 108 err = -ENODEV;
109 mthca_err(mdev, "Couldn't save HCA " 109 mthca_err(mdev, "Couldn't save HCA "
110 "PCI header, aborting.\n"); 110 "PCI header, aborting.\n");
111 goto out; 111 goto free_hca;
112 } 112 }
113 } 113 }
114 114
@@ -121,7 +121,7 @@ int mthca_reset(struct mthca_dev *mdev)
121 err = -ENOMEM; 121 err = -ENOMEM;
122 mthca_err(mdev, "Couldn't allocate memory to save HCA " 122 mthca_err(mdev, "Couldn't allocate memory to save HCA "
123 "bridge PCI header, aborting.\n"); 123 "bridge PCI header, aborting.\n");
124 goto out; 124 goto free_hca;
125 } 125 }
126 126
127 for (i = 0; i < 64; ++i) { 127 for (i = 0; i < 64; ++i) {
@@ -131,7 +131,7 @@ int mthca_reset(struct mthca_dev *mdev)
131 err = -ENODEV; 131 err = -ENODEV;
132 mthca_err(mdev, "Couldn't save HCA bridge " 132 mthca_err(mdev, "Couldn't save HCA bridge "
133 "PCI header, aborting.\n"); 133 "PCI header, aborting.\n");
134 goto out; 134 goto free_bh;
135 } 135 }
136 } 136 }
137 bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX); 137 bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
@@ -139,7 +139,7 @@ int mthca_reset(struct mthca_dev *mdev)
139 err = -ENODEV; 139 err = -ENODEV;
140 mthca_err(mdev, "Couldn't locate HCA bridge " 140 mthca_err(mdev, "Couldn't locate HCA bridge "
141 "PCI-X capability, aborting.\n"); 141 "PCI-X capability, aborting.\n");
142 goto out; 142 goto free_bh;
143 } 143 }
144 } 144 }
145 145
@@ -152,7 +152,7 @@ int mthca_reset(struct mthca_dev *mdev)
152 err = -ENOMEM; 152 err = -ENOMEM;
153 mthca_err(mdev, "Couldn't map HCA reset register, " 153 mthca_err(mdev, "Couldn't map HCA reset register, "
154 "aborting.\n"); 154 "aborting.\n");
155 goto out; 155 goto free_bh;
156 } 156 }
157 157
158 writel(MTHCA_RESET_VALUE, reset); 158 writel(MTHCA_RESET_VALUE, reset);
@@ -172,7 +172,7 @@ int mthca_reset(struct mthca_dev *mdev)
172 err = -ENODEV; 172 err = -ENODEV;
173 mthca_err(mdev, "Couldn't access HCA after reset, " 173 mthca_err(mdev, "Couldn't access HCA after reset, "
174 "aborting.\n"); 174 "aborting.\n");
175 goto out; 175 goto free_bh;
176 } 176 }
177 177
178 if (v != 0xffffffff) 178 if (v != 0xffffffff)
@@ -184,7 +184,7 @@ int mthca_reset(struct mthca_dev *mdev)
184 err = -ENODEV; 184 err = -ENODEV;
185 mthca_err(mdev, "PCI device did not come back after reset, " 185 mthca_err(mdev, "PCI device did not come back after reset, "
186 "aborting.\n"); 186 "aborting.\n");
187 goto out; 187 goto free_bh;
188 } 188 }
189 189
190good: 190good:
@@ -195,14 +195,14 @@ good:
195 err = -ENODEV; 195 err = -ENODEV;
196 mthca_err(mdev, "Couldn't restore HCA bridge Upstream " 196 mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
197 "split transaction control, aborting.\n"); 197 "split transaction control, aborting.\n");
198 goto out; 198 goto free_bh;
199 } 199 }
200 if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc, 200 if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
201 bridge_header[(bridge_pcix_cap + 0xc) / 4])) { 201 bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
202 err = -ENODEV; 202 err = -ENODEV;
203 mthca_err(mdev, "Couldn't restore HCA bridge Downstream " 203 mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
204 "split transaction control, aborting.\n"); 204 "split transaction control, aborting.\n");
205 goto out; 205 goto free_bh;
206 } 206 }
207 /* 207 /*
208 * Bridge control register is at 0x3e, so we'll 208 * Bridge control register is at 0x3e, so we'll
@@ -216,7 +216,7 @@ good:
216 err = -ENODEV; 216 err = -ENODEV;
217 mthca_err(mdev, "Couldn't restore HCA bridge reg %x, " 217 mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
218 "aborting.\n", i); 218 "aborting.\n", i);
219 goto out; 219 goto free_bh;
220 } 220 }
221 } 221 }
222 222
@@ -225,7 +225,7 @@ good:
225 err = -ENODEV; 225 err = -ENODEV;
226 mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, " 226 mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
227 "aborting.\n"); 227 "aborting.\n");
228 goto out; 228 goto free_bh;
229 } 229 }
230 } 230 }
231 231
@@ -235,7 +235,7 @@ good:
235 err = -ENODEV; 235 err = -ENODEV;
236 mthca_err(mdev, "Couldn't restore HCA PCI-X " 236 mthca_err(mdev, "Couldn't restore HCA PCI-X "
237 "command register, aborting.\n"); 237 "command register, aborting.\n");
238 goto out; 238 goto free_bh;
239 } 239 }
240 } 240 }
241 241
@@ -246,7 +246,7 @@ good:
246 err = -ENODEV; 246 err = -ENODEV;
247 mthca_err(mdev, "Couldn't restore HCA PCI Express " 247 mthca_err(mdev, "Couldn't restore HCA PCI Express "
248 "Device Control register, aborting.\n"); 248 "Device Control register, aborting.\n");
249 goto out; 249 goto free_bh;
250 } 250 }
251 linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; 251 linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
252 if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL, 252 if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL,
@@ -254,7 +254,7 @@ good:
254 err = -ENODEV; 254 err = -ENODEV;
255 mthca_err(mdev, "Couldn't restore HCA PCI Express " 255 mthca_err(mdev, "Couldn't restore HCA PCI Express "
256 "Link control register, aborting.\n"); 256 "Link control register, aborting.\n");
257 goto out; 257 goto free_bh;
258 } 258 }
259 } 259 }
260 260
@@ -266,7 +266,7 @@ good:
266 err = -ENODEV; 266 err = -ENODEV;
267 mthca_err(mdev, "Couldn't restore HCA reg %x, " 267 mthca_err(mdev, "Couldn't restore HCA reg %x, "
268 "aborting.\n", i); 268 "aborting.\n", i);
269 goto out; 269 goto free_bh;
270 } 270 }
271 } 271 }
272 272
@@ -275,14 +275,12 @@ good:
275 err = -ENODEV; 275 err = -ENODEV;
276 mthca_err(mdev, "Couldn't restore HCA COMMAND, " 276 mthca_err(mdev, "Couldn't restore HCA COMMAND, "
277 "aborting.\n"); 277 "aborting.\n");
278 goto out;
279 } 278 }
280 279free_bh:
281out:
282 if (bridge)
283 pci_dev_put(bridge);
284 kfree(bridge_header); 280 kfree(bridge_header);
281free_hca:
285 kfree(hca_header); 282 kfree(hca_header);
286 283put_dev:
284 pci_dev_put(bridge);
287 return err; 285 return err;
288} 286}
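
The mthca_reset() hunks above replace the single out: label with a chain of labels so that each failure path unwinds only what has actually been set up, and the if (bridge) test disappears because pci_dev_put() accepts a NULL pointer. The general shape of that unwind, with illustrative sizes and resource names (assuming <linux/pci.h> and <linux/slab.h>), is:

static int example_unwind(struct pci_dev *bridge)
{
	u32 *hca_header, *bridge_header;
	int err = 0;

	hca_header = kmalloc(256, GFP_KERNEL);
	if (!hca_header) {
		err = -ENOMEM;
		goto put_dev;		/* nothing else allocated yet */
	}

	bridge_header = kmalloc(256, GFP_KERNEL);
	if (!bridge_header) {
		err = -ENOMEM;
		goto free_hca;		/* only hca_header to free */
	}

	/* ... later failures jump to free_bh; success falls through ... */

free_bh:
	kfree(bridge_header);
free_hca:
	kfree(hca_header);
put_dev:
	pci_dev_put(bridge);		/* pci_dev_put(NULL) is a no-op */
	return err;
}
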
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 464d6da5fe91..bd69125731c1 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2606,23 +2606,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
2606 2606
2607 2607
2608/** 2608/**
2609 * show_fw_ver
2610 */
2611static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
2612 char *buf)
2613{
2614 struct nes_ib_device *nesibdev =
2615 container_of(dev, struct nes_ib_device, ibdev.dev);
2616 struct nes_vnic *nesvnic = nesibdev->nesvnic;
2617
2618 nes_debug(NES_DBG_INIT, "\n");
2619 return sprintf(buf, "%u.%u\n",
2620 (nesvnic->nesdev->nesadapter->firmware_version >> 16),
2621 (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
2622}
2623
2624
2625/**
2626 * show_hca 2609 * show_hca
2627 */ 2610 */
2628static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 2611static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
@@ -2645,13 +2628,11 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
2645 2628
2646 2629
2647static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 2630static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2648static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
2649static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 2631static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2650static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 2632static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
2651 2633
2652static struct device_attribute *nes_dev_attributes[] = { 2634static struct device_attribute *nes_dev_attributes[] = {
2653 &dev_attr_hw_rev, 2635 &dev_attr_hw_rev,
2654 &dev_attr_fw_ver,
2655 &dev_attr_hca_type, 2636 &dev_attr_hca_type,
2656 &dev_attr_board_id 2637 &dev_attr_board_id
2657}; 2638};
@@ -3703,6 +3684,19 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
3703 return 0; 3684 return 0;
3704} 3685}
3705 3686
3687static void get_dev_fw_str(struct ib_device *dev, char *str,
3688 size_t str_len)
3689{
3690 struct nes_ib_device *nesibdev =
3691 container_of(dev, struct nes_ib_device, ibdev);
3692 struct nes_vnic *nesvnic = nesibdev->nesvnic;
3693
3694 nes_debug(NES_DBG_INIT, "\n");
3695 snprintf(str, str_len, "%u.%u",
3696 (nesvnic->nesdev->nesadapter->firmware_version >> 16),
3697 (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
3698}
3699
3706/** 3700/**
3707 * nes_init_ofa_device 3701 * nes_init_ofa_device
3708 */ 3702 */
@@ -3802,6 +3796,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3802 nesibdev->ibdev.iwcm->create_listen = nes_create_listen; 3796 nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
3803 nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; 3797 nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
3804 nesibdev->ibdev.get_port_immutable = nes_port_immutable; 3798 nesibdev->ibdev.get_port_immutable = nes_port_immutable;
3799 nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str;
3805 memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, 3800 memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
3806 sizeof(nesibdev->ibdev.iwcm->ifname)); 3801 sizeof(nesibdev->ibdev.iwcm->ifname));
3807 3802
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 3d75f65ce87e..07d0c6c5b046 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -107,6 +107,14 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
107 return 0; 107 return 0;
108} 108}
109 109
110static void get_dev_fw_str(struct ib_device *device, char *str,
111 size_t str_len)
112{
113 struct ocrdma_dev *dev = get_ocrdma_dev(device);
114
115 snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]);
116}
117
110static int ocrdma_register_device(struct ocrdma_dev *dev) 118static int ocrdma_register_device(struct ocrdma_dev *dev)
111{ 119{
112 strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); 120 strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
@@ -193,6 +201,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
193 201
194 dev->ibdev.process_mad = ocrdma_process_mad; 202 dev->ibdev.process_mad = ocrdma_process_mad;
195 dev->ibdev.get_port_immutable = ocrdma_port_immutable; 203 dev->ibdev.get_port_immutable = ocrdma_port_immutable;
204 dev->ibdev.get_dev_fw_str = get_dev_fw_str;
196 205
197 if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { 206 if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
198 dev->ibdev.uverbs_cmd_mask |= 207 dev->ibdev.uverbs_cmd_mask |=
@@ -262,14 +271,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
262 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); 271 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
263} 272}
264 273
265static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
266 char *buf)
267{
268 struct ocrdma_dev *dev = dev_get_drvdata(device);
269
270 return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
271}
272
273static ssize_t show_hca_type(struct device *device, 274static ssize_t show_hca_type(struct device *device,
274 struct device_attribute *attr, char *buf) 275 struct device_attribute *attr, char *buf)
275{ 276{
@@ -279,12 +280,10 @@ static ssize_t show_hca_type(struct device *device,
279} 280}
280 281
281static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 282static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
282static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
283static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); 283static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
284 284
285static struct device_attribute *ocrdma_attributes[] = { 285static struct device_attribute *ocrdma_attributes[] = {
286 &dev_attr_hw_rev, 286 &dev_attr_hw_rev,
287 &dev_attr_fw_ver,
288 &dev_attr_hca_type 287 &dev_attr_hca_type
289}; 288};
290 289
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 565c881a44ba..c229b9f4a52d 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -331,6 +331,21 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
331 return 0; 331 return 0;
332} 332}
333 333
334static void usnic_get_dev_fw_str(struct ib_device *device,
335 char *str,
336 size_t str_len)
337{
338 struct usnic_ib_dev *us_ibdev =
339 container_of(device, struct usnic_ib_dev, ib_dev);
340 struct ethtool_drvinfo info;
341
342 mutex_lock(&us_ibdev->usdev_lock);
343 us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
344 mutex_unlock(&us_ibdev->usdev_lock);
345
346 snprintf(str, str_len, "%s", info.fw_version);
347}
348
334/* Start of PF discovery section */ 349/* Start of PF discovery section */
335static void *usnic_ib_device_add(struct pci_dev *dev) 350static void *usnic_ib_device_add(struct pci_dev *dev)
336{ 351{
@@ -414,6 +429,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
414 us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; 429 us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
415 us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; 430 us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
416 us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; 431 us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable;
432 us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str;
417 433
418 434
419 if (ib_register_device(&us_ibdev->ib_dev, NULL)) 435 if (ib_register_device(&us_ibdev->ib_dev, NULL))
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 3412ea06116e..80ef3f8998c8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -45,21 +45,6 @@
45#include "usnic_ib_verbs.h" 45#include "usnic_ib_verbs.h"
46#include "usnic_log.h" 46#include "usnic_log.h"
47 47
48static ssize_t usnic_ib_show_fw_ver(struct device *device,
49 struct device_attribute *attr,
50 char *buf)
51{
52 struct usnic_ib_dev *us_ibdev =
53 container_of(device, struct usnic_ib_dev, ib_dev.dev);
54 struct ethtool_drvinfo info;
55
56 mutex_lock(&us_ibdev->usdev_lock);
57 us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
58 mutex_unlock(&us_ibdev->usdev_lock);
59
60 return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
61}
62
63static ssize_t usnic_ib_show_board(struct device *device, 48static ssize_t usnic_ib_show_board(struct device *device,
64 struct device_attribute *attr, 49 struct device_attribute *attr,
65 char *buf) 50 char *buf)
@@ -192,7 +177,6 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
192 us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); 177 us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
193} 178}
194 179
195static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
196static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); 180static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
197static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); 181static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
198static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL); 182static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
@@ -201,7 +185,6 @@ static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
201static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL); 185static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
202 186
203static struct device_attribute *usnic_class_attributes[] = { 187static struct device_attribute *usnic_class_attributes[] = {
204 &dev_attr_fw_ver,
205 &dev_attr_board_id, 188 &dev_attr_board_id,
206 &dev_attr_config, 189 &dev_attr_config,
207 &dev_attr_iface, 190 &dev_attr_iface,
diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile
index 988b6a0101a4..8b095b27db87 100644
--- a/drivers/infiniband/sw/Makefile
+++ b/drivers/infiniband/sw/Makefile
@@ -1 +1,2 @@
1obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/ 1obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/
2obj-$(CONFIG_RDMA_RXE) += rxe/
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 11aa6a34bd71..1da8d01a6855 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,6 +1,5 @@
1config INFINIBAND_RDMAVT 1config INFINIBAND_RDMAVT
2 tristate "RDMA verbs transport library" 2 tristate "RDMA verbs transport library"
3 depends on 64BIT 3 depends on 64BIT
4 default m
5 ---help--- 4 ---help---
6 This is a common software verbs provider for RDMA networks. 5 This is a common software verbs provider for RDMA networks.
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
new file mode 100644
index 000000000000..1e4e628fe7b0
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -0,0 +1,24 @@
1config RDMA_RXE
2 tristate "Software RDMA over Ethernet (RoCE) driver"
3 depends on INET && PCI && INFINIBAND
4 depends on NET_UDP_TUNNEL
5 ---help---
6 This driver implements the InfiniBand RDMA transport over
7 the Linux network stack. It enables a system with a
8 standard Ethernet adapter to interoperate with a RoCE
9 adapter or with another system running the RXE driver.
10 Documentation on InfiniBand and RoCE can be downloaded at
11 www.infinibandta.org and www.openfabrics.org. (See also
 12 siw, which is a similar software driver for iWARP.)
13
14 The driver is split into two layers, one interfaces with the
15 Linux RDMA stack and implements a kernel or user space
16 verbs API. The user space verbs API requires a support
17 library named librxe which is loaded by the generic user
18 space verbs API, libibverbs. The other layer interfaces
19 with the Linux network stack at layer 3.
20
21	  To configure and work with the soft-RoCE driver, please see the
22	  "configure Soft-RoCE (RXE)" section of the following wiki page:
23
24 https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
new file mode 100644
index 000000000000..3b3fb9d1c470
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -0,0 +1,24 @@
1obj-$(CONFIG_RDMA_RXE) += rdma_rxe.o
2
3rdma_rxe-y := \
4 rxe.o \
5 rxe_comp.o \
6 rxe_req.o \
7 rxe_resp.o \
8 rxe_recv.o \
9 rxe_pool.o \
10 rxe_queue.o \
11 rxe_verbs.o \
12 rxe_av.o \
13 rxe_srq.o \
14 rxe_qp.o \
15 rxe_cq.o \
16 rxe_mr.o \
17 rxe_dma.o \
18 rxe_opcode.o \
19 rxe_mmap.o \
20 rxe_icrc.o \
21 rxe_mcast.o \
22 rxe_task.o \
23 rxe_net.o \
24 rxe_sysfs.o
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
new file mode 100644
index 000000000000..55f0e8f0ca79
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -0,0 +1,386 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36
37MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
38MODULE_DESCRIPTION("Soft RDMA transport");
39MODULE_LICENSE("Dual BSD/GPL");
40MODULE_VERSION("0.2");
41
42/* free resources for all ports on a device */
43static void rxe_cleanup_ports(struct rxe_dev *rxe)
44{
45 kfree(rxe->port.pkey_tbl);
46 rxe->port.pkey_tbl = NULL;
47
48}
49
50/* free resources for a rxe device; all objects created for this device
51 * must already have been destroyed
52 */
53static void rxe_cleanup(struct rxe_dev *rxe)
54{
55 rxe_pool_cleanup(&rxe->uc_pool);
56 rxe_pool_cleanup(&rxe->pd_pool);
57 rxe_pool_cleanup(&rxe->ah_pool);
58 rxe_pool_cleanup(&rxe->srq_pool);
59 rxe_pool_cleanup(&rxe->qp_pool);
60 rxe_pool_cleanup(&rxe->cq_pool);
61 rxe_pool_cleanup(&rxe->mr_pool);
62 rxe_pool_cleanup(&rxe->mw_pool);
63 rxe_pool_cleanup(&rxe->mc_grp_pool);
64 rxe_pool_cleanup(&rxe->mc_elem_pool);
65
66 rxe_cleanup_ports(rxe);
67}
68
69/* called when all references have been dropped */
70void rxe_release(struct kref *kref)
71{
72 struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);
73
74 rxe_cleanup(rxe);
75 ib_dealloc_device(&rxe->ib_dev);
76}
77
78void rxe_dev_put(struct rxe_dev *rxe)
79{
80 kref_put(&rxe->ref_cnt, rxe_release);
81}
82EXPORT_SYMBOL_GPL(rxe_dev_put);
83
84/* initialize rxe device parameters */
85static int rxe_init_device_param(struct rxe_dev *rxe)
86{
87 rxe->max_inline_data = RXE_MAX_INLINE_DATA;
88
89 rxe->attr.fw_ver = RXE_FW_VER;
90 rxe->attr.max_mr_size = RXE_MAX_MR_SIZE;
91 rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP;
92 rxe->attr.vendor_id = RXE_VENDOR_ID;
93 rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID;
94 rxe->attr.hw_ver = RXE_HW_VER;
95 rxe->attr.max_qp = RXE_MAX_QP;
96 rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
97 rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
98 rxe->attr.max_sge = RXE_MAX_SGE;
99 rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;
100 rxe->attr.max_cq = RXE_MAX_CQ;
101 rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1;
102 rxe->attr.max_mr = RXE_MAX_MR;
103 rxe->attr.max_pd = RXE_MAX_PD;
104 rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM;
105 rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM;
106 rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;
107 rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM;
108 rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM;
109 rxe->attr.atomic_cap = RXE_ATOMIC_CAP;
110 rxe->attr.max_ee = RXE_MAX_EE;
111 rxe->attr.max_rdd = RXE_MAX_RDD;
112 rxe->attr.max_mw = RXE_MAX_MW;
113 rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP;
114 rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP;
115 rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP;
116 rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH;
117 rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH;
118 rxe->attr.max_ah = RXE_MAX_AH;
119 rxe->attr.max_fmr = RXE_MAX_FMR;
120 rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR;
121 rxe->attr.max_srq = RXE_MAX_SRQ;
122 rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR;
123 rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE;
124 rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN;
125 rxe->attr.max_pkeys = RXE_MAX_PKEYS;
126 rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;
127
128 rxe->max_ucontext = RXE_MAX_UCONTEXT;
129
130 return 0;
131}
132
133/* initialize port attributes */
134static int rxe_init_port_param(struct rxe_port *port)
135{
136 port->attr.state = RXE_PORT_STATE;
137 port->attr.max_mtu = RXE_PORT_MAX_MTU;
138 port->attr.active_mtu = RXE_PORT_ACTIVE_MTU;
139 port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN;
140 port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS;
141 port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ;
142 port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR;
143 port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR;
144 port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN;
145 port->attr.lid = RXE_PORT_LID;
146 port->attr.sm_lid = RXE_PORT_SM_LID;
147 port->attr.lmc = RXE_PORT_LMC;
148 port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM;
149 port->attr.sm_sl = RXE_PORT_SM_SL;
150 port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT;
151 port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY;
152 port->attr.active_width = RXE_PORT_ACTIVE_WIDTH;
153 port->attr.active_speed = RXE_PORT_ACTIVE_SPEED;
154 port->attr.phys_state = RXE_PORT_PHYS_STATE;
155 port->mtu_cap =
156 ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);
157 port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);
158
159 return 0;
160}
161
162/* initialize port state, note IB convention that HCA ports are always
163 * numbered from 1
164 */
165static int rxe_init_ports(struct rxe_dev *rxe)
166{
167 struct rxe_port *port = &rxe->port;
168
169 rxe_init_port_param(port);
170
171 if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len)
172 return -EINVAL;
173
174 port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len,
175 sizeof(*port->pkey_tbl), GFP_KERNEL);
176
177 if (!port->pkey_tbl)
178 return -ENOMEM;
179
180 port->pkey_tbl[0] = 0xffff;
181 port->port_guid = rxe->ifc_ops->port_guid(rxe);
182
183 spin_lock_init(&port->port_lock);
184
185 return 0;
186}
187
188/* init pools of managed objects */
189static int rxe_init_pools(struct rxe_dev *rxe)
190{
191 int err;
192
193 err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC,
194 rxe->max_ucontext);
195 if (err)
196 goto err1;
197
198 err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD,
199 rxe->attr.max_pd);
200 if (err)
201 goto err2;
202
203 err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH,
204 rxe->attr.max_ah);
205 if (err)
206 goto err3;
207
208 err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ,
209 rxe->attr.max_srq);
210 if (err)
211 goto err4;
212
213 err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP,
214 rxe->attr.max_qp);
215 if (err)
216 goto err5;
217
218 err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ,
219 rxe->attr.max_cq);
220 if (err)
221 goto err6;
222
223 err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR,
224 rxe->attr.max_mr);
225 if (err)
226 goto err7;
227
228 err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW,
229 rxe->attr.max_mw);
230 if (err)
231 goto err8;
232
233 err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP,
234 rxe->attr.max_mcast_grp);
235 if (err)
236 goto err9;
237
238 err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
239 rxe->attr.max_total_mcast_qp_attach);
240 if (err)
241 goto err10;
242
243 return 0;
244
245err10:
246 rxe_pool_cleanup(&rxe->mc_grp_pool);
247err9:
248 rxe_pool_cleanup(&rxe->mw_pool);
249err8:
250 rxe_pool_cleanup(&rxe->mr_pool);
251err7:
252 rxe_pool_cleanup(&rxe->cq_pool);
253err6:
254 rxe_pool_cleanup(&rxe->qp_pool);
255err5:
256 rxe_pool_cleanup(&rxe->srq_pool);
257err4:
258 rxe_pool_cleanup(&rxe->ah_pool);
259err3:
260 rxe_pool_cleanup(&rxe->pd_pool);
261err2:
262 rxe_pool_cleanup(&rxe->uc_pool);
263err1:
264 return err;
265}
266
267/* initialize rxe device state */
268static int rxe_init(struct rxe_dev *rxe)
269{
270 int err;
271
272 /* init default device parameters */
273 rxe_init_device_param(rxe);
274
275 err = rxe_init_ports(rxe);
276 if (err)
277 goto err1;
278
279 err = rxe_init_pools(rxe);
280 if (err)
281 goto err2;
282
283 /* init pending mmap list */
284 spin_lock_init(&rxe->mmap_offset_lock);
285 spin_lock_init(&rxe->pending_lock);
286 INIT_LIST_HEAD(&rxe->pending_mmaps);
287 INIT_LIST_HEAD(&rxe->list);
288
289 mutex_init(&rxe->usdev_lock);
290
291 return 0;
292
293err2:
294 rxe_cleanup_ports(rxe);
295err1:
296 return err;
297}
298
299int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
300{
301 struct rxe_port *port = &rxe->port;
302 enum ib_mtu mtu;
303
304 mtu = eth_mtu_int_to_enum(ndev_mtu);
305
306	/* Make sure that the new MTU is in range */
307 mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256;
308
309 port->attr.active_mtu = mtu;
310 port->mtu_cap = ib_mtu_enum_to_int(mtu);
311
312 return 0;
313}
314EXPORT_SYMBOL(rxe_set_mtu);
315
316/* called by the ifc layer to create a new rxe device.
317 * The caller should allocate memory for rxe by calling ib_alloc_device.
318 */
319int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
320{
321 int err;
322
323 kref_init(&rxe->ref_cnt);
324
325 err = rxe_init(rxe);
326 if (err)
327 goto err1;
328
329 err = rxe_set_mtu(rxe, mtu);
330 if (err)
331 goto err1;
332
333 err = rxe_register_device(rxe);
334 if (err)
335 goto err1;
336
337 return 0;
338
339err1:
340 rxe_dev_put(rxe);
341 return err;
342}
343EXPORT_SYMBOL(rxe_add);
344
345/* called by the ifc layer to remove a device */
346void rxe_remove(struct rxe_dev *rxe)
347{
348 rxe_unregister_device(rxe);
349
350 rxe_dev_put(rxe);
351}
352EXPORT_SYMBOL(rxe_remove);
353
354static int __init rxe_module_init(void)
355{
356 int err;
357
358 /* initialize slab caches for managed objects */
359 err = rxe_cache_init();
360 if (err) {
361 pr_err("rxe: unable to init object pools\n");
362 return err;
363 }
364
365 err = rxe_net_init();
366 if (err) {
367 pr_err("rxe: unable to init\n");
368 rxe_cache_exit();
369 return err;
370 }
371 pr_info("rxe: loaded\n");
372
373 return 0;
374}
375
376static void __exit rxe_module_exit(void)
377{
378 rxe_remove_all();
379 rxe_net_exit();
380 rxe_cache_exit();
381
382 pr_info("rxe: unloaded\n");
383}
384
385module_init(rxe_module_init);
386module_exit(rxe_module_exit);
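rxe_add() takes a single reference with kref_init(), and every failure path simply calls rxe_dev_put(), so the last reference drop runs rxe_release(), which cleans up and frees the device. The following user-space sketch illustrates that reference-counting idiom; the toy_* names and the hand-rolled refcount are simplified stand-ins, not the kernel kref API.

#include <stdio.h>
#include <stdlib.h>

struct toy_dev {
	int refcnt;              /* stands in for struct kref */
	void *resources;         /* stands in for the pools and pkey table */
};

static void toy_release(struct toy_dev *dev)
{
	free(dev->resources);    /* analogous to rxe_cleanup() */
	free(dev);               /* analogous to ib_dealloc_device() */
}

static void toy_put(struct toy_dev *dev)
{
	if (--dev->refcnt == 0)  /* analogous to kref_put(..., rxe_release) */
		toy_release(dev);
}

static int toy_add(struct toy_dev *dev, int fail_init)
{
	dev->refcnt = 1;         /* analogous to kref_init() */

	dev->resources = malloc(64);
	if (!dev->resources || fail_init) {
		toy_put(dev);    /* error path: just drop the reference */
		return -1;
	}
	return 0;
}

int main(void)
{
	struct toy_dev *dev = calloc(1, sizeof(*dev));

	if (!dev)
		return 1;
	if (toy_add(dev, 1) != 0)
		printf("add failed, device already torn down\n");
	else
		toy_put(dev);
	return 0;
}

The same shape explains why rxe_remove() is just rxe_unregister_device() followed by rxe_dev_put(): outstanding references keep the device alive until the last user drops its reference.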
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
new file mode 100644
index 000000000000..12c71c549f97
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -0,0 +1,77 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_H
35#define RXE_H
36
37#include <linux/module.h>
38#include <linux/skbuff.h>
39#include <linux/crc32.h>
40
41#include <rdma/ib_verbs.h>
42#include <rdma/ib_user_verbs.h>
43#include <rdma/ib_pack.h>
44#include <rdma/ib_smi.h>
45#include <rdma/ib_umem.h>
46#include <rdma/ib_cache.h>
47#include <rdma/ib_addr.h>
48
49#include "rxe_net.h"
50#include "rxe_opcode.h"
51#include "rxe_hdr.h"
52#include "rxe_param.h"
53#include "rxe_verbs.h"
54
55#define RXE_UVERBS_ABI_VERSION (1)
56
57#define IB_PHYS_STATE_LINK_UP (5)
58#define IB_PHYS_STATE_LINK_DOWN (3)
59
60#define RXE_ROCE_V2_SPORT (0xc000)
61
62int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
63
64int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
65void rxe_remove(struct rxe_dev *rxe);
66void rxe_remove_all(void);
67
68int rxe_rcv(struct sk_buff *skb);
69
70void rxe_dev_put(struct rxe_dev *rxe);
71struct rxe_dev *net_to_rxe(struct net_device *ndev);
72struct rxe_dev *get_rxe_by_name(const char* name);
73
74void rxe_port_up(struct rxe_dev *rxe);
75void rxe_port_down(struct rxe_dev *rxe);
76
77#endif /* RXE_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
new file mode 100644
index 000000000000..5c9474212d4e
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -0,0 +1,98 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36
37int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
38{
39 struct rxe_port *port;
40
41 if (attr->port_num != 1) {
42 pr_info("rxe: invalid port_num = %d\n", attr->port_num);
43 return -EINVAL;
44 }
45
46 port = &rxe->port;
47
48 if (attr->ah_flags & IB_AH_GRH) {
49 if (attr->grh.sgid_index > port->attr.gid_tbl_len) {
50 pr_info("rxe: invalid sgid index = %d\n",
51 attr->grh.sgid_index);
52 return -EINVAL;
53 }
54 }
55
56 return 0;
57}
58
59int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
60 struct rxe_av *av, struct ib_ah_attr *attr)
61{
62 memset(av, 0, sizeof(*av));
63 memcpy(&av->grh, &attr->grh, sizeof(attr->grh));
64 av->port_num = port_num;
65 return 0;
66}
67
68int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
69 struct ib_ah_attr *attr)
70{
71 memcpy(&attr->grh, &av->grh, sizeof(av->grh));
72 attr->port_num = av->port_num;
73 return 0;
74}
75
76int rxe_av_fill_ip_info(struct rxe_dev *rxe,
77 struct rxe_av *av,
78 struct ib_ah_attr *attr,
79 struct ib_gid_attr *sgid_attr,
80 union ib_gid *sgid)
81{
82 rdma_gid2ip(&av->sgid_addr._sockaddr, sgid);
83 rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid);
84 av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
85
86 return 0;
87}
88
89struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
90{
91 if (!pkt || !pkt->qp)
92 return NULL;
93
94 if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC)
95 return &pkt->qp->pri_av;
96
97 return (pkt->wqe) ? &pkt->wqe->av : NULL;
98}
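rxe_get_av() encodes a simple rule: connected QPs (RC and UC) resolve the address vector from the QP itself, while datagram QPs carry one in each send WQE. Below is a standalone sketch of that selection logic using simplified toy_* types rather than the rxe structures.

#include <stdio.h>
#include <stddef.h>

enum toy_qp_type { TOY_QPT_RC, TOY_QPT_UC, TOY_QPT_UD };

struct toy_av   { int port_num; };
struct toy_wqe  { struct toy_av av; };
struct toy_qp   { enum toy_qp_type type; struct toy_av pri_av; };
struct toy_pkt  { struct toy_qp *qp; struct toy_wqe *wqe; };

static struct toy_av *toy_get_av(struct toy_pkt *pkt)
{
	if (!pkt || !pkt->qp)
		return NULL;

	if (pkt->qp->type == TOY_QPT_RC || pkt->qp->type == TOY_QPT_UC)
		return &pkt->qp->pri_av;        /* connected: AV lives in the QP */

	return pkt->wqe ? &pkt->wqe->av : NULL; /* datagram: AV lives in the WQE */
}

int main(void)
{
	struct toy_qp rc = { TOY_QPT_RC, { .port_num = 1 } };
	struct toy_qp ud = { TOY_QPT_UD, { .port_num = 1 } };
	struct toy_wqe wqe = { { .port_num = 1 } };
	struct toy_pkt p1 = { &rc, NULL }, p2 = { &ud, &wqe };

	printf("%d %d\n", toy_get_av(&p1) == &rc.pri_av,
	       toy_get_av(&p2) == &wqe.av);
	return 0;
}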
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
new file mode 100644
index 000000000000..36f67de44095
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -0,0 +1,734 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/skbuff.h>
35
36#include "rxe.h"
37#include "rxe_loc.h"
38#include "rxe_queue.h"
39#include "rxe_task.h"
40
41enum comp_state {
42 COMPST_GET_ACK,
43 COMPST_GET_WQE,
44 COMPST_COMP_WQE,
45 COMPST_COMP_ACK,
46 COMPST_CHECK_PSN,
47 COMPST_CHECK_ACK,
48 COMPST_READ,
49 COMPST_ATOMIC,
50 COMPST_WRITE_SEND,
51 COMPST_UPDATE_COMP,
52 COMPST_ERROR_RETRY,
53 COMPST_RNR_RETRY,
54 COMPST_ERROR,
55 COMPST_EXIT, /* We have an issue, and we want to rerun the completer */
56	COMPST_DONE, /* The completer finished successfully */
57};
58
59static char *comp_state_name[] = {
60 [COMPST_GET_ACK] = "GET ACK",
61 [COMPST_GET_WQE] = "GET WQE",
62 [COMPST_COMP_WQE] = "COMP WQE",
63 [COMPST_COMP_ACK] = "COMP ACK",
64 [COMPST_CHECK_PSN] = "CHECK PSN",
65 [COMPST_CHECK_ACK] = "CHECK ACK",
66 [COMPST_READ] = "READ",
67 [COMPST_ATOMIC] = "ATOMIC",
68 [COMPST_WRITE_SEND] = "WRITE/SEND",
69 [COMPST_UPDATE_COMP] = "UPDATE COMP",
70 [COMPST_ERROR_RETRY] = "ERROR RETRY",
71 [COMPST_RNR_RETRY] = "RNR RETRY",
72 [COMPST_ERROR] = "ERROR",
73 [COMPST_EXIT] = "EXIT",
74 [COMPST_DONE] = "DONE",
75};
76
77static unsigned long rnrnak_usec[32] = {
78 [IB_RNR_TIMER_655_36] = 655360,
79 [IB_RNR_TIMER_000_01] = 10,
80 [IB_RNR_TIMER_000_02] = 20,
81 [IB_RNR_TIMER_000_03] = 30,
82 [IB_RNR_TIMER_000_04] = 40,
83 [IB_RNR_TIMER_000_06] = 60,
84 [IB_RNR_TIMER_000_08] = 80,
85 [IB_RNR_TIMER_000_12] = 120,
86 [IB_RNR_TIMER_000_16] = 160,
87 [IB_RNR_TIMER_000_24] = 240,
88 [IB_RNR_TIMER_000_32] = 320,
89 [IB_RNR_TIMER_000_48] = 480,
90 [IB_RNR_TIMER_000_64] = 640,
91 [IB_RNR_TIMER_000_96] = 960,
92 [IB_RNR_TIMER_001_28] = 1280,
93 [IB_RNR_TIMER_001_92] = 1920,
94 [IB_RNR_TIMER_002_56] = 2560,
95 [IB_RNR_TIMER_003_84] = 3840,
96 [IB_RNR_TIMER_005_12] = 5120,
97 [IB_RNR_TIMER_007_68] = 7680,
98 [IB_RNR_TIMER_010_24] = 10240,
99 [IB_RNR_TIMER_015_36] = 15360,
100 [IB_RNR_TIMER_020_48] = 20480,
101 [IB_RNR_TIMER_030_72] = 30720,
102 [IB_RNR_TIMER_040_96] = 40960,
103	[IB_RNR_TIMER_061_44]		= 61440,
104 [IB_RNR_TIMER_081_92] = 81920,
105 [IB_RNR_TIMER_122_88] = 122880,
106 [IB_RNR_TIMER_163_84] = 163840,
107 [IB_RNR_TIMER_245_76] = 245760,
108 [IB_RNR_TIMER_327_68] = 327680,
109 [IB_RNR_TIMER_491_52] = 491520,
110};
111
112static inline unsigned long rnrnak_jiffies(u8 timeout)
113{
114 return max_t(unsigned long,
115 usecs_to_jiffies(rnrnak_usec[timeout]), 1);
116}
117
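The completer turns the 5-bit RNR timer encoding carried in an AETH syndrome into a delay by indexing the microsecond table above and converting to jiffies with a floor of one tick. The standalone sketch below shows that lookup and conversion; the table is abbreviated, TOY_HZ is an assumed tick rate, and the tick arithmetic is an illustrative stand-in for usecs_to_jiffies().

#include <stdio.h>

#define TOY_HZ         250              /* assumed tick rate */
#define TOY_TYPE_MASK  0xe0             /* analogous to AETH_TYPE_MASK */

static const unsigned long toy_rnr_usec[32] = {
	[0]  = 655360,                  /* encoding 0 means 655.36 ms */
	[1]  = 10,
	[2]  = 20,
	[31] = 491520,
};

static unsigned long toy_rnr_ticks(unsigned char syndrome)
{
	unsigned long usec = toy_rnr_usec[syndrome & ~TOY_TYPE_MASK & 0x1f];
	unsigned long ticks = usec * TOY_HZ / 1000000;

	return ticks ? ticks : 1;       /* never arm a zero-length timer */
}

int main(void)
{
	printf("%lu %lu\n", toy_rnr_ticks(0x01), toy_rnr_ticks(0x00));
	return 0;
}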
118static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
119{
120 switch (opcode) {
121 case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE;
122 case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE;
123 case IB_WR_SEND: return IB_WC_SEND;
124 case IB_WR_SEND_WITH_IMM: return IB_WC_SEND;
125 case IB_WR_RDMA_READ: return IB_WC_RDMA_READ;
126 case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP;
127 case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD;
128 case IB_WR_LSO: return IB_WC_LSO;
129 case IB_WR_SEND_WITH_INV: return IB_WC_SEND;
130 case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ;
131 case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
132 case IB_WR_REG_MR: return IB_WC_REG_MR;
133
134 default:
135 return 0xff;
136 }
137}
138
139void retransmit_timer(unsigned long data)
140{
141 struct rxe_qp *qp = (struct rxe_qp *)data;
142
143 if (qp->valid) {
144 qp->comp.timeout = 1;
145 rxe_run_task(&qp->comp.task, 1);
146 }
147}
148
149void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
150 struct sk_buff *skb)
151{
152 int must_sched;
153
154 skb_queue_tail(&qp->resp_pkts, skb);
155
156 must_sched = skb_queue_len(&qp->resp_pkts) > 1;
157 rxe_run_task(&qp->comp.task, must_sched);
158}
159
160static inline enum comp_state get_wqe(struct rxe_qp *qp,
161 struct rxe_pkt_info *pkt,
162 struct rxe_send_wqe **wqe_p)
163{
164 struct rxe_send_wqe *wqe;
165
166 /* we come here whether or not we found a response packet to see if
167 * there are any posted WQEs
168 */
169 wqe = queue_head(qp->sq.queue);
170 *wqe_p = wqe;
171
172 /* no WQE or requester has not started it yet */
173 if (!wqe || wqe->state == wqe_state_posted)
174 return pkt ? COMPST_DONE : COMPST_EXIT;
175
176 /* WQE does not require an ack */
177 if (wqe->state == wqe_state_done)
178 return COMPST_COMP_WQE;
179
180 /* WQE caused an error */
181 if (wqe->state == wqe_state_error)
182 return COMPST_ERROR;
183
184 /* we have a WQE, if we also have an ack check its PSN */
185 return pkt ? COMPST_CHECK_PSN : COMPST_EXIT;
186}
187
188static inline void reset_retry_counters(struct rxe_qp *qp)
189{
190 qp->comp.retry_cnt = qp->attr.retry_cnt;
191 qp->comp.rnr_retry = qp->attr.rnr_retry;
192}
193
194static inline enum comp_state check_psn(struct rxe_qp *qp,
195 struct rxe_pkt_info *pkt,
196 struct rxe_send_wqe *wqe)
197{
198 s32 diff;
199
200 /* check to see if response is past the oldest WQE. if it is, complete
201 * send/write or error read/atomic
202 */
203 diff = psn_compare(pkt->psn, wqe->last_psn);
204 if (diff > 0) {
205 if (wqe->state == wqe_state_pending) {
206 if (wqe->mask & WR_ATOMIC_OR_READ_MASK)
207 return COMPST_ERROR_RETRY;
208
209 reset_retry_counters(qp);
210 return COMPST_COMP_WQE;
211 } else {
212 return COMPST_DONE;
213 }
214 }
215
216 /* compare response packet to expected response */
217 diff = psn_compare(pkt->psn, qp->comp.psn);
218 if (diff < 0) {
219		/* the response is most likely a retried packet; if it matches an
220		 * uncompleted WQE, complete it, otherwise ignore it
221		 */
222 if (pkt->psn == wqe->last_psn)
223 return COMPST_COMP_ACK;
224 else
225 return COMPST_DONE;
226 } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
227 return COMPST_ERROR_RETRY;
228 } else {
229 return COMPST_CHECK_ACK;
230 }
231}
232
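check_psn() leans on psn_compare(), which is defined elsewhere in the driver. The sketch below shows the usual wrap-around comparison for 24-bit PSNs (positive when the first PSN is ahead, negative when behind, zero when equal); it illustrates the idea rather than reproducing the driver's exact implementation.

#include <stdint.h>
#include <stdio.h>

#define TOY_PSN_MASK  0x00ffffff
#define TOY_PSN_HALF  0x00800000

static int toy_psn_compare(uint32_t a, uint32_t b)
{
	uint32_t d = (a - b) & TOY_PSN_MASK;

	if (d == 0)
		return 0;
	return (d < TOY_PSN_HALF) ? 1 : -1;  /* treat the 24-bit space as circular */
}

int main(void)
{
	/* 0x000002 is ahead of 0xfffffe across the 24-bit wrap */
	printf("%d %d\n", toy_psn_compare(0x000002, 0xfffffe),
	       toy_psn_compare(0x000010, 0x000020));
	return 0;
}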
233static inline enum comp_state check_ack(struct rxe_qp *qp,
234 struct rxe_pkt_info *pkt,
235 struct rxe_send_wqe *wqe)
236{
237 unsigned int mask = pkt->mask;
238 u8 syn;
239
240 /* Check the sequence only */
241 switch (qp->comp.opcode) {
242 case -1:
243 /* Will catch all *_ONLY cases. */
244 if (!(mask & RXE_START_MASK))
245 return COMPST_ERROR;
246
247 break;
248
249 case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
250 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
251 if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
252 pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
253 return COMPST_ERROR;
254 }
255 break;
256 default:
257 WARN_ON(1);
258 }
259
260 /* Check operation validity. */
261 switch (pkt->opcode) {
262 case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
263 case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST:
264 case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY:
265 syn = aeth_syn(pkt);
266
267 if ((syn & AETH_TYPE_MASK) != AETH_ACK)
268 return COMPST_ERROR;
269
270 /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
271 * doesn't have an AETH)
272 */
273 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
274 if (wqe->wr.opcode != IB_WR_RDMA_READ &&
275 wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
276 return COMPST_ERROR;
277 }
278 reset_retry_counters(qp);
279 return COMPST_READ;
280
281 case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE:
282 syn = aeth_syn(pkt);
283
284 if ((syn & AETH_TYPE_MASK) != AETH_ACK)
285 return COMPST_ERROR;
286
287 if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP &&
288 wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD)
289 return COMPST_ERROR;
290 reset_retry_counters(qp);
291 return COMPST_ATOMIC;
292
293 case IB_OPCODE_RC_ACKNOWLEDGE:
294 syn = aeth_syn(pkt);
295 switch (syn & AETH_TYPE_MASK) {
296 case AETH_ACK:
297 reset_retry_counters(qp);
298 return COMPST_WRITE_SEND;
299
300 case AETH_RNR_NAK:
301 return COMPST_RNR_RETRY;
302
303 case AETH_NAK:
304 switch (syn) {
305 case AETH_NAK_PSN_SEQ_ERROR:
306				/* a nak implicitly acks all packets with psns
307				 * before it
308				 */
309 if (psn_compare(pkt->psn, qp->comp.psn) > 0) {
310 qp->comp.psn = pkt->psn;
311 if (qp->req.wait_psn) {
312 qp->req.wait_psn = 0;
313 rxe_run_task(&qp->req.task, 1);
314 }
315 }
316 return COMPST_ERROR_RETRY;
317
318 case AETH_NAK_INVALID_REQ:
319 wqe->status = IB_WC_REM_INV_REQ_ERR;
320 return COMPST_ERROR;
321
322 case AETH_NAK_REM_ACC_ERR:
323 wqe->status = IB_WC_REM_ACCESS_ERR;
324 return COMPST_ERROR;
325
326 case AETH_NAK_REM_OP_ERR:
327 wqe->status = IB_WC_REM_OP_ERR;
328 return COMPST_ERROR;
329
330 default:
331 pr_warn("unexpected nak %x\n", syn);
332 wqe->status = IB_WC_REM_OP_ERR;
333 return COMPST_ERROR;
334 }
335
336 default:
337 return COMPST_ERROR;
338 }
339 break;
340
341 default:
342 pr_warn("unexpected opcode\n");
343 }
344
345 return COMPST_ERROR;
346}
347
348static inline enum comp_state do_read(struct rxe_qp *qp,
349 struct rxe_pkt_info *pkt,
350 struct rxe_send_wqe *wqe)
351{
352 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
353 int ret;
354
355 ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
356 &wqe->dma, payload_addr(pkt),
357 payload_size(pkt), to_mem_obj, NULL);
358 if (ret)
359 return COMPST_ERROR;
360
361 if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))
362 return COMPST_COMP_ACK;
363 else
364 return COMPST_UPDATE_COMP;
365}
366
367static inline enum comp_state do_atomic(struct rxe_qp *qp,
368 struct rxe_pkt_info *pkt,
369 struct rxe_send_wqe *wqe)
370{
371 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
372 int ret;
373
374 u64 atomic_orig = atmack_orig(pkt);
375
376 ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
377 &wqe->dma, &atomic_orig,
378 sizeof(u64), to_mem_obj, NULL);
379 if (ret)
380 return COMPST_ERROR;
381 else
382 return COMPST_COMP_ACK;
383}
384
385static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
386 struct rxe_cqe *cqe)
387{
388 memset(cqe, 0, sizeof(*cqe));
389
390 if (!qp->is_user) {
391 struct ib_wc *wc = &cqe->ibwc;
392
393 wc->wr_id = wqe->wr.wr_id;
394 wc->status = wqe->status;
395 wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
396 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
397 wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
398 wc->wc_flags = IB_WC_WITH_IMM;
399 wc->byte_len = wqe->dma.length;
400 wc->qp = &qp->ibqp;
401 } else {
402 struct ib_uverbs_wc *uwc = &cqe->uibwc;
403
404 uwc->wr_id = wqe->wr.wr_id;
405 uwc->status = wqe->status;
406 uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
407 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
408 wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
409 uwc->wc_flags = IB_WC_WITH_IMM;
410 uwc->byte_len = wqe->dma.length;
411 uwc->qp_num = qp->ibqp.qp_num;
412 }
413}
414
415static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
416{
417 struct rxe_cqe cqe;
418
419 if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
420 (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
421 (qp->req.state == QP_STATE_ERROR)) {
422 make_send_cqe(qp, wqe, &cqe);
423 rxe_cq_post(qp->scq, &cqe, 0);
424 }
425
426 advance_consumer(qp->sq.queue);
427
428 /*
429 * we completed something so let req run again
430 * if it is trying to fence
431 */
432 if (qp->req.wait_fence) {
433 qp->req.wait_fence = 0;
434 rxe_run_task(&qp->req.task, 1);
435 }
436}
437
438static inline enum comp_state complete_ack(struct rxe_qp *qp,
439 struct rxe_pkt_info *pkt,
440 struct rxe_send_wqe *wqe)
441{
442 unsigned long flags;
443
444 if (wqe->has_rd_atomic) {
445 wqe->has_rd_atomic = 0;
446 atomic_inc(&qp->req.rd_atomic);
447 if (qp->req.need_rd_atomic) {
448 qp->comp.timeout_retry = 0;
449 qp->req.need_rd_atomic = 0;
450 rxe_run_task(&qp->req.task, 1);
451 }
452 }
453
454 if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
455 /* state_lock used by requester & completer */
456 spin_lock_irqsave(&qp->state_lock, flags);
457 if ((qp->req.state == QP_STATE_DRAIN) &&
458 (qp->comp.psn == qp->req.psn)) {
459 qp->req.state = QP_STATE_DRAINED;
460 spin_unlock_irqrestore(&qp->state_lock, flags);
461
462 if (qp->ibqp.event_handler) {
463 struct ib_event ev;
464
465 ev.device = qp->ibqp.device;
466 ev.element.qp = &qp->ibqp;
467 ev.event = IB_EVENT_SQ_DRAINED;
468 qp->ibqp.event_handler(&ev,
469 qp->ibqp.qp_context);
470 }
471 } else {
472 spin_unlock_irqrestore(&qp->state_lock, flags);
473 }
474 }
475
476 do_complete(qp, wqe);
477
478 if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
479 return COMPST_UPDATE_COMP;
480 else
481 return COMPST_DONE;
482}
483
484static inline enum comp_state complete_wqe(struct rxe_qp *qp,
485 struct rxe_pkt_info *pkt,
486 struct rxe_send_wqe *wqe)
487{
488 qp->comp.opcode = -1;
489
490 if (pkt) {
491 if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
492 qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
493
494 if (qp->req.wait_psn) {
495 qp->req.wait_psn = 0;
496 rxe_run_task(&qp->req.task, 1);
497 }
498 }
499
500 do_complete(qp, wqe);
501
502 return COMPST_GET_WQE;
503}
504
505int rxe_completer(void *arg)
506{
507 struct rxe_qp *qp = (struct rxe_qp *)arg;
508 struct rxe_send_wqe *wqe = wqe;
509 struct sk_buff *skb = NULL;
510 struct rxe_pkt_info *pkt = NULL;
511 enum comp_state state;
512
513 if (!qp->valid) {
514 while ((skb = skb_dequeue(&qp->resp_pkts))) {
515 rxe_drop_ref(qp);
516 kfree_skb(skb);
517 }
518 skb = NULL;
519 pkt = NULL;
520
521 while (queue_head(qp->sq.queue))
522 advance_consumer(qp->sq.queue);
523
524 goto exit;
525 }
526
527 if (qp->req.state == QP_STATE_ERROR) {
528 while ((skb = skb_dequeue(&qp->resp_pkts))) {
529 rxe_drop_ref(qp);
530 kfree_skb(skb);
531 }
532 skb = NULL;
533 pkt = NULL;
534
535 while ((wqe = queue_head(qp->sq.queue))) {
536 wqe->status = IB_WC_WR_FLUSH_ERR;
537 do_complete(qp, wqe);
538 }
539
540 goto exit;
541 }
542
543 if (qp->req.state == QP_STATE_RESET) {
544 while ((skb = skb_dequeue(&qp->resp_pkts))) {
545 rxe_drop_ref(qp);
546 kfree_skb(skb);
547 }
548 skb = NULL;
549 pkt = NULL;
550
551 while (queue_head(qp->sq.queue))
552 advance_consumer(qp->sq.queue);
553
554 goto exit;
555 }
556
557 if (qp->comp.timeout) {
558 qp->comp.timeout_retry = 1;
559 qp->comp.timeout = 0;
560 } else {
561 qp->comp.timeout_retry = 0;
562 }
563
564 if (qp->req.need_retry)
565 goto exit;
566
567 state = COMPST_GET_ACK;
568
569 while (1) {
570 pr_debug("state = %s\n", comp_state_name[state]);
571 switch (state) {
572 case COMPST_GET_ACK:
573 skb = skb_dequeue(&qp->resp_pkts);
574 if (skb) {
575 pkt = SKB_TO_PKT(skb);
576 qp->comp.timeout_retry = 0;
577 }
578 state = COMPST_GET_WQE;
579 break;
580
581 case COMPST_GET_WQE:
582 state = get_wqe(qp, pkt, &wqe);
583 break;
584
585 case COMPST_CHECK_PSN:
586 state = check_psn(qp, pkt, wqe);
587 break;
588
589 case COMPST_CHECK_ACK:
590 state = check_ack(qp, pkt, wqe);
591 break;
592
593 case COMPST_READ:
594 state = do_read(qp, pkt, wqe);
595 break;
596
597 case COMPST_ATOMIC:
598 state = do_atomic(qp, pkt, wqe);
599 break;
600
601 case COMPST_WRITE_SEND:
602 if (wqe->state == wqe_state_pending &&
603 wqe->last_psn == pkt->psn)
604 state = COMPST_COMP_ACK;
605 else
606 state = COMPST_UPDATE_COMP;
607 break;
608
609 case COMPST_COMP_ACK:
610 state = complete_ack(qp, pkt, wqe);
611 break;
612
613 case COMPST_COMP_WQE:
614 state = complete_wqe(qp, pkt, wqe);
615 break;
616
617 case COMPST_UPDATE_COMP:
618 if (pkt->mask & RXE_END_MASK)
619 qp->comp.opcode = -1;
620 else
621 qp->comp.opcode = pkt->opcode;
622
623 if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
624 qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
625
626 if (qp->req.wait_psn) {
627 qp->req.wait_psn = 0;
628 rxe_run_task(&qp->req.task, 1);
629 }
630
631 state = COMPST_DONE;
632 break;
633
634 case COMPST_DONE:
635 if (pkt) {
636 rxe_drop_ref(pkt->qp);
637 kfree_skb(skb);
638 }
639 goto done;
640
641 case COMPST_EXIT:
642 if (qp->comp.timeout_retry && wqe) {
643 state = COMPST_ERROR_RETRY;
644 break;
645 }
646
647			/* re-arm the retransmit timer if
648 * (1) QP is type RC
649 * (2) the QP is alive
650 * (3) there is a packet sent by the requester that
651 * might be acked (we still might get spurious
652 * timeouts but try to keep them as few as possible)
653 * (4) the timeout parameter is set
654 */
655 if ((qp_type(qp) == IB_QPT_RC) &&
656 (qp->req.state == QP_STATE_READY) &&
657 (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
658 qp->qp_timeout_jiffies)
659 mod_timer(&qp->retrans_timer,
660 jiffies + qp->qp_timeout_jiffies);
661 goto exit;
662
663 case COMPST_ERROR_RETRY:
664 /* we come here if the retry timer fired and we did
665 * not receive a response packet. try to retry the send
666 * queue if that makes sense and the limits have not
667 * been exceeded. remember that some timeouts are
668 * spurious since we do not reset the timer but kick
669 * it down the road or let it expire
670 */
671
672 /* there is nothing to retry in this case */
673 if (!wqe || (wqe->state == wqe_state_posted))
674 goto exit;
675
676 if (qp->comp.retry_cnt > 0) {
677 if (qp->comp.retry_cnt != 7)
678 qp->comp.retry_cnt--;
679
680 /* no point in retrying if we have already
681 * seen the last ack that the requester could
682 * have caused
683 */
684 if (psn_compare(qp->req.psn,
685 qp->comp.psn) > 0) {
686 /* tell the requester to retry the
687				 * send queue next time around
688 */
689 qp->req.need_retry = 1;
690 rxe_run_task(&qp->req.task, 1);
691 }
692 goto exit;
693 } else {
694 wqe->status = IB_WC_RETRY_EXC_ERR;
695 state = COMPST_ERROR;
696 }
697 break;
698
699 case COMPST_RNR_RETRY:
700 if (qp->comp.rnr_retry > 0) {
701 if (qp->comp.rnr_retry != 7)
702 qp->comp.rnr_retry--;
703
704 qp->req.need_retry = 1;
705 pr_debug("set rnr nak timer\n");
706 mod_timer(&qp->rnr_nak_timer,
707 jiffies + rnrnak_jiffies(aeth_syn(pkt)
708 & ~AETH_TYPE_MASK));
709 goto exit;
710 } else {
711 wqe->status = IB_WC_RNR_RETRY_EXC_ERR;
712 state = COMPST_ERROR;
713 }
714 break;
715
716 case COMPST_ERROR:
717 do_complete(qp, wqe);
718 rxe_qp_error(qp);
719 goto exit;
720 }
721 }
722
723exit:
724 /* we come here if we are done with processing and want the task to
725 * exit from the loop calling us
726 */
727 return -EAGAIN;
728
729done:
730	/* we come here if we have processed a packet and want the task to call
731 * us again to see if there is anything else to do
732 */
733 return 0;
734}
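rxe_completer() is a while(1)/switch state machine: each case computes the next COMPST_* state, and the function's return value tells the task layer whether to call it again (0, the done: path) or stop iterating (-EAGAIN, the exit: path). A standalone sketch of that dispatch shape, with toy states and return codes standing in for the driver's:

#include <stdio.h>

enum toy_state { ST_GET, ST_CHECK, ST_COMPLETE, ST_DONE, ST_EXIT };

static int toy_run(int have_packet)
{
	enum toy_state state = ST_GET;

	while (1) {
		switch (state) {
		case ST_GET:
			state = have_packet ? ST_CHECK : ST_EXIT;
			break;
		case ST_CHECK:
			state = ST_COMPLETE;
			break;
		case ST_COMPLETE:
			state = ST_DONE;
			break;
		case ST_DONE:
			return 0;        /* processed something: call again */
		case ST_EXIT:
			return -1;       /* nothing to do: stop the task loop */
		}
	}
}

int main(void)
{
	printf("%d %d\n", toy_run(1), toy_run(0));
	return 0;
}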
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
new file mode 100644
index 000000000000..e5e6a5e7dee9
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -0,0 +1,165 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36#include "rxe_queue.h"
37
38int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
39 int cqe, int comp_vector, struct ib_udata *udata)
40{
41 int count;
42
43 if (cqe <= 0) {
44 pr_warn("cqe(%d) <= 0\n", cqe);
45 goto err1;
46 }
47
48 if (cqe > rxe->attr.max_cqe) {
49 pr_warn("cqe(%d) > max_cqe(%d)\n",
50 cqe, rxe->attr.max_cqe);
51 goto err1;
52 }
53
54 if (cq) {
55 count = queue_count(cq->queue);
56 if (cqe < count) {
57 pr_warn("cqe(%d) < current # elements in queue (%d)",
58 cqe, count);
59 goto err1;
60 }
61 }
62
63 return 0;
64
65err1:
66 return -EINVAL;
67}
68
69static void rxe_send_complete(unsigned long data)
70{
71 struct rxe_cq *cq = (struct rxe_cq *)data;
72
73 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
74}
75
76int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
77 int comp_vector, struct ib_ucontext *context,
78 struct ib_udata *udata)
79{
80 int err;
81
82 cq->queue = rxe_queue_init(rxe, &cqe,
83 sizeof(struct rxe_cqe));
84 if (!cq->queue) {
85 pr_warn("unable to create cq\n");
86 return -ENOMEM;
87 }
88
89 err = do_mmap_info(rxe, udata, false, context, cq->queue->buf,
90 cq->queue->buf_size, &cq->queue->ip);
91 if (err) {
92 kvfree(cq->queue->buf);
93 kfree(cq->queue);
94 return err;
95 }
96
97 if (udata)
98 cq->is_user = 1;
99
100 tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
101
102 spin_lock_init(&cq->cq_lock);
103 cq->ibcq.cqe = cqe;
104 return 0;
105}
106
107int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata)
108{
109 int err;
110
111 err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe,
112 sizeof(struct rxe_cqe),
113 cq->queue->ip ? cq->queue->ip->context : NULL,
114 udata, NULL, &cq->cq_lock);
115 if (!err)
116 cq->ibcq.cqe = cqe;
117
118 return err;
119}
120
121int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
122{
123 struct ib_event ev;
124 unsigned long flags;
125
126 spin_lock_irqsave(&cq->cq_lock, flags);
127
128 if (unlikely(queue_full(cq->queue))) {
129 spin_unlock_irqrestore(&cq->cq_lock, flags);
130 if (cq->ibcq.event_handler) {
131 ev.device = cq->ibcq.device;
132 ev.element.cq = &cq->ibcq;
133 ev.event = IB_EVENT_CQ_ERR;
134 cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
135 }
136
137 return -EBUSY;
138 }
139
140 memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
141
142 /* make sure all changes to the CQ are written before we update the
143 * producer pointer
144 */
145 smp_wmb();
146
147 advance_producer(cq->queue);
148 spin_unlock_irqrestore(&cq->cq_lock, flags);
149
150 if ((cq->notify == IB_CQ_NEXT_COMP) ||
151 (cq->notify == IB_CQ_SOLICITED && solicited)) {
152 cq->notify = 0;
153 tasklet_schedule(&cq->comp_task);
154 }
155
156 return 0;
157}
158
159void rxe_cq_cleanup(void *arg)
160{
161 struct rxe_cq *cq = arg;
162
163 if (cq->queue)
164 rxe_queue_cleanup(cq->queue);
165}
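rxe_cq_post() depends on ordering: the CQE is copied into the ring first, smp_wmb() is issued, and only then is the producer index advanced, so a poller that observes the new index also sees a fully written entry. Below is a user-space analogue of that pattern using C11 release semantics in place of smp_wmb(); the ring layout and full-queue check are simplified stand-ins for the rxe queue.

#include <stdatomic.h>
#include <stdio.h>

#define RING_SLOTS 8

struct toy_cqe { unsigned long wr_id; int status; };

struct toy_ring {
	struct toy_cqe slot[RING_SLOTS];
	_Atomic unsigned int producer;
	_Atomic unsigned int consumer;
};

static int toy_post(struct toy_ring *r, const struct toy_cqe *cqe)
{
	unsigned int prod = atomic_load_explicit(&r->producer, memory_order_relaxed);
	unsigned int cons = atomic_load_explicit(&r->consumer, memory_order_acquire);

	if (prod - cons == RING_SLOTS)
		return -1;                              /* queue full, as in rxe_cq_post() */

	r->slot[prod % RING_SLOTS] = *cqe;              /* write the entry first */
	atomic_store_explicit(&r->producer, prod + 1,
			      memory_order_release);    /* then publish it */
	return 0;
}

int main(void)
{
	static struct toy_ring ring;                    /* zero-initialized */
	struct toy_cqe cqe = { .wr_id = 42, .status = 0 };

	printf("%d\n", toy_post(&ring, &cqe));
	return 0;
}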
diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c
new file mode 100644
index 000000000000..7634c1a81b2b
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_dma.c
@@ -0,0 +1,166 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36
37#define DMA_BAD_ADDR ((u64)0)
38
39static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr)
40{
41	return dma_addr == DMA_BAD_ADDR;
42}
43
44static u64 rxe_dma_map_single(struct ib_device *dev,
45 void *cpu_addr, size_t size,
46 enum dma_data_direction direction)
47{
48 WARN_ON(!valid_dma_direction(direction));
49 return (uintptr_t)cpu_addr;
50}
51
52static void rxe_dma_unmap_single(struct ib_device *dev,
53 u64 addr, size_t size,
54 enum dma_data_direction direction)
55{
56 WARN_ON(!valid_dma_direction(direction));
57}
58
59static u64 rxe_dma_map_page(struct ib_device *dev,
60 struct page *page,
61 unsigned long offset,
62 size_t size, enum dma_data_direction direction)
63{
64 u64 addr;
65
66 WARN_ON(!valid_dma_direction(direction));
67
68 if (offset + size > PAGE_SIZE) {
69		addr = DMA_BAD_ADDR;
70 goto done;
71 }
72
73 addr = (uintptr_t)page_address(page);
74 if (addr)
75 addr += offset;
76
77done:
78 return addr;
79}
80
81static void rxe_dma_unmap_page(struct ib_device *dev,
82 u64 addr, size_t size,
83 enum dma_data_direction direction)
84{
85 WARN_ON(!valid_dma_direction(direction));
86}
87
88static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl,
89 int nents, enum dma_data_direction direction)
90{
91 struct scatterlist *sg;
92 u64 addr;
93 int i;
94 int ret = nents;
95
96 WARN_ON(!valid_dma_direction(direction));
97
98 for_each_sg(sgl, sg, nents, i) {
99 addr = (uintptr_t)page_address(sg_page(sg));
100 if (!addr) {
101 ret = 0;
102 break;
103 }
104 sg->dma_address = addr + sg->offset;
105#ifdef CONFIG_NEED_SG_DMA_LENGTH
106 sg->dma_length = sg->length;
107#endif
108 }
109
110 return ret;
111}
112
113static void rxe_unmap_sg(struct ib_device *dev,
114 struct scatterlist *sg, int nents,
115 enum dma_data_direction direction)
116{
117 WARN_ON(!valid_dma_direction(direction));
118}
119
120static void rxe_sync_single_for_cpu(struct ib_device *dev,
121 u64 addr,
122 size_t size, enum dma_data_direction dir)
123{
124}
125
126static void rxe_sync_single_for_device(struct ib_device *dev,
127 u64 addr,
128 size_t size, enum dma_data_direction dir)
129{
130}
131
132static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size,
133 u64 *dma_handle, gfp_t flag)
134{
135 struct page *p;
136 void *addr = NULL;
137
138 p = alloc_pages(flag, get_order(size));
139 if (p)
140 addr = page_address(p);
141
142 if (dma_handle)
143 *dma_handle = (uintptr_t)addr;
144
145 return addr;
146}
147
148static void rxe_dma_free_coherent(struct ib_device *dev, size_t size,
149 void *cpu_addr, u64 dma_handle)
150{
151 free_pages((unsigned long)cpu_addr, get_order(size));
152}
153
154struct ib_dma_mapping_ops rxe_dma_mapping_ops = {
155 .mapping_error = rxe_mapping_error,
156 .map_single = rxe_dma_map_single,
157 .unmap_single = rxe_dma_unmap_single,
158 .map_page = rxe_dma_map_page,
159 .unmap_page = rxe_dma_unmap_page,
160 .map_sg = rxe_map_sg,
161 .unmap_sg = rxe_unmap_sg,
162 .sync_single_for_cpu = rxe_sync_single_for_cpu,
163 .sync_single_for_device = rxe_sync_single_for_device,
164 .alloc_coherent = rxe_dma_alloc_coherent,
165 .free_coherent = rxe_dma_free_coherent
166};
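Because the "device" here is software, these DMA ops are essentially identity mappings over kernel virtual addresses: rxe_dma_map_page() returns page_address() plus the offset and rejects any mapping that would run past the end of the page. A standalone sketch of that bounds check and arithmetic, with a stand-in page type and a fixed page size:

#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SIZE 4096u
#define TOY_BAD_ADDR  ((uint64_t)0)

struct toy_page { unsigned char data[TOY_PAGE_SIZE]; };

static uint64_t toy_map_page(struct toy_page *page, unsigned long offset,
			     size_t size)
{
	if (offset + size > TOY_PAGE_SIZE)
		return TOY_BAD_ADDR;            /* would cross a page boundary */

	return (uintptr_t)page->data + offset;  /* identity mapping plus offset */
}

int main(void)
{
	static struct toy_page page;

	printf("%d %d\n",
	       toy_map_page(&page, 100, 200) != TOY_BAD_ADDR,
	       toy_map_page(&page, 4000, 200) == TOY_BAD_ADDR);
	return 0;
}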
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
new file mode 100644
index 000000000000..d57b5e956ceb
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -0,0 +1,952 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_HDR_H
35#define RXE_HDR_H
36
37/* Information extracted from a packet is carried in the sk_buff cb array
38 * and must be at most 48 bytes. It is stored in the control block of the
39 * sk_buff for received packets.
40 */
41struct rxe_pkt_info {
42 struct rxe_dev *rxe; /* device that owns packet */
43 struct rxe_qp *qp; /* qp that owns packet */
44 struct rxe_send_wqe *wqe; /* send wqe */
45 u8 *hdr; /* points to bth */
46 u32 mask; /* useful info about pkt */
47 u32 psn; /* bth psn of packet */
48 u16 pkey_index; /* partition of pkt */
49 u16 paylen; /* length of bth - icrc */
50 u8 port_num; /* port pkt received on */
51 u8 opcode; /* bth opcode of packet */
52 u8 offset; /* bth offset from pkt->hdr */
53};
54
55/* Macros should be used only for received skb */
56#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb)
57#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb)
58
59/*
60 * IBA header types and methods
61 *
62 * Some of these are for reference and completeness only, since
63 * rxe does not currently support the RD transport.
64 * Most of this could be moved into the IB core; ib_pack.h has
65 * part of this but is incomplete.
66 *
67 * Header-specific routines to insert/extract values to/from headers:
68 * the routines named __hhh_(set_)fff() take a pointer to a
69 * hhh header and get(set) the fff field. The routines named
70 * hhh_(set_)fff take a packet info struct and find the
71 * header and field based on the opcode in the packet.
72 * Conversion to/from network byte order from cpu order is also done.
73 */
74
75#define RXE_ICRC_SIZE (4)
76#define RXE_MAX_HDR_LENGTH (80)
77
78/******************************************************************************
79 * Base Transport Header
80 ******************************************************************************/
81struct rxe_bth {
82 u8 opcode;
83 u8 flags;
84 __be16 pkey;
85 __be32 qpn;
86 __be32 apsn;
87};
88
89#define BTH_TVER (0)
90#define BTH_DEF_PKEY (0xffff)
91
92#define BTH_SE_MASK (0x80)
93#define BTH_MIG_MASK (0x40)
94#define BTH_PAD_MASK (0x30)
95#define BTH_TVER_MASK (0x0f)
96#define BTH_FECN_MASK (0x80000000)
97#define BTH_BECN_MASK (0x40000000)
98#define BTH_RESV6A_MASK (0x3f000000)
99#define BTH_QPN_MASK (0x00ffffff)
100#define BTH_ACK_MASK (0x80000000)
101#define BTH_RESV7_MASK (0x7f000000)
102#define BTH_PSN_MASK (0x00ffffff)
103
104static inline u8 __bth_opcode(void *arg)
105{
106 struct rxe_bth *bth = arg;
107
108 return bth->opcode;
109}
110
111static inline void __bth_set_opcode(void *arg, u8 opcode)
112{
113 struct rxe_bth *bth = arg;
114
115 bth->opcode = opcode;
116}
117
118static inline u8 __bth_se(void *arg)
119{
120 struct rxe_bth *bth = arg;
121
122 return 0 != (BTH_SE_MASK & bth->flags);
123}
124
125static inline void __bth_set_se(void *arg, int se)
126{
127 struct rxe_bth *bth = arg;
128
129 if (se)
130 bth->flags |= BTH_SE_MASK;
131 else
132 bth->flags &= ~BTH_SE_MASK;
133}
134
135static inline u8 __bth_mig(void *arg)
136{
137 struct rxe_bth *bth = arg;
138
139 return 0 != (BTH_MIG_MASK & bth->flags);
140}
141
142static inline void __bth_set_mig(void *arg, u8 mig)
143{
144 struct rxe_bth *bth = arg;
145
146 if (mig)
147 bth->flags |= BTH_MIG_MASK;
148 else
149 bth->flags &= ~BTH_MIG_MASK;
150}
151
152static inline u8 __bth_pad(void *arg)
153{
154 struct rxe_bth *bth = arg;
155
156 return (BTH_PAD_MASK & bth->flags) >> 4;
157}
158
159static inline void __bth_set_pad(void *arg, u8 pad)
160{
161 struct rxe_bth *bth = arg;
162
163 bth->flags = (BTH_PAD_MASK & (pad << 4)) |
164 (~BTH_PAD_MASK & bth->flags);
165}
166
167static inline u8 __bth_tver(void *arg)
168{
169 struct rxe_bth *bth = arg;
170
171 return BTH_TVER_MASK & bth->flags;
172}
173
174static inline void __bth_set_tver(void *arg, u8 tver)
175{
176 struct rxe_bth *bth = arg;
177
178 bth->flags = (BTH_TVER_MASK & tver) |
179 (~BTH_TVER_MASK & bth->flags);
180}
181
182static inline u16 __bth_pkey(void *arg)
183{
184 struct rxe_bth *bth = arg;
185
186 return be16_to_cpu(bth->pkey);
187}
188
189static inline void __bth_set_pkey(void *arg, u16 pkey)
190{
191 struct rxe_bth *bth = arg;
192
193 bth->pkey = cpu_to_be16(pkey);
194}
195
196static inline u32 __bth_qpn(void *arg)
197{
198 struct rxe_bth *bth = arg;
199
200 return BTH_QPN_MASK & be32_to_cpu(bth->qpn);
201}
202
203static inline void __bth_set_qpn(void *arg, u32 qpn)
204{
205 struct rxe_bth *bth = arg;
206 u32 resvqpn = be32_to_cpu(bth->qpn);
207
208 bth->qpn = cpu_to_be32((BTH_QPN_MASK & qpn) |
209 (~BTH_QPN_MASK & resvqpn));
210}
211
212static inline int __bth_fecn(void *arg)
213{
214 struct rxe_bth *bth = arg;
215
216 return 0 != (cpu_to_be32(BTH_FECN_MASK) & bth->qpn);
217}
218
219static inline void __bth_set_fecn(void *arg, int fecn)
220{
221 struct rxe_bth *bth = arg;
222
223 if (fecn)
224 bth->qpn |= cpu_to_be32(BTH_FECN_MASK);
225 else
226 bth->qpn &= ~cpu_to_be32(BTH_FECN_MASK);
227}
228
229static inline int __bth_becn(void *arg)
230{
231 struct rxe_bth *bth = arg;
232
233 return 0 != (cpu_to_be32(BTH_BECN_MASK) & bth->qpn);
234}
235
236static inline void __bth_set_becn(void *arg, int becn)
237{
238 struct rxe_bth *bth = arg;
239
240 if (becn)
241 bth->qpn |= cpu_to_be32(BTH_BECN_MASK);
242 else
243 bth->qpn &= ~cpu_to_be32(BTH_BECN_MASK);
244}
245
246static inline u8 __bth_resv6a(void *arg)
247{
248 struct rxe_bth *bth = arg;
249
250 return (BTH_RESV6A_MASK & be32_to_cpu(bth->qpn)) >> 24;
251}
252
253static inline void __bth_set_resv6a(void *arg)
254{
255 struct rxe_bth *bth = arg;
256
257 bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK);
258}
259
260static inline int __bth_ack(void *arg)
261{
262 struct rxe_bth *bth = arg;
263
264 return 0 != (cpu_to_be32(BTH_ACK_MASK) & bth->apsn);
265}
266
267static inline void __bth_set_ack(void *arg, int ack)
268{
269 struct rxe_bth *bth = arg;
270
271 if (ack)
272 bth->apsn |= cpu_to_be32(BTH_ACK_MASK);
273 else
274 bth->apsn &= ~cpu_to_be32(BTH_ACK_MASK);
275}
276
277static inline void __bth_set_resv7(void *arg)
278{
279 struct rxe_bth *bth = arg;
280
281 bth->apsn &= ~cpu_to_be32(BTH_RESV7_MASK);
282}
283
284static inline u32 __bth_psn(void *arg)
285{
286 struct rxe_bth *bth = arg;
287
288 return BTH_PSN_MASK & be32_to_cpu(bth->apsn);
289}
290
291static inline void __bth_set_psn(void *arg, u32 psn)
292{
293 struct rxe_bth *bth = arg;
294 u32 apsn = be32_to_cpu(bth->apsn);
295
296 bth->apsn = cpu_to_be32((BTH_PSN_MASK & psn) |
297 (~BTH_PSN_MASK & apsn));
298}
299
300static inline u8 bth_opcode(struct rxe_pkt_info *pkt)
301{
302 return __bth_opcode(pkt->hdr + pkt->offset);
303}
304
305static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode)
306{
307 __bth_set_opcode(pkt->hdr + pkt->offset, opcode);
308}
309
310static inline u8 bth_se(struct rxe_pkt_info *pkt)
311{
312 return __bth_se(pkt->hdr + pkt->offset);
313}
314
315static inline void bth_set_se(struct rxe_pkt_info *pkt, int se)
316{
317 __bth_set_se(pkt->hdr + pkt->offset, se);
318}
319
320static inline u8 bth_mig(struct rxe_pkt_info *pkt)
321{
322 return __bth_mig(pkt->hdr + pkt->offset);
323}
324
325static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig)
326{
327 __bth_set_mig(pkt->hdr + pkt->offset, mig);
328}
329
330static inline u8 bth_pad(struct rxe_pkt_info *pkt)
331{
332 return __bth_pad(pkt->hdr + pkt->offset);
333}
334
335static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad)
336{
337 __bth_set_pad(pkt->hdr + pkt->offset, pad);
338}
339
340static inline u8 bth_tver(struct rxe_pkt_info *pkt)
341{
342 return __bth_tver(pkt->hdr + pkt->offset);
343}
344
345static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver)
346{
347 __bth_set_tver(pkt->hdr + pkt->offset, tver);
348}
349
350static inline u16 bth_pkey(struct rxe_pkt_info *pkt)
351{
352 return __bth_pkey(pkt->hdr + pkt->offset);
353}
354
355static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey)
356{
357 __bth_set_pkey(pkt->hdr + pkt->offset, pkey);
358}
359
360static inline u32 bth_qpn(struct rxe_pkt_info *pkt)
361{
362 return __bth_qpn(pkt->hdr + pkt->offset);
363}
364
365static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn)
366{
367 __bth_set_qpn(pkt->hdr + pkt->offset, qpn);
368}
369
370static inline int bth_fecn(struct rxe_pkt_info *pkt)
371{
372 return __bth_fecn(pkt->hdr + pkt->offset);
373}
374
375static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn)
376{
377 __bth_set_fecn(pkt->hdr + pkt->offset, fecn);
378}
379
380static inline int bth_becn(struct rxe_pkt_info *pkt)
381{
382 return __bth_becn(pkt->hdr + pkt->offset);
383}
384
385static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn)
386{
387 __bth_set_becn(pkt->hdr + pkt->offset, becn);
388}
389
390static inline u8 bth_resv6a(struct rxe_pkt_info *pkt)
391{
392 return __bth_resv6a(pkt->hdr + pkt->offset);
393}
394
395static inline void bth_set_resv6a(struct rxe_pkt_info *pkt)
396{
397 __bth_set_resv6a(pkt->hdr + pkt->offset);
398}
399
400static inline int bth_ack(struct rxe_pkt_info *pkt)
401{
402 return __bth_ack(pkt->hdr + pkt->offset);
403}
404
405static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack)
406{
407 __bth_set_ack(pkt->hdr + pkt->offset, ack);
408}
409
410static inline void bth_set_resv7(struct rxe_pkt_info *pkt)
411{
412 __bth_set_resv7(pkt->hdr + pkt->offset);
413}
414
415static inline u32 bth_psn(struct rxe_pkt_info *pkt)
416{
417 return __bth_psn(pkt->hdr + pkt->offset);
418}
419
420static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn)
421{
422 __bth_set_psn(pkt->hdr + pkt->offset, psn);
423}
424
425static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se,
426 int mig, int pad, u16 pkey, u32 qpn, int ack_req,
427 u32 psn)
428{
429 struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset);
430
431 bth->opcode = opcode;
432 bth->flags = (pad << 4) & BTH_PAD_MASK;
433 if (se)
434 bth->flags |= BTH_SE_MASK;
435 if (mig)
436 bth->flags |= BTH_MIG_MASK;
437 bth->pkey = cpu_to_be16(pkey);
438 bth->qpn = cpu_to_be32(qpn & BTH_QPN_MASK);
439 psn &= BTH_PSN_MASK;
440 if (ack_req)
441 psn |= BTH_ACK_MASK;
442 bth->apsn = cpu_to_be32(psn);
443}
444
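
All of the accessors above follow one pattern: byte-swap the 32-bit word, mask out the field, shift it into place. Below is a minimal standalone sketch of that packing for the qpn word, using illustrative mask values consistent with the shifts above (QPN in the low 24 bits, the notification and reserved bits in the top byte); the driver's real BTH_* masks are defined earlier in this header and are not restated here.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>	/* htonl/ntohl stand in for cpu_to_be32/be32_to_cpu */

#define QPN_MASK	0x00ffffffu	/* low 24 bits: destination QP number */
#define FECN_MASK	0x80000000u	/* top bit, as in __bth_fecn() above  */

int main(void)
{
	uint32_t qpn_word = 0;

	/* set the QPN while preserving the top byte, as __bth_set_qpn() does */
	qpn_word = htonl((QPN_MASK & 0x12345u) | (~QPN_MASK & ntohl(qpn_word)));

	/* set FECN without disturbing the QPN, as __bth_set_fecn() does */
	qpn_word |= htonl(FECN_MASK);

	printf("qpn  = 0x%06x\n", QPN_MASK & ntohl(qpn_word));		/* 0x012345 */
	printf("fecn = %d\n", 0 != (htonl(FECN_MASK) & qpn_word));	/* 1 */
	return 0;
}
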
445/******************************************************************************
446 * Reliable Datagram Extended Transport Header
447 ******************************************************************************/
448struct rxe_rdeth {
449 __be32 een;
450};
451
452#define RDETH_EEN_MASK (0x00ffffff)
453
454static inline u8 __rdeth_een(void *arg)
455{
456 struct rxe_rdeth *rdeth = arg;
457
458 return RDETH_EEN_MASK & be32_to_cpu(rdeth->een);
459}
460
461static inline void __rdeth_set_een(void *arg, u32 een)
462{
463 struct rxe_rdeth *rdeth = arg;
464
465 rdeth->een = cpu_to_be32(RDETH_EEN_MASK & een);
466}
467
468static inline u8 rdeth_een(struct rxe_pkt_info *pkt)
469{
470 return __rdeth_een(pkt->hdr + pkt->offset
471 + rxe_opcode[pkt->opcode].offset[RXE_RDETH]);
472}
473
474static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een)
475{
476 __rdeth_set_een(pkt->hdr + pkt->offset
477 + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een);
478}
479
480/******************************************************************************
481 * Datagram Extended Transport Header
482 ******************************************************************************/
483struct rxe_deth {
484 __be32 qkey;
485 __be32 sqp;
486};
487
488#define GSI_QKEY (0x80010000)
489#define DETH_SQP_MASK (0x00ffffff)
490
491static inline u32 __deth_qkey(void *arg)
492{
493 struct rxe_deth *deth = arg;
494
495 return be32_to_cpu(deth->qkey);
496}
497
498static inline void __deth_set_qkey(void *arg, u32 qkey)
499{
500 struct rxe_deth *deth = arg;
501
502 deth->qkey = cpu_to_be32(qkey);
503}
504
505static inline u32 __deth_sqp(void *arg)
506{
507 struct rxe_deth *deth = arg;
508
509 return DETH_SQP_MASK & be32_to_cpu(deth->sqp);
510}
511
512static inline void __deth_set_sqp(void *arg, u32 sqp)
513{
514 struct rxe_deth *deth = arg;
515
516 deth->sqp = cpu_to_be32(DETH_SQP_MASK & sqp);
517}
518
519static inline u32 deth_qkey(struct rxe_pkt_info *pkt)
520{
521 return __deth_qkey(pkt->hdr + pkt->offset
522 + rxe_opcode[pkt->opcode].offset[RXE_DETH]);
523}
524
525static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey)
526{
527 __deth_set_qkey(pkt->hdr + pkt->offset
528 + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey);
529}
530
531static inline u32 deth_sqp(struct rxe_pkt_info *pkt)
532{
533 return __deth_sqp(pkt->hdr + pkt->offset
534 + rxe_opcode[pkt->opcode].offset[RXE_DETH]);
535}
536
537static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp)
538{
539 __deth_set_sqp(pkt->hdr + pkt->offset
540 + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp);
541}
542
543/******************************************************************************
544 * RDMA Extended Transport Header
545 ******************************************************************************/
546struct rxe_reth {
547 __be64 va;
548 __be32 rkey;
549 __be32 len;
550};
551
552static inline u64 __reth_va(void *arg)
553{
554 struct rxe_reth *reth = arg;
555
556 return be64_to_cpu(reth->va);
557}
558
559static inline void __reth_set_va(void *arg, u64 va)
560{
561 struct rxe_reth *reth = arg;
562
563 reth->va = cpu_to_be64(va);
564}
565
566static inline u32 __reth_rkey(void *arg)
567{
568 struct rxe_reth *reth = arg;
569
570 return be32_to_cpu(reth->rkey);
571}
572
573static inline void __reth_set_rkey(void *arg, u32 rkey)
574{
575 struct rxe_reth *reth = arg;
576
577 reth->rkey = cpu_to_be32(rkey);
578}
579
580static inline u32 __reth_len(void *arg)
581{
582 struct rxe_reth *reth = arg;
583
584 return be32_to_cpu(reth->len);
585}
586
587static inline void __reth_set_len(void *arg, u32 len)
588{
589 struct rxe_reth *reth = arg;
590
591 reth->len = cpu_to_be32(len);
592}
593
594static inline u64 reth_va(struct rxe_pkt_info *pkt)
595{
596 return __reth_va(pkt->hdr + pkt->offset
597 + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
598}
599
600static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va)
601{
602 __reth_set_va(pkt->hdr + pkt->offset
603 + rxe_opcode[pkt->opcode].offset[RXE_RETH], va);
604}
605
606static inline u32 reth_rkey(struct rxe_pkt_info *pkt)
607{
608 return __reth_rkey(pkt->hdr + pkt->offset
609 + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
610}
611
612static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
613{
614 __reth_set_rkey(pkt->hdr + pkt->offset
615 + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey);
616}
617
618static inline u32 reth_len(struct rxe_pkt_info *pkt)
619{
620 return __reth_len(pkt->hdr + pkt->offset
621 + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
622}
623
624static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)
625{
626 __reth_set_len(pkt->hdr + pkt->offset
627 + rxe_opcode[pkt->opcode].offset[RXE_RETH], len);
628}
629
630/******************************************************************************
631 * Atomic Extended Transport Header
632 ******************************************************************************/
633struct rxe_atmeth {
634 __be64 va;
635 __be32 rkey;
636 __be64 swap_add;
637 __be64 comp;
638} __attribute__((__packed__));
639
640static inline u64 __atmeth_va(void *arg)
641{
642 struct rxe_atmeth *atmeth = arg;
643
644 return be64_to_cpu(atmeth->va);
645}
646
647static inline void __atmeth_set_va(void *arg, u64 va)
648{
649 struct rxe_atmeth *atmeth = arg;
650
651 atmeth->va = cpu_to_be64(va);
652}
653
654static inline u32 __atmeth_rkey(void *arg)
655{
656 struct rxe_atmeth *atmeth = arg;
657
658 return be32_to_cpu(atmeth->rkey);
659}
660
661static inline void __atmeth_set_rkey(void *arg, u32 rkey)
662{
663 struct rxe_atmeth *atmeth = arg;
664
665 atmeth->rkey = cpu_to_be32(rkey);
666}
667
668static inline u64 __atmeth_swap_add(void *arg)
669{
670 struct rxe_atmeth *atmeth = arg;
671
672 return be64_to_cpu(atmeth->swap_add);
673}
674
675static inline void __atmeth_set_swap_add(void *arg, u64 swap_add)
676{
677 struct rxe_atmeth *atmeth = arg;
678
679 atmeth->swap_add = cpu_to_be64(swap_add);
680}
681
682static inline u64 __atmeth_comp(void *arg)
683{
684 struct rxe_atmeth *atmeth = arg;
685
686 return be64_to_cpu(atmeth->comp);
687}
688
689static inline void __atmeth_set_comp(void *arg, u64 comp)
690{
691 struct rxe_atmeth *atmeth = arg;
692
693 atmeth->comp = cpu_to_be64(comp);
694}
695
696static inline u64 atmeth_va(struct rxe_pkt_info *pkt)
697{
698 return __atmeth_va(pkt->hdr + pkt->offset
699 + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
700}
701
702static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va)
703{
704 __atmeth_set_va(pkt->hdr + pkt->offset
705 + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va);
706}
707
708static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt)
709{
710 return __atmeth_rkey(pkt->hdr + pkt->offset
711 + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
712}
713
714static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
715{
716 __atmeth_set_rkey(pkt->hdr + pkt->offset
717 + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey);
718}
719
720static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt)
721{
722 return __atmeth_swap_add(pkt->hdr + pkt->offset
723 + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
724}
725
726static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add)
727{
728 __atmeth_set_swap_add(pkt->hdr + pkt->offset
729 + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add);
730}
731
732static inline u64 atmeth_comp(struct rxe_pkt_info *pkt)
733{
734 return __atmeth_comp(pkt->hdr + pkt->offset
735 + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
736}
737
738static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp)
739{
740 __atmeth_set_comp(pkt->hdr + pkt->offset
741 + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp);
742}
743
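
The two 64-bit operands above serve both atomic opcodes: for a compare & swap, comp carries the compare value and swap_add the value to swap in; for a fetch & add, swap_add carries the addend and comp is not used. The original contents of the target location travel back in the ATMACK header further down. A minimal sketch of the responder-side semantics, simplified and with no locking or alignment checks:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* simplified responder behaviour for the two atomic opcodes */
static uint64_t atomic_op(uint64_t *va, bool is_cmp_swap,
			  uint64_t swap_add, uint64_t comp)
{
	uint64_t orig = *va;		/* returned in atmack.orig */

	if (is_cmp_swap) {
		if (orig == comp)
			*va = swap_add;
	} else {
		*va = orig + swap_add;
	}
	return orig;
}

int main(void)
{
	uint64_t mem = 5;
	uint64_t orig;

	orig = atomic_op(&mem, true, 9, 5);	/* compare with 5, swap in 9 */
	printf("orig=%llu mem=%llu\n",
	       (unsigned long long)orig, (unsigned long long)mem);	/* 5, 9 */

	orig = atomic_op(&mem, false, 3, 0);	/* fetch & add 3 */
	printf("orig=%llu mem=%llu\n",
	       (unsigned long long)orig, (unsigned long long)mem);	/* 9, 12 */
	return 0;
}
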
744/******************************************************************************
745 * Ack Extended Transport Header
746 ******************************************************************************/
747struct rxe_aeth {
748 __be32 smsn;
749};
750
751#define AETH_SYN_MASK (0xff000000)
752#define AETH_MSN_MASK (0x00ffffff)
753
754enum aeth_syndrome {
755 AETH_TYPE_MASK = 0xe0,
756 AETH_ACK = 0x00,
757 AETH_RNR_NAK = 0x20,
758 AETH_RSVD = 0x40,
759 AETH_NAK = 0x60,
760 AETH_ACK_UNLIMITED = 0x1f,
761 AETH_NAK_PSN_SEQ_ERROR = 0x60,
762 AETH_NAK_INVALID_REQ = 0x61,
763 AETH_NAK_REM_ACC_ERR = 0x62,
764 AETH_NAK_REM_OP_ERR = 0x63,
765 AETH_NAK_INV_RD_REQ = 0x64,
766};
767
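
AETH_TYPE_MASK selects the top three bits of the syndrome; the remaining five bits are interpreted per type (an end-to-end credit count for ACKs, a timer index for RNR NAKs, one of the error codes above for NAKs). A standalone sketch of that decomposition, restating a few of the constants above so that it builds on its own:

#include <stdio.h>

enum {				/* copied from the enum above */
	AETH_TYPE_MASK		= 0xe0,
	AETH_ACK		= 0x00,
	AETH_RNR_NAK		= 0x20,
	AETH_NAK		= 0x60,
	AETH_ACK_UNLIMITED	= 0x1f,
	AETH_NAK_PSN_SEQ_ERROR	= 0x60,
};

static const char *aeth_type(unsigned char syn)
{
	switch (syn & AETH_TYPE_MASK) {
	case AETH_ACK:		return "ACK";
	case AETH_RNR_NAK:	return "RNR NAK";
	case AETH_NAK:		return "NAK";
	default:		return "reserved";
	}
}

int main(void)
{
	/* 0x1f: an ACK whose value field is the "unlimited credits" code */
	printf("%s, value 0x%02x\n", aeth_type(AETH_ACK_UNLIMITED),
	       AETH_ACK_UNLIMITED & ~AETH_TYPE_MASK);
	/* 0x60: a NAK whose value field (0) means "PSN sequence error" */
	printf("%s, value 0x%02x\n", aeth_type(AETH_NAK_PSN_SEQ_ERROR),
	       AETH_NAK_PSN_SEQ_ERROR & ~AETH_TYPE_MASK);
	return 0;
}
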
768static inline u8 __aeth_syn(void *arg)
769{
770 struct rxe_aeth *aeth = arg;
771
772 return (AETH_SYN_MASK & be32_to_cpu(aeth->smsn)) >> 24;
773}
774
775static inline void __aeth_set_syn(void *arg, u8 syn)
776{
777 struct rxe_aeth *aeth = arg;
778 u32 smsn = be32_to_cpu(aeth->smsn);
779
780 aeth->smsn = cpu_to_be32((AETH_SYN_MASK & (syn << 24)) |
781 (~AETH_SYN_MASK & smsn));
782}
783
784static inline u32 __aeth_msn(void *arg)
785{
786 struct rxe_aeth *aeth = arg;
787
788 return AETH_MSN_MASK & be32_to_cpu(aeth->smsn);
789}
790
791static inline void __aeth_set_msn(void *arg, u32 msn)
792{
793 struct rxe_aeth *aeth = arg;
794 u32 smsn = be32_to_cpu(aeth->smsn);
795
796 aeth->smsn = cpu_to_be32((AETH_MSN_MASK & msn) |
797 (~AETH_MSN_MASK & smsn));
798}
799
800static inline u8 aeth_syn(struct rxe_pkt_info *pkt)
801{
802 return __aeth_syn(pkt->hdr + pkt->offset
803 + rxe_opcode[pkt->opcode].offset[RXE_AETH]);
804}
805
806static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn)
807{
808 __aeth_set_syn(pkt->hdr + pkt->offset
809 + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn);
810}
811
812static inline u32 aeth_msn(struct rxe_pkt_info *pkt)
813{
814 return __aeth_msn(pkt->hdr + pkt->offset
815 + rxe_opcode[pkt->opcode].offset[RXE_AETH]);
816}
817
818static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn)
819{
820 __aeth_set_msn(pkt->hdr + pkt->offset
821 + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn);
822}
823
824/******************************************************************************
825 * Atomic Ack Extended Transport Header
826 ******************************************************************************/
827struct rxe_atmack {
828 __be64 orig;
829};
830
831static inline u64 __atmack_orig(void *arg)
832{
833 struct rxe_atmack *atmack = arg;
834
835 return be64_to_cpu(atmack->orig);
836}
837
838static inline void __atmack_set_orig(void *arg, u64 orig)
839{
840 struct rxe_atmack *atmack = arg;
841
842 atmack->orig = cpu_to_be64(orig);
843}
844
845static inline u64 atmack_orig(struct rxe_pkt_info *pkt)
846{
847 return __atmack_orig(pkt->hdr + pkt->offset
848 + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]);
849}
850
851static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig)
852{
853 __atmack_set_orig(pkt->hdr + pkt->offset
854 + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig);
855}
856
857/******************************************************************************
858 * Immediate Extended Transport Header
859 ******************************************************************************/
860struct rxe_immdt {
861 __be32 imm;
862};
863
864static inline __be32 __immdt_imm(void *arg)
865{
866 struct rxe_immdt *immdt = arg;
867
868 return immdt->imm;
869}
870
871static inline void __immdt_set_imm(void *arg, __be32 imm)
872{
873 struct rxe_immdt *immdt = arg;
874
875 immdt->imm = imm;
876}
877
878static inline __be32 immdt_imm(struct rxe_pkt_info *pkt)
879{
880 return __immdt_imm(pkt->hdr + pkt->offset
881 + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]);
882}
883
884static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm)
885{
886 __immdt_set_imm(pkt->hdr + pkt->offset
887 + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm);
888}
889
890/******************************************************************************
891 * Invalidate Extended Transport Header
892 ******************************************************************************/
893struct rxe_ieth {
894 __be32 rkey;
895};
896
897static inline u32 __ieth_rkey(void *arg)
898{
899 struct rxe_ieth *ieth = arg;
900
901 return be32_to_cpu(ieth->rkey);
902}
903
904static inline void __ieth_set_rkey(void *arg, u32 rkey)
905{
906 struct rxe_ieth *ieth = arg;
907
908 ieth->rkey = cpu_to_be32(rkey);
909}
910
911static inline u32 ieth_rkey(struct rxe_pkt_info *pkt)
912{
913 return __ieth_rkey(pkt->hdr + pkt->offset
914 + rxe_opcode[pkt->opcode].offset[RXE_IETH]);
915}
916
917static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
918{
919 __ieth_set_rkey(pkt->hdr + pkt->offset
920 + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey);
921}
922
923enum rxe_hdr_length {
924 RXE_BTH_BYTES = sizeof(struct rxe_bth),
925 RXE_DETH_BYTES = sizeof(struct rxe_deth),
926 RXE_IMMDT_BYTES = sizeof(struct rxe_immdt),
927 RXE_RETH_BYTES = sizeof(struct rxe_reth),
928 RXE_AETH_BYTES = sizeof(struct rxe_aeth),
929 RXE_ATMACK_BYTES = sizeof(struct rxe_atmack),
930 RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth),
931 RXE_IETH_BYTES = sizeof(struct rxe_ieth),
932 RXE_RDETH_BYTES = sizeof(struct rxe_rdeth),
933};
934
935static inline size_t header_size(struct rxe_pkt_info *pkt)
936{
937 return pkt->offset + rxe_opcode[pkt->opcode].length;
938}
939
940static inline void *payload_addr(struct rxe_pkt_info *pkt)
941{
942 return pkt->hdr + pkt->offset
943 + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD];
944}
945
946static inline size_t payload_size(struct rxe_pkt_info *pkt)
947{
948 return pkt->paylen - rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]
949 - bth_pad(pkt) - RXE_ICRC_SIZE;
950}
951
952#endif /* RXE_HDR_H */
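
Since paylen covers everything after the UDP header, payload_size() above is what is left once the per-opcode transport headers, the pad bytes recorded in the BTH and the trailing ICRC are subtracted. A worked example with illustrative numbers (a BTH-only opcode and the usual 4-byte ICRC are assumptions here, not values read from the real rxe_opcode table):

#include <stdio.h>

int main(void)
{
	int payload_offset = 12;	/* BTH only, no extension headers      */
	int paylen         = 32;	/* 12 (BTH) + 13 (data) + 3 (pad) + 4  */
	int pad            = 3;		/* from bth_pad(): data rounded to 4B  */
	int icrc           = 4;		/* trailing invariant CRC              */

	printf("payload = %d\n", paylen - payload_offset - pad - icrc);	/* 13 */
	return 0;
}
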
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
new file mode 100644
index 000000000000..413b56b23a06
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -0,0 +1,96 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36
37/* Compute a partial ICRC for all the IB transport headers. */
38u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
39{
40 unsigned int bth_offset = 0;
41 struct iphdr *ip4h = NULL;
42 struct ipv6hdr *ip6h = NULL;
43 struct udphdr *udph;
44 struct rxe_bth *bth;
45 int crc;
46 int length;
47 int hdr_size = sizeof(struct udphdr) +
48 (skb->protocol == htons(ETH_P_IP) ?
49 sizeof(struct iphdr) : sizeof(struct ipv6hdr));
 50	/* pseudo header buffer size is calculated using the ipv6 header size
 51	 * since it is larger than the ipv4 header
52 */
53 u8 pshdr[sizeof(struct udphdr) +
54 sizeof(struct ipv6hdr) +
55 RXE_BTH_BYTES];
56
57 /* This seed is the result of computing a CRC with a seed of
 58	 * 0xffffffff and 8 bytes of 0xff representing a masked LRH.
59 */
60 crc = 0xdebb20e3;
61
62 if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */
63 memcpy(pshdr, ip_hdr(skb), hdr_size);
64 ip4h = (struct iphdr *)pshdr;
65 udph = (struct udphdr *)(ip4h + 1);
66
67 ip4h->ttl = 0xff;
68 ip4h->check = CSUM_MANGLED_0;
69 ip4h->tos = 0xff;
70 } else { /* IPv6 */
71 memcpy(pshdr, ipv6_hdr(skb), hdr_size);
72 ip6h = (struct ipv6hdr *)pshdr;
73 udph = (struct udphdr *)(ip6h + 1);
74
75 memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl));
76 ip6h->priority = 0xf;
77 ip6h->hop_limit = 0xff;
78 }
79 udph->check = CSUM_MANGLED_0;
80
81 bth_offset += hdr_size;
82
83 memcpy(&pshdr[bth_offset], pkt->hdr, RXE_BTH_BYTES);
84 bth = (struct rxe_bth *)&pshdr[bth_offset];
85
86 /* exclude bth.resv8a */
87 bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);
88
89 length = hdr_size + RXE_BTH_BYTES;
90 crc = crc32_le(crc, pshdr, length);
91
 92	/* And finish computing the CRC on the remainder of the headers. */
93 crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES,
94 rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
95 return crc;
96}
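
rxe_icrc_hdr() builds a pseudo header in which every field a router may legitimately rewrite (TTL, TOS and the checksum for IPv4; flow label, priority and hop limit for IPv6; the UDP checksum; the reserved byte next to the BTH QPN) is forced to all-ones, so the resulting CRC does not depend on anything that changes in flight. A standalone sketch of that idea, using a plain reflected CRC-32 routine as a stand-in for the kernel's crc32_le() (same 0xedb88320 polynomial, shown for illustration only):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* bitwise reflected CRC-32, no init/final inversion, standing in for crc32_le() */
static uint32_t crc32_le_sw(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (crc & 1 ? 0xedb88320u : 0);
	}
	return crc;
}

struct toy_hdr {		/* only the fields the masking step cares about */
	uint8_t ttl;
	uint8_t tos;
	uint8_t rest[18];	/* stands for the invariant part of the headers */
};

int main(void)
{
	struct toy_hdr a = { .ttl = 17, .tos = 0, .rest = "payload-invariant" };
	struct toy_hdr b = a;

	b.ttl = 64;		/* a router changed the mutable fields in flight */
	b.tos = 0x2e;

	/* mask the mutable fields to all-ones, as rxe_icrc_hdr() does */
	a.ttl = b.ttl = 0xff;
	a.tos = b.tos = 0xff;

	/* both CRCs match even though the packets differed on the wire */
	printf("crc(a)=%08x crc(b)=%08x\n",
	       crc32_le_sw(0xdebb20e3, (const uint8_t *)&a, sizeof(a)),
	       crc32_le_sw(0xdebb20e3, (const uint8_t *)&b, sizeof(b)));
	return 0;
}
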
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
new file mode 100644
index 000000000000..4a5484ef604f
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -0,0 +1,286 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_LOC_H
35#define RXE_LOC_H
36
37/* rxe_av.c */
38
39int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr);
40
41int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
42 struct rxe_av *av, struct ib_ah_attr *attr);
43
44int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
45 struct ib_ah_attr *attr);
46
47int rxe_av_fill_ip_info(struct rxe_dev *rxe,
48 struct rxe_av *av,
49 struct ib_ah_attr *attr,
50 struct ib_gid_attr *sgid_attr,
51 union ib_gid *sgid);
52
53struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
54
55/* rxe_cq.c */
56int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
57 int cqe, int comp_vector, struct ib_udata *udata);
58
59int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
60 int comp_vector, struct ib_ucontext *context,
61 struct ib_udata *udata);
62
63int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);
64
65int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
66
67void rxe_cq_cleanup(void *arg);
68
69/* rxe_mcast.c */
70int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
71 struct rxe_mc_grp **grp_p);
72
73int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
74 struct rxe_mc_grp *grp);
75
76int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
77 union ib_gid *mgid);
78
79void rxe_drop_all_mcast_groups(struct rxe_qp *qp);
80
81void rxe_mc_cleanup(void *arg);
82
83/* rxe_mmap.c */
84struct rxe_mmap_info {
85 struct list_head pending_mmaps;
86 struct ib_ucontext *context;
87 struct kref ref;
88 void *obj;
89
90 struct mminfo info;
91};
92
93void rxe_mmap_release(struct kref *ref);
94
95struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev,
96 u32 size,
97 struct ib_ucontext *context,
98 void *obj);
99
100int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
101
102/* rxe_mr.c */
103enum copy_direction {
104 to_mem_obj,
105 from_mem_obj,
106};
107
108int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
109 int access, struct rxe_mem *mem);
110
111int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
112 u64 length, u64 iova, int access, struct ib_udata *udata,
113 struct rxe_mem *mr);
114
115int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
116 int max_pages, struct rxe_mem *mem);
117
118int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr,
119 int length, enum copy_direction dir, u32 *crcp);
120
121int copy_data(struct rxe_dev *rxe, struct rxe_pd *pd, int access,
122 struct rxe_dma_info *dma, void *addr, int length,
123 enum copy_direction dir, u32 *crcp);
124
125void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length);
126
127enum lookup_type {
128 lookup_local,
129 lookup_remote,
130};
131
132struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
133 enum lookup_type type);
134
135int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length);
136
137int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
138 u64 *page, int num_pages, u64 iova);
139
140void rxe_mem_cleanup(void *arg);
141
142int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
143
144/* rxe_qp.c */
145int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
146
147int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
148 struct ib_qp_init_attr *init, struct ib_udata *udata,
149 struct ib_pd *ibpd);
150
151int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
152
153int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
154 struct ib_qp_attr *attr, int mask);
155
156int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
157 int mask, struct ib_udata *udata);
158
159int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
160
161void rxe_qp_error(struct rxe_qp *qp);
162
163void rxe_qp_destroy(struct rxe_qp *qp);
164
165void rxe_qp_cleanup(void *arg);
166
167static inline int qp_num(struct rxe_qp *qp)
168{
169 return qp->ibqp.qp_num;
170}
171
172static inline enum ib_qp_type qp_type(struct rxe_qp *qp)
173{
174 return qp->ibqp.qp_type;
175}
176
177static inline enum ib_qp_state qp_state(struct rxe_qp *qp)
178{
179 return qp->attr.qp_state;
180}
181
182static inline int qp_mtu(struct rxe_qp *qp)
183{
184 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
185 return qp->attr.path_mtu;
186 else
187 return RXE_PORT_MAX_MTU;
188}
189
190static inline int rcv_wqe_size(int max_sge)
191{
192 return sizeof(struct rxe_recv_wqe) +
193 max_sge * sizeof(struct ib_sge);
194}
195
196void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);
197
198static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
199{
200 qp->resp.res_head++;
201 if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic))
202 qp->resp.res_head = 0;
203}
204
205void retransmit_timer(unsigned long data);
206void rnr_nak_timer(unsigned long data);
207
208void dump_qp(struct rxe_qp *qp);
209
210/* rxe_srq.c */
211#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)
212
213int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
214 struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
215
216int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
217 struct ib_srq_init_attr *init,
218 struct ib_ucontext *context, struct ib_udata *udata);
219
220int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
221 struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
222 struct ib_udata *udata);
223
224extern struct ib_dma_mapping_ops rxe_dma_mapping_ops;
225
226void rxe_release(struct kref *kref);
227
228int rxe_completer(void *arg);
229int rxe_requester(void *arg);
230int rxe_responder(void *arg);
231
232u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb);
233
234void rxe_resp_queue_pkt(struct rxe_dev *rxe,
235 struct rxe_qp *qp, struct sk_buff *skb);
236
237void rxe_comp_queue_pkt(struct rxe_dev *rxe,
238 struct rxe_qp *qp, struct sk_buff *skb);
239
240static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp)
241{
242 return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
243}
244
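/*
 * rxe_xmit_packet() below: if the QP is not in the ready state for the
 * packet's direction the skb is dropped and 0 is returned; loopback
 * packets are copied into the skb control block and handed to
 * ifc_ops->loopback(), everything else goes out through ifc_ops->send().
 * A transmit failure bumps rxe->xmit_errors and is returned to the caller;
 * on success the skb is counted in qp->skb_out and, for non-RC QPs, the
 * WQE is marked done and the completion task is kicked once the last
 * packet of the request (RXE_END_MASK) has been sent.
 */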
245static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
246 struct rxe_pkt_info *pkt, struct sk_buff *skb)
247{
248 int err;
249 int is_request = pkt->mask & RXE_REQ_MASK;
250
251 if ((is_request && (qp->req.state != QP_STATE_READY)) ||
252 (!is_request && (qp->resp.state != QP_STATE_READY))) {
253 pr_info("Packet dropped. QP is not in ready state\n");
254 goto drop;
255 }
256
257 if (pkt->mask & RXE_LOOPBACK_MASK) {
258 memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
259 err = rxe->ifc_ops->loopback(skb);
260 } else {
261 err = rxe->ifc_ops->send(rxe, pkt, skb);
262 }
263
264 if (err) {
265 rxe->xmit_errors++;
266 return err;
267 }
268
269 atomic_inc(&qp->skb_out);
270
271 if ((qp_type(qp) != IB_QPT_RC) &&
272 (pkt->mask & RXE_END_MASK)) {
273 pkt->wqe->state = wqe_state_done;
274 rxe_run_task(&qp->comp.task, 1);
275 }
276
277 goto done;
278
279drop:
280 kfree_skb(skb);
281 err = 0;
282done:
283 return err;
284}
285
286#endif /* RXE_LOC_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
new file mode 100644
index 000000000000..fa95544ca7e0
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -0,0 +1,190 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36
37int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
38 struct rxe_mc_grp **grp_p)
39{
40 int err;
41 struct rxe_mc_grp *grp;
42
43 if (rxe->attr.max_mcast_qp_attach == 0) {
44 err = -EINVAL;
45 goto err1;
46 }
47
48 grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
49 if (grp)
50 goto done;
51
52 grp = rxe_alloc(&rxe->mc_grp_pool);
53 if (!grp) {
54 err = -ENOMEM;
55 goto err1;
56 }
57
58 INIT_LIST_HEAD(&grp->qp_list);
59 spin_lock_init(&grp->mcg_lock);
60 grp->rxe = rxe;
61
62 rxe_add_key(grp, mgid);
63
64 err = rxe->ifc_ops->mcast_add(rxe, mgid);
65 if (err)
66 goto err2;
67
68done:
69 *grp_p = grp;
70 return 0;
71
72err2:
73 rxe_drop_ref(grp);
74err1:
75 return err;
76}
77
78int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
79 struct rxe_mc_grp *grp)
80{
81 int err;
82 struct rxe_mc_elem *elem;
83
 84	/* check to see if the qp is already a member of the group */
85 spin_lock_bh(&qp->grp_lock);
86 spin_lock_bh(&grp->mcg_lock);
87 list_for_each_entry(elem, &grp->qp_list, qp_list) {
88 if (elem->qp == qp) {
89 err = 0;
90 goto out;
91 }
92 }
93
94 if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) {
95 err = -ENOMEM;
96 goto out;
97 }
98
99 elem = rxe_alloc(&rxe->mc_elem_pool);
100 if (!elem) {
101 err = -ENOMEM;
102 goto out;
103 }
104
105 /* each qp holds a ref on the grp */
106 rxe_add_ref(grp);
107
108 grp->num_qp++;
109 elem->qp = qp;
110 elem->grp = grp;
111
112 list_add(&elem->qp_list, &grp->qp_list);
113 list_add(&elem->grp_list, &qp->grp_list);
114
115 err = 0;
116out:
117 spin_unlock_bh(&grp->mcg_lock);
118 spin_unlock_bh(&qp->grp_lock);
119 return err;
120}
121
122int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
123 union ib_gid *mgid)
124{
125 struct rxe_mc_grp *grp;
126 struct rxe_mc_elem *elem, *tmp;
127
128 grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
129 if (!grp)
130 goto err1;
131
132 spin_lock_bh(&qp->grp_lock);
133 spin_lock_bh(&grp->mcg_lock);
134
135 list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
136 if (elem->qp == qp) {
137 list_del(&elem->qp_list);
138 list_del(&elem->grp_list);
139 grp->num_qp--;
140
141 spin_unlock_bh(&grp->mcg_lock);
142 spin_unlock_bh(&qp->grp_lock);
143 rxe_drop_ref(elem);
144 rxe_drop_ref(grp); /* ref held by QP */
145 rxe_drop_ref(grp); /* ref from get_key */
146 return 0;
147 }
148 }
149
150 spin_unlock_bh(&grp->mcg_lock);
151 spin_unlock_bh(&qp->grp_lock);
152 rxe_drop_ref(grp); /* ref from get_key */
153err1:
154 return -EINVAL;
155}
156
157void rxe_drop_all_mcast_groups(struct rxe_qp *qp)
158{
159 struct rxe_mc_grp *grp;
160 struct rxe_mc_elem *elem;
161
162 while (1) {
163 spin_lock_bh(&qp->grp_lock);
164 if (list_empty(&qp->grp_list)) {
165 spin_unlock_bh(&qp->grp_lock);
166 break;
167 }
168 elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem,
169 grp_list);
170 list_del(&elem->grp_list);
171 spin_unlock_bh(&qp->grp_lock);
172
173 grp = elem->grp;
174 spin_lock_bh(&grp->mcg_lock);
175 list_del(&elem->qp_list);
176 grp->num_qp--;
177 spin_unlock_bh(&grp->mcg_lock);
178 rxe_drop_ref(grp);
179 rxe_drop_ref(elem);
180 }
181}
182
183void rxe_mc_cleanup(void *arg)
184{
185 struct rxe_mc_grp *grp = arg;
186 struct rxe_dev *rxe = grp->rxe;
187
188 rxe_drop_key(grp);
189 rxe->ifc_ops->mcast_delete(rxe, &grp->mgid);
190}
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
new file mode 100644
index 000000000000..54b3c7c99eff
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -0,0 +1,173 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/module.h>
35#include <linux/vmalloc.h>
36#include <linux/mm.h>
37#include <linux/errno.h>
38#include <asm/pgtable.h>
39
40#include "rxe.h"
41#include "rxe_loc.h"
42#include "rxe_queue.h"
43
44void rxe_mmap_release(struct kref *ref)
45{
46 struct rxe_mmap_info *ip = container_of(ref,
47 struct rxe_mmap_info, ref);
48 struct rxe_dev *rxe = to_rdev(ip->context->device);
49
50 spin_lock_bh(&rxe->pending_lock);
51
52 if (!list_empty(&ip->pending_mmaps))
53 list_del(&ip->pending_mmaps);
54
55 spin_unlock_bh(&rxe->pending_lock);
56
57 vfree(ip->obj); /* buf */
58 kfree(ip);
59}
60
61/*
62 * open and close keep track of how many times the memory region is mapped,
 63 * to avoid releasing it while it is still mapped.
64 */
65static void rxe_vma_open(struct vm_area_struct *vma)
66{
67 struct rxe_mmap_info *ip = vma->vm_private_data;
68
69 kref_get(&ip->ref);
70}
71
72static void rxe_vma_close(struct vm_area_struct *vma)
73{
74 struct rxe_mmap_info *ip = vma->vm_private_data;
75
76 kref_put(&ip->ref, rxe_mmap_release);
77}
78
79static struct vm_operations_struct rxe_vm_ops = {
80 .open = rxe_vma_open,
81 .close = rxe_vma_close,
82};
83
84/**
85 * rxe_mmap - create a new mmap region
86 * @context: the IB user context of the process making the mmap() call
87 * @vma: the VMA to be initialized
88 * Return zero if the mmap is OK. Otherwise, return an errno.
89 */
90int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
91{
92 struct rxe_dev *rxe = to_rdev(context->device);
93 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
94 unsigned long size = vma->vm_end - vma->vm_start;
95 struct rxe_mmap_info *ip, *pp;
96 int ret;
97
98 /*
99 * Search the device's list of objects waiting for a mmap call.
100 * Normally, this list is very short since a call to create a
101 * CQ, QP, or SRQ is soon followed by a call to mmap().
102 */
103 spin_lock_bh(&rxe->pending_lock);
104 list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) {
105 if (context != ip->context || (__u64)offset != ip->info.offset)
106 continue;
107
108 /* Don't allow a mmap larger than the object. */
109 if (size > ip->info.size) {
110 pr_err("mmap region is larger than the object!\n");
111 spin_unlock_bh(&rxe->pending_lock);
112 ret = -EINVAL;
113 goto done;
114 }
115
116 goto found_it;
117 }
118 pr_warn("unable to find pending mmap info\n");
119 spin_unlock_bh(&rxe->pending_lock);
120 ret = -EINVAL;
121 goto done;
122
123found_it:
124 list_del_init(&ip->pending_mmaps);
125 spin_unlock_bh(&rxe->pending_lock);
126
127 ret = remap_vmalloc_range(vma, ip->obj, 0);
128 if (ret) {
129 pr_err("rxe: err %d from remap_vmalloc_range\n", ret);
130 goto done;
131 }
132
133 vma->vm_ops = &rxe_vm_ops;
134 vma->vm_private_data = ip;
135 rxe_vma_open(vma);
136done:
137 return ret;
138}
139
140/*
141 * Allocate information for rxe_mmap
142 */
143struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe,
144 u32 size,
145 struct ib_ucontext *context,
146 void *obj)
147{
148 struct rxe_mmap_info *ip;
149
150 ip = kmalloc(sizeof(*ip), GFP_KERNEL);
151 if (!ip)
152 return NULL;
153
154 size = PAGE_ALIGN(size);
155
156 spin_lock_bh(&rxe->mmap_offset_lock);
157
158 if (rxe->mmap_offset == 0)
159 rxe->mmap_offset = PAGE_SIZE;
160
161 ip->info.offset = rxe->mmap_offset;
162 rxe->mmap_offset += size;
163
164 spin_unlock_bh(&rxe->mmap_offset_lock);
165
166 INIT_LIST_HEAD(&ip->pending_mmaps);
167 ip->info.size = size;
168 ip->context = context;
169 ip->obj = obj;
170 kref_init(&ip->ref);
171
172 return ip;
173}
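
The offset handed out by rxe_create_mmap_info() is what user space later passes back as the mmap offset, and rxe_mmap() above matches pending entries on exactly that value. A hedged sketch of the user-space side; the file descriptor and the way the offset and size are learned (normally from the driver-specific response of the verb that created the queue) are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

/* hypothetical helper: fd is the opened uverbs device file, offset/size are
 * the values the kernel stored in ip->info for this queue object
 */
static void *map_queue_buf(int fd, uint64_t offset, size_t size)
{
	void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, (off_t)offset);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return NULL;
	}
	return buf;
}

int main(void)
{
	/* a placeholder fd, so this fails by design; it only shows the call shape */
	return map_queue_buf(-1, 4096, 4096) ? 0 : 1;
}
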
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
new file mode 100644
index 000000000000..f3dab6574504
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -0,0 +1,643 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36
37/*
38 * lfsr (linear feedback shift register) with period 255
39 */
40static u8 rxe_get_key(void)
41{
42 static unsigned key = 1;
43
44 key = key << 1;
45
46 key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
47 ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
48
49 key &= 0xff;
50
51 return key;
52}
53
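
rxe_get_key() is an 8-bit linear feedback shift register stepped once per call; rxe_mem_init() below combines it with the pool element index to form the lkey/rkey as (index << 8) | key, and lookup_mem() later recovers the index with key >> 8. A standalone sketch that steps the same recurrence, measures the cycle length starting from 1, and shows the key/index composition (the index value is illustrative):

#include <stdio.h>

/* same recurrence as rxe_get_key() above, reproduced for a standalone run */
static unsigned next_key(unsigned key)
{
	key <<= 1;
	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
	     ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
	return key & 0xff;
}

int main(void)
{
	unsigned key = 1, steps = 0;

	do {
		key = next_key(key);
		steps++;
	} while (key != 1 && steps < 1000);

	printf("cycle length starting from 1: %u\n", steps);

	/* lkey/rkey composition, as in rxe_mem_init() and lookup_mem() */
	{
		unsigned index = 0x0102;
		unsigned lkey = index << 8 | key;

		printf("lkey = 0x%06x, index back = 0x%04x\n", lkey, lkey >> 8);
	}
	return 0;
}
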
54int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
55{
56 switch (mem->type) {
57 case RXE_MEM_TYPE_DMA:
58 return 0;
59
60 case RXE_MEM_TYPE_MR:
61 case RXE_MEM_TYPE_FMR:
62 return ((iova < mem->iova) ||
63 ((iova + length) > (mem->iova + mem->length))) ?
64 -EFAULT : 0;
65
66 default:
67 return -EFAULT;
68 }
69}
70
71#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \
72 | IB_ACCESS_REMOTE_WRITE \
73 | IB_ACCESS_REMOTE_ATOMIC)
74
75static void rxe_mem_init(int access, struct rxe_mem *mem)
76{
77 u32 lkey = mem->pelem.index << 8 | rxe_get_key();
78 u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
79
80 if (mem->pelem.pool->type == RXE_TYPE_MR) {
81 mem->ibmr.lkey = lkey;
82 mem->ibmr.rkey = rkey;
83 }
84
85 mem->lkey = lkey;
86 mem->rkey = rkey;
87 mem->state = RXE_MEM_STATE_INVALID;
88 mem->type = RXE_MEM_TYPE_NONE;
89 mem->map_shift = ilog2(RXE_BUF_PER_MAP);
90}
91
92void rxe_mem_cleanup(void *arg)
93{
94 struct rxe_mem *mem = arg;
95 int i;
96
97 if (mem->umem)
98 ib_umem_release(mem->umem);
99
100 if (mem->map) {
101 for (i = 0; i < mem->num_map; i++)
102 kfree(mem->map[i]);
103
104 kfree(mem->map);
105 }
106}
107
108static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
109{
110 int i;
111 int num_map;
112 struct rxe_map **map = mem->map;
113
114 num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
115
116 mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
117 if (!mem->map)
118 goto err1;
119
120 for (i = 0; i < num_map; i++) {
121 mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
122 if (!mem->map[i])
123 goto err2;
124 }
125
126 WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP));
127
128 mem->map_shift = ilog2(RXE_BUF_PER_MAP);
129 mem->map_mask = RXE_BUF_PER_MAP - 1;
130
131 mem->num_buf = num_buf;
132 mem->num_map = num_map;
133 mem->max_buf = num_map * RXE_BUF_PER_MAP;
134
135 return 0;
136
137err2:
138 for (i--; i >= 0; i--)
139 kfree(mem->map[i]);
140
141 kfree(mem->map);
142err1:
143 return -ENOMEM;
144}
145
146int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
147 int access, struct rxe_mem *mem)
148{
149 rxe_mem_init(access, mem);
150
151 mem->pd = pd;
152 mem->access = access;
153 mem->state = RXE_MEM_STATE_VALID;
154 mem->type = RXE_MEM_TYPE_DMA;
155
156 return 0;
157}
158
159int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
160 u64 length, u64 iova, int access, struct ib_udata *udata,
161 struct rxe_mem *mem)
162{
163 int entry;
164 struct rxe_map **map;
165 struct rxe_phys_buf *buf = NULL;
166 struct ib_umem *umem;
167 struct scatterlist *sg;
168 int num_buf;
169 void *vaddr;
170 int err;
171
172 umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
173 if (IS_ERR(umem)) {
174 pr_warn("err %d from ib_umem_get\n",
175 (int)PTR_ERR(umem));
176 err = -EINVAL;
177 goto err1;
178 }
179
180 mem->umem = umem;
181 num_buf = umem->nmap;
182
183 rxe_mem_init(access, mem);
184
185 err = rxe_mem_alloc(rxe, mem, num_buf);
186 if (err) {
187 pr_warn("err %d from rxe_mem_alloc\n", err);
188 ib_umem_release(umem);
189 goto err1;
190 }
191
192 WARN_ON(!is_power_of_2(umem->page_size));
193
194 mem->page_shift = ilog2(umem->page_size);
195 mem->page_mask = umem->page_size - 1;
196
197 num_buf = 0;
198 map = mem->map;
199 if (length > 0) {
200 buf = map[0]->buf;
201
202 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
203 vaddr = page_address(sg_page(sg));
204 if (!vaddr) {
205 pr_warn("null vaddr\n");
206 err = -ENOMEM;
207 goto err1;
208 }
209
210 buf->addr = (uintptr_t)vaddr;
211 buf->size = umem->page_size;
212 num_buf++;
213 buf++;
214
215 if (num_buf >= RXE_BUF_PER_MAP) {
216 map++;
217 buf = map[0]->buf;
218 num_buf = 0;
219 }
220 }
221 }
222
223 mem->pd = pd;
224 mem->umem = umem;
225 mem->access = access;
226 mem->length = length;
227 mem->iova = iova;
228 mem->va = start;
229 mem->offset = ib_umem_offset(umem);
230 mem->state = RXE_MEM_STATE_VALID;
231 mem->type = RXE_MEM_TYPE_MR;
232
233 return 0;
234
235err1:
236 return err;
237}
238
239int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
240 int max_pages, struct rxe_mem *mem)
241{
242 int err;
243
244 rxe_mem_init(0, mem);
245
246 /* In fastreg, we also set the rkey */
247 mem->ibmr.rkey = mem->ibmr.lkey;
248
249 err = rxe_mem_alloc(rxe, mem, max_pages);
250 if (err)
251 goto err1;
252
253 mem->pd = pd;
254 mem->max_buf = max_pages;
255 mem->state = RXE_MEM_STATE_FREE;
256 mem->type = RXE_MEM_TYPE_MR;
257
258 return 0;
259
260err1:
261 return err;
262}
263
264static void lookup_iova(
265 struct rxe_mem *mem,
266 u64 iova,
267 int *m_out,
268 int *n_out,
269 size_t *offset_out)
270{
271 size_t offset = iova - mem->iova + mem->offset;
272 int map_index;
273 int buf_index;
274 u64 length;
275
276 if (likely(mem->page_shift)) {
277 *offset_out = offset & mem->page_mask;
278 offset >>= mem->page_shift;
279 *n_out = offset & mem->map_mask;
280 *m_out = offset >> mem->map_shift;
281 } else {
282 map_index = 0;
283 buf_index = 0;
284
285 length = mem->map[map_index]->buf[buf_index].size;
286
287 while (offset >= length) {
288 offset -= length;
289 buf_index++;
290
291 if (buf_index == RXE_BUF_PER_MAP) {
292 map_index++;
293 buf_index = 0;
294 }
295 length = mem->map[map_index]->buf[buf_index].size;
296 }
297
298 *m_out = map_index;
299 *n_out = buf_index;
300 *offset_out = offset;
301 }
302}
303
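
With page-backed MRs the buffer list is a two-level table, RXE_BUF_PER_MAP buffers per map with one page per buffer, so an offset into the MR splits into (map index, buffer index, offset in page) by shifting alone, exactly as in the fast path above. A worked standalone example; the 4 KiB page size and 256 buffers per map are assumptions chosen for the arithmetic, not the driver's real parameters:

#include <stdio.h>

int main(void)
{
	unsigned page_shift = 12;			/* assumed 4 KiB pages      */
	unsigned long page_mask = (1ul << page_shift) - 1;
	unsigned map_shift = 8;				/* assumed 256 bufs per map */
	unsigned long map_mask = (1ul << map_shift) - 1;

	unsigned long off = 0x1234567;	/* iova - mem->iova + mem->offset */

	unsigned long in_page = off & page_mask;
	unsigned long buf_idx = (off >> page_shift) & map_mask;
	unsigned long map_idx = (off >> page_shift) >> map_shift;

	printf("map %lu, buf %lu, offset 0x%lx\n", map_idx, buf_idx, in_page);
	/* -> map 18, buf 52, offset 0x567 */
	return 0;
}
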
304void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
305{
306 size_t offset;
307 int m, n;
308 void *addr;
309
310 if (mem->state != RXE_MEM_STATE_VALID) {
311 pr_warn("mem not in valid state\n");
312 addr = NULL;
313 goto out;
314 }
315
316 if (!mem->map) {
317 addr = (void *)(uintptr_t)iova;
318 goto out;
319 }
320
321 if (mem_check_range(mem, iova, length)) {
322 pr_warn("range violation\n");
323 addr = NULL;
324 goto out;
325 }
326
327 lookup_iova(mem, iova, &m, &n, &offset);
328
329 if (offset + length > mem->map[m]->buf[n].size) {
330 pr_warn("crosses page boundary\n");
331 addr = NULL;
332 goto out;
333 }
334
335 addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;
336
337out:
338 return addr;
339}
340
341/* copy data from a range (vaddr, vaddr+length-1) to or from
342 * a mem object starting at iova. Compute incremental value of
343 * crc32 if crcp is not NULL. The caller must hold a reference to mem
344 */
345int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
346 enum copy_direction dir, u32 *crcp)
347{
348 int err;
349 int bytes;
350 u8 *va;
351 struct rxe_map **map;
352 struct rxe_phys_buf *buf;
353 int m;
354 int i;
355 size_t offset;
356 u32 crc = crcp ? (*crcp) : 0;
357
358 if (mem->type == RXE_MEM_TYPE_DMA) {
359 u8 *src, *dest;
360
361 src = (dir == to_mem_obj) ?
362 addr : ((void *)(uintptr_t)iova);
363
364 dest = (dir == to_mem_obj) ?
365 ((void *)(uintptr_t)iova) : addr;
366
367 if (crcp)
368 *crcp = crc32_le(*crcp, src, length);
369
370 memcpy(dest, src, length);
371
372 return 0;
373 }
374
375 WARN_ON(!mem->map);
376
377 err = mem_check_range(mem, iova, length);
378 if (err) {
379 err = -EFAULT;
380 goto err1;
381 }
382
383 lookup_iova(mem, iova, &m, &i, &offset);
384
385 map = mem->map + m;
386 buf = map[0]->buf + i;
387
388 while (length > 0) {
389 u8 *src, *dest;
390
391 va = (u8 *)(uintptr_t)buf->addr + offset;
392 src = (dir == to_mem_obj) ? addr : va;
393 dest = (dir == to_mem_obj) ? va : addr;
394
395 bytes = buf->size - offset;
396
397 if (bytes > length)
398 bytes = length;
399
400 if (crcp)
401 crc = crc32_le(crc, src, bytes);
402
403 memcpy(dest, src, bytes);
404
405 length -= bytes;
406 addr += bytes;
407
408 offset = 0;
409 buf++;
410 i++;
411
412 if (i == RXE_BUF_PER_MAP) {
413 i = 0;
414 map++;
415 buf = map[0]->buf;
416 }
417 }
418
419 if (crcp)
420 *crcp = crc;
421
422 return 0;
423
424err1:
425 return err;
426}
427
428/* copy data in or out of a wqe, i.e. sg list
429 * under the control of a dma descriptor
430 */
431int copy_data(
432 struct rxe_dev *rxe,
433 struct rxe_pd *pd,
434 int access,
435 struct rxe_dma_info *dma,
436 void *addr,
437 int length,
438 enum copy_direction dir,
439 u32 *crcp)
440{
441 int bytes;
442 struct rxe_sge *sge = &dma->sge[dma->cur_sge];
443 int offset = dma->sge_offset;
444 int resid = dma->resid;
445 struct rxe_mem *mem = NULL;
446 u64 iova;
447 int err;
448
449 if (length == 0)
450 return 0;
451
452 if (length > resid) {
453 err = -EINVAL;
454 goto err2;
455 }
456
457 if (sge->length && (offset < sge->length)) {
458 mem = lookup_mem(pd, access, sge->lkey, lookup_local);
459 if (!mem) {
460 err = -EINVAL;
461 goto err1;
462 }
463 }
464
465 while (length > 0) {
466 bytes = length;
467
468 if (offset >= sge->length) {
469 if (mem) {
470 rxe_drop_ref(mem);
471 mem = NULL;
472 }
473 sge++;
474 dma->cur_sge++;
475 offset = 0;
476
477 if (dma->cur_sge >= dma->num_sge) {
478 err = -ENOSPC;
479 goto err2;
480 }
481
482 if (sge->length) {
483 mem = lookup_mem(pd, access, sge->lkey,
484 lookup_local);
485 if (!mem) {
486 err = -EINVAL;
487 goto err1;
488 }
489 } else {
490 continue;
491 }
492 }
493
494 if (bytes > sge->length - offset)
495 bytes = sge->length - offset;
496
497 if (bytes > 0) {
498 iova = sge->addr + offset;
499
500 err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
501 if (err)
502 goto err2;
503
504 offset += bytes;
505 resid -= bytes;
506 length -= bytes;
507 addr += bytes;
508 }
509 }
510
511 dma->sge_offset = offset;
512 dma->resid = resid;
513
514 if (mem)
515 rxe_drop_ref(mem);
516
517 return 0;
518
519err2:
520 if (mem)
521 rxe_drop_ref(mem);
522err1:
523 return err;
524}
525
526int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
527{
528 struct rxe_sge *sge = &dma->sge[dma->cur_sge];
529 int offset = dma->sge_offset;
530 int resid = dma->resid;
531
532 while (length) {
533 unsigned int bytes;
534
535 if (offset >= sge->length) {
536 sge++;
537 dma->cur_sge++;
538 offset = 0;
539 if (dma->cur_sge >= dma->num_sge)
540 return -ENOSPC;
541 }
542
543 bytes = length;
544
545 if (bytes > sge->length - offset)
546 bytes = sge->length - offset;
547
548 offset += bytes;
549 resid -= bytes;
550 length -= bytes;
551 }
552
553 dma->sge_offset = offset;
554 dma->resid = resid;
555
556 return 0;
557}
558
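
advance_dma_data(), like the copy loop in copy_data() above, treats the WQE's SGE list as one logical byte stream: cur_sge and sge_offset mark the current position and resid counts the bytes still described. A toy standalone walk over two SGEs, showing how a 5-byte advance crosses the SGE boundary:

#include <stdio.h>

struct toy_sge { unsigned length; };

int main(void)
{
	struct toy_sge sge[2] = { { .length = 3 }, { .length = 8 } };
	unsigned cur_sge = 0, offset = 0, resid = 11;
	unsigned length = 5;			/* bytes to consume */

	while (length) {
		unsigned bytes;

		if (offset >= sge[cur_sge].length) {	/* move to the next SGE */
			cur_sge++;
			offset = 0;
		}
		bytes = length;
		if (bytes > sge[cur_sge].length - offset)
			bytes = sge[cur_sge].length - offset;
		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}
	printf("cur_sge=%u offset=%u resid=%u\n", cur_sge, offset, resid);
	/* -> cur_sge=1 offset=2 resid=6: 3 bytes from sge[0], then 2 from sge[1] */
	return 0;
}
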
559/* (1) find the mem (mr or mw) corresponding to lkey/rkey
560 * depending on lookup_type
561 * (2) verify that the (qp) pd matches the mem pd
562 * (3) verify that the mem can support the requested access
563 * (4) verify that mem state is valid
564 */
565struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
566 enum lookup_type type)
567{
568 struct rxe_mem *mem;
569 struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
570 int index = key >> 8;
571
572 if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
573 mem = rxe_pool_get_index(&rxe->mr_pool, index);
574 if (!mem)
575 goto err1;
576 } else {
577 goto err1;
578 }
579
580 if ((type == lookup_local && mem->lkey != key) ||
581 (type == lookup_remote && mem->rkey != key))
582 goto err2;
583
584 if (mem->pd != pd)
585 goto err2;
586
587 if (access && !(access & mem->access))
588 goto err2;
589
590 if (mem->state != RXE_MEM_STATE_VALID)
591 goto err2;
592
593 return mem;
594
595err2:
596 rxe_drop_ref(mem);
597err1:
598 return NULL;
599}
600
601int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
602 u64 *page, int num_pages, u64 iova)
603{
604 int i;
605 int num_buf;
606 int err;
607 struct rxe_map **map;
608 struct rxe_phys_buf *buf;
609 int page_size;
610
611 if (num_pages > mem->max_buf) {
612 err = -EINVAL;
613 goto err1;
614 }
615
616 num_buf = 0;
617 page_size = 1 << mem->page_shift;
618 map = mem->map;
619 buf = map[0]->buf;
620
621 for (i = 0; i < num_pages; i++) {
622 buf->addr = *page++;
623 buf->size = page_size;
624 buf++;
625 num_buf++;
626
627 if (num_buf == RXE_BUF_PER_MAP) {
628 map++;
629 buf = map[0]->buf;
630 num_buf = 0;
631 }
632 }
633
634 mem->iova = iova;
635 mem->va = iova;
636 mem->length = num_pages << mem->page_shift;
637 mem->state = RXE_MEM_STATE_VALID;
638
639 return 0;
640
641err1:
642 return err;
643}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
new file mode 100644
index 000000000000..0b8d2ea8b41d
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -0,0 +1,708 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/skbuff.h>
35#include <linux/if_arp.h>
36#include <linux/netdevice.h>
37#include <linux/if.h>
38#include <linux/if_vlan.h>
39#include <net/udp_tunnel.h>
40#include <net/sch_generic.h>
41#include <linux/netfilter.h>
42#include <rdma/ib_addr.h>
43
44#include "rxe.h"
45#include "rxe_net.h"
46#include "rxe_loc.h"
47
48static LIST_HEAD(rxe_dev_list);
49static spinlock_t dev_list_lock; /* spinlock for device list */
50
51struct rxe_dev *net_to_rxe(struct net_device *ndev)
52{
53 struct rxe_dev *rxe;
54 struct rxe_dev *found = NULL;
55
56 spin_lock_bh(&dev_list_lock);
57 list_for_each_entry(rxe, &rxe_dev_list, list) {
58 if (rxe->ndev == ndev) {
59 found = rxe;
60 break;
61 }
62 }
63 spin_unlock_bh(&dev_list_lock);
64
65 return found;
66}
67
68struct rxe_dev *get_rxe_by_name(const char *name)
69{
70 struct rxe_dev *rxe;
71 struct rxe_dev *found = NULL;
72
73 spin_lock_bh(&dev_list_lock);
74 list_for_each_entry(rxe, &rxe_dev_list, list) {
75 if (!strcmp(name, rxe->ib_dev.name)) {
76 found = rxe;
77 break;
78 }
79 }
80 spin_unlock_bh(&dev_list_lock);
81 return found;
82}
83
84
85struct rxe_recv_sockets recv_sockets;
86
87static __be64 rxe_mac_to_eui64(struct net_device *ndev)
88{
89 unsigned char *mac_addr = ndev->dev_addr;
90 __be64 eui64;
91 unsigned char *dst = (unsigned char *)&eui64;
92
93 dst[0] = mac_addr[0] ^ 2;
94 dst[1] = mac_addr[1];
95 dst[2] = mac_addr[2];
96 dst[3] = 0xff;
97 dst[4] = 0xfe;
98 dst[5] = mac_addr[3];
99 dst[6] = mac_addr[4];
100 dst[7] = mac_addr[5];
101
102 return eui64;
103}
104
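
The node and port GUIDs are derived from the netdev MAC address by the usual MAC-48 to EUI-64 expansion: flip the universal/local bit of the first octet and splice 0xff, 0xfe into the middle. A standalone worked example:

#include <stdint.h>
#include <stdio.h>

static void mac_to_eui64(const uint8_t mac[6], uint8_t eui[8])
{
	eui[0] = mac[0] ^ 2;	/* flip the universal/local bit */
	eui[1] = mac[1];
	eui[2] = mac[2];
	eui[3] = 0xff;		/* inserted filler octets */
	eui[4] = 0xfe;
	eui[5] = mac[3];
	eui[6] = mac[4];
	eui[7] = mac[5];
}

int main(void)
{
	uint8_t mac[6] = { 0x00, 0x1b, 0x21, 0xaa, 0xbb, 0xcc };
	uint8_t eui[8];

	mac_to_eui64(mac, eui);
	for (int i = 0; i < 8; i++)
		printf("%02x%s", eui[i], i < 7 ? ":" : "\n");
	/* -> 02:1b:21:ff:fe:aa:bb:cc */
	return 0;
}
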
105static __be64 node_guid(struct rxe_dev *rxe)
106{
107 return rxe_mac_to_eui64(rxe->ndev);
108}
109
110static __be64 port_guid(struct rxe_dev *rxe)
111{
112 return rxe_mac_to_eui64(rxe->ndev);
113}
114
115static struct device *dma_device(struct rxe_dev *rxe)
116{
117 struct net_device *ndev;
118
119 ndev = rxe->ndev;
120
121 if (ndev->priv_flags & IFF_802_1Q_VLAN)
122 ndev = vlan_dev_real_dev(ndev);
123
124 return ndev->dev.parent;
125}
126
127static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
128{
129 int err;
130 unsigned char ll_addr[ETH_ALEN];
131
132 ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
133 err = dev_mc_add(rxe->ndev, ll_addr);
134
135 return err;
136}
137
138static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
139{
140 int err;
141 unsigned char ll_addr[ETH_ALEN];
142
143 ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
144 err = dev_mc_del(rxe->ndev, ll_addr);
145
146 return err;
147}
148
149static struct dst_entry *rxe_find_route4(struct net_device *ndev,
150 struct in_addr *saddr,
151 struct in_addr *daddr)
152{
153 struct rtable *rt;
154 struct flowi4 fl = { { 0 } };
155
156 memset(&fl, 0, sizeof(fl));
157 fl.flowi4_oif = ndev->ifindex;
158 memcpy(&fl.saddr, saddr, sizeof(*saddr));
159 memcpy(&fl.daddr, daddr, sizeof(*daddr));
160 fl.flowi4_proto = IPPROTO_UDP;
161
162 rt = ip_route_output_key(&init_net, &fl);
163 if (IS_ERR(rt)) {
164 pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
165 return NULL;
166 }
167
168 return &rt->dst;
169}
170
171#if IS_ENABLED(CONFIG_IPV6)
172static struct dst_entry *rxe_find_route6(struct net_device *ndev,
173 struct in6_addr *saddr,
174 struct in6_addr *daddr)
175{
176 struct dst_entry *ndst;
177 struct flowi6 fl6 = { { 0 } };
178
179 memset(&fl6, 0, sizeof(fl6));
180 fl6.flowi6_oif = ndev->ifindex;
181 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
182 memcpy(&fl6.daddr, daddr, sizeof(*daddr));
183 fl6.flowi6_proto = IPPROTO_UDP;
184
185 if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk),
186 recv_sockets.sk6->sk, &ndst, &fl6))) {
187 pr_err_ratelimited("no route to %pI6\n", daddr);
188 goto put;
189 }
190
191 if (unlikely(ndst->error)) {
192 pr_err("no route to %pI6\n", daddr);
193 goto put;
194 }
195
196 return ndst;
197put:
198 dst_release(ndst);
199 return NULL;
200}
201
202#else
203
204static struct dst_entry *rxe_find_route6(struct net_device *ndev,
205 struct in6_addr *saddr,
206 struct in6_addr *daddr)
207{
208 return NULL;
209}
210
211#endif
212
213static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
214{
215 struct udphdr *udph;
216 struct net_device *ndev = skb->dev;
217 struct rxe_dev *rxe = net_to_rxe(ndev);
218 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
219
220 if (!rxe)
221 goto drop;
222
223 if (skb_linearize(skb)) {
224 pr_err("skb_linearize failed\n");
225 goto drop;
226 }
227
228 udph = udp_hdr(skb);
229 pkt->rxe = rxe;
230 pkt->port_num = 1;
231 pkt->hdr = (u8 *)(udph + 1);
232 pkt->mask = RXE_GRH_MASK;
233 pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
234
235 return rxe_rcv(skb);
236drop:
237 kfree_skb(skb);
238 return 0;
239}
240
241static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
242 bool ipv6)
243{
244 int err;
245 struct socket *sock;
246 struct udp_port_cfg udp_cfg;
247 struct udp_tunnel_sock_cfg tnl_cfg;
248
249 memset(&udp_cfg, 0, sizeof(udp_cfg));
250
251 if (ipv6) {
252 udp_cfg.family = AF_INET6;
253 udp_cfg.ipv6_v6only = 1;
254 } else {
255 udp_cfg.family = AF_INET;
256 }
257
258 udp_cfg.local_udp_port = port;
259
260 /* Create UDP socket */
261 err = udp_sock_create(net, &udp_cfg, &sock);
262 if (err < 0) {
263 pr_err("failed to create udp socket. err = %d\n", err);
264 return ERR_PTR(err);
265 }
266
267 memset(&tnl_cfg, 0, sizeof(tnl_cfg));
268 tnl_cfg.encap_type = 1;
269 tnl_cfg.encap_rcv = rxe_udp_encap_recv;
270 tnl_cfg.encap_destroy = NULL;
271
272 /* Setup UDP tunnel */
273 setup_udp_tunnel_sock(net, sock, &tnl_cfg);
274
275 return sock;
276}
277
278static void rxe_release_udp_tunnel(struct socket *sk)
279{
280 udp_tunnel_sock_release(sk);
281}
282
283static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
284 __be16 dst_port)
285{
286 struct udphdr *udph;
287
288 __skb_push(skb, sizeof(*udph));
289 skb_reset_transport_header(skb);
290 udph = udp_hdr(skb);
291
292 udph->dest = dst_port;
293 udph->source = src_port;
294 udph->len = htons(skb->len);
295 udph->check = 0;
296}
297
298static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
299 __be32 saddr, __be32 daddr, __u8 proto,
300 __u8 tos, __u8 ttl, __be16 df, bool xnet)
301{
302 struct iphdr *iph;
303
304 skb_scrub_packet(skb, xnet);
305
306 skb_clear_hash(skb);
307 skb_dst_set(skb, dst);
308 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
309
310 skb_push(skb, sizeof(struct iphdr));
311 skb_reset_network_header(skb);
312
313 iph = ip_hdr(skb);
314
315 iph->version = IPVERSION;
316 iph->ihl = sizeof(struct iphdr) >> 2;
317 iph->frag_off = df;
318 iph->protocol = proto;
319 iph->tos = tos;
320 iph->daddr = daddr;
321 iph->saddr = saddr;
322 iph->ttl = ttl;
323 __ip_select_ident(dev_net(dst->dev), iph,
324 skb_shinfo(skb)->gso_segs ?: 1);
325 iph->tot_len = htons(skb->len);
326 ip_send_check(iph);
327}
328
329static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
330 struct in6_addr *saddr, struct in6_addr *daddr,
331 __u8 proto, __u8 prio, __u8 ttl)
332{
333 struct ipv6hdr *ip6h;
334
335 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
336 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
337 | IPSKB_REROUTED);
338 skb_dst_set(skb, dst);
339
340 __skb_push(skb, sizeof(*ip6h));
341 skb_reset_network_header(skb);
342 ip6h = ipv6_hdr(skb);
343 ip6_flow_hdr(ip6h, prio, htonl(0));
344 ip6h->payload_len = htons(skb->len);
345 ip6h->nexthdr = proto;
346 ip6h->hop_limit = ttl;
347 ip6h->daddr = *daddr;
348 ip6h->saddr = *saddr;
349 ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
350}
351
352static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
353{
354 struct dst_entry *dst;
355 bool xnet = false;
356 __be16 df = htons(IP_DF);
357 struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
358 struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
359 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
360
361 dst = rxe_find_route4(rxe->ndev, saddr, daddr);
362 if (!dst) {
363 pr_err("Host not reachable\n");
364 return -EHOSTUNREACH;
365 }
366
367 if (!memcmp(saddr, daddr, sizeof(*daddr)))
368 pkt->mask |= RXE_LOOPBACK_MASK;
369
370 prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
371 htons(ROCE_V2_UDP_DPORT));
372
373 prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
374 av->grh.traffic_class, av->grh.hop_limit, df, xnet);
375 return 0;
376}
377
378static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
379{
380 struct dst_entry *dst;
381 struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
382 struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
383 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
384
385 dst = rxe_find_route6(rxe->ndev, saddr, daddr);
386 if (!dst) {
387 pr_err("Host not reachable\n");
388 return -EHOSTUNREACH;
389 }
390
391 if (!memcmp(saddr, daddr, sizeof(*daddr)))
392 pkt->mask |= RXE_LOOPBACK_MASK;
393
394 prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
395 htons(ROCE_V2_UDP_DPORT));
396
397 prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
398 av->grh.traffic_class,
399 av->grh.hop_limit);
400 return 0;
401}
402
403static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
404 struct sk_buff *skb, u32 *crc)
405{
406 int err = 0;
407 struct rxe_av *av = rxe_get_av(pkt);
408
409 if (av->network_type == RDMA_NETWORK_IPV4)
410 err = prepare4(rxe, skb, av);
411 else if (av->network_type == RDMA_NETWORK_IPV6)
412 err = prepare6(rxe, skb, av);
413
414 *crc = rxe_icrc_hdr(pkt, skb);
415
416 return err;
417}
418
419static void rxe_skb_tx_dtor(struct sk_buff *skb)
420{
421 struct sock *sk = skb->sk;
422 struct rxe_qp *qp = sk->sk_user_data;
423 int skb_out = atomic_dec_return(&qp->skb_out);
424
425 if (unlikely(qp->need_req_skb &&
426 skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
427 rxe_run_task(&qp->req.task, 1);
428}
429
430static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
431 struct sk_buff *skb)
432{
433 struct sk_buff *nskb;
434 struct rxe_av *av;
435 int err;
436
437 av = rxe_get_av(pkt);
438
439 nskb = skb_clone(skb, GFP_ATOMIC);
440 if (!nskb)
441 return -ENOMEM;
442
443 nskb->destructor = rxe_skb_tx_dtor;
444 nskb->sk = pkt->qp->sk->sk;
445
446 if (av->network_type == RDMA_NETWORK_IPV4) {
447 err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
448 } else if (av->network_type == RDMA_NETWORK_IPV6) {
449 err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
450 } else {
451 pr_err("Unknown layer 3 protocol: %d\n", av->network_type);
452 kfree_skb(nskb);
453 return -EINVAL;
454 }
455
456 if (unlikely(net_xmit_eval(err))) {
457 pr_debug("error sending packet: %d\n", err);
458 return -EAGAIN;
459 }
460
461 kfree_skb(skb);
462
463 return 0;
464}
465
466static int loopback(struct sk_buff *skb)
467{
468 return rxe_rcv(skb);
469}
470
471static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
472{
473 return rxe->port.port_guid == av->grh.dgid.global.interface_id;
474}
475
476static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,
477 int paylen, struct rxe_pkt_info *pkt)
478{
479 unsigned int hdr_len;
480 struct sk_buff *skb;
481
482 if (av->network_type == RDMA_NETWORK_IPV4)
483 hdr_len = ETH_HLEN + sizeof(struct udphdr) +
484 sizeof(struct iphdr);
485 else
486 hdr_len = ETH_HLEN + sizeof(struct udphdr) +
487 sizeof(struct ipv6hdr);
488
489 skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev),
490 GFP_ATOMIC);
491 if (unlikely(!skb))
492 return NULL;
493
494 skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));
495
496 skb->dev = rxe->ndev;
497 if (av->network_type == RDMA_NETWORK_IPV4)
498 skb->protocol = htons(ETH_P_IP);
499 else
500 skb->protocol = htons(ETH_P_IPV6);
501
502 pkt->rxe = rxe;
503 pkt->port_num = 1;
504 pkt->hdr = skb_put(skb, paylen);
505 pkt->mask |= RXE_GRH_MASK;
506
507 memset(pkt->hdr, 0, paylen);
508
509 return skb;
510}
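
For orientation, the headroom arithmetic above works out as follows (not part of the patch, just the conventional header sizes): ETH_HLEN is 14, a UDP header is 8 bytes, an IPv4 header is 20 bytes and an IPv6 header is 40 bytes, so init_packet() reserves 14 + 8 + 20 = 42 bytes of headroom for RoCEv2 over IPv4 and 14 + 8 + 40 = 62 bytes over IPv6, plus LL_RESERVED_SPACE(ndev), before the BTH and payload are placed with skb_put().
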
511
512/*
513 * this is required by rxe_cfg to match rxe devices in
514 * /sys/class/infiniband up with their underlying ethernet devices
515 */
516static char *parent_name(struct rxe_dev *rxe, unsigned int port_num)
517{
518 return rxe->ndev->name;
519}
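
As an aside, user-space tooling such as rxe_cfg can read this string back through sysfs; the attribute path in the sketch below is an assumption for illustration, not something shown in this patch:

	#include <stdio.h>

	int main(void)
	{
		char parent[64];
		/* hypothetical attribute path; adjust to whatever the provider exposes */
		FILE *f = fopen("/sys/class/infiniband/rxe0/parent", "r");

		if (f && fgets(parent, sizeof(parent), f))
			printf("rxe0 is layered on %s", parent);
		if (f)
			fclose(f);
		return 0;
	}
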
520
521static enum rdma_link_layer link_layer(struct rxe_dev *rxe,
522 unsigned int port_num)
523{
524 return IB_LINK_LAYER_ETHERNET;
525}
526
527static struct rxe_ifc_ops ifc_ops = {
528 .node_guid = node_guid,
529 .port_guid = port_guid,
530 .dma_device = dma_device,
531 .mcast_add = mcast_add,
532 .mcast_delete = mcast_delete,
533 .prepare = prepare,
534 .send = send,
535 .loopback = loopback,
536 .init_packet = init_packet,
537 .parent_name = parent_name,
538 .link_layer = link_layer,
539};
540
541struct rxe_dev *rxe_net_add(struct net_device *ndev)
542{
543 int err;
544 struct rxe_dev *rxe = NULL;
545
546 rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
547 if (!rxe)
548 return NULL;
549
550 rxe->ifc_ops = &ifc_ops;
551 rxe->ndev = ndev;
552
553 err = rxe_add(rxe, ndev->mtu);
554 if (err) {
555 ib_dealloc_device(&rxe->ib_dev);
556 return NULL;
557 }
558
559 spin_lock_bh(&dev_list_lock);
560 list_add_tail(&rxe->list, &rxe_dev_list);
561 spin_unlock_bh(&dev_list_lock);
562 return rxe;
563}
564
565void rxe_remove_all(void)
566{
567 spin_lock_bh(&dev_list_lock);
568 while (!list_empty(&rxe_dev_list)) {
569 struct rxe_dev *rxe =
570 list_first_entry(&rxe_dev_list, struct rxe_dev, list);
571
572 list_del(&rxe->list);
573 spin_unlock_bh(&dev_list_lock);
574 rxe_remove(rxe);
575 spin_lock_bh(&dev_list_lock);
576 }
577 spin_unlock_bh(&dev_list_lock);
578}
579EXPORT_SYMBOL(rxe_remove_all);
580
581static void rxe_port_event(struct rxe_dev *rxe,
582 enum ib_event_type event)
583{
584 struct ib_event ev;
585
586 ev.device = &rxe->ib_dev;
587 ev.element.port_num = 1;
588 ev.event = event;
589
590 ib_dispatch_event(&ev);
591}
592
593/* Caller must hold net_info_lock */
594void rxe_port_up(struct rxe_dev *rxe)
595{
596 struct rxe_port *port;
597
598 port = &rxe->port;
599 port->attr.state = IB_PORT_ACTIVE;
600 port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
601
602 rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
603 pr_info("rxe: set %s active\n", rxe->ib_dev.name);
604 return;
605}
606
607/* Caller must hold net_info_lock */
608void rxe_port_down(struct rxe_dev *rxe)
609{
610 struct rxe_port *port;
611
612 port = &rxe->port;
613 port->attr.state = IB_PORT_DOWN;
614 port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
615
616 rxe_port_event(rxe, IB_EVENT_PORT_ERR);
617 pr_info("rxe: set %s down\n", rxe->ib_dev.name);
618 return;
619}
620
621static int rxe_notify(struct notifier_block *not_blk,
622 unsigned long event,
623 void *arg)
624{
625 struct net_device *ndev = netdev_notifier_info_to_dev(arg);
626 struct rxe_dev *rxe = net_to_rxe(ndev);
627
628 if (!rxe)
629 goto out;
630
631 switch (event) {
632 case NETDEV_UNREGISTER:
633 list_del(&rxe->list);
634 rxe_remove(rxe);
635 break;
636 case NETDEV_UP:
637 rxe_port_up(rxe);
638 break;
639 case NETDEV_DOWN:
640 rxe_port_down(rxe);
641 break;
642 case NETDEV_CHANGEMTU:
643 pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu);
644 rxe_set_mtu(rxe, ndev->mtu);
645 break;
646 case NETDEV_REBOOT:
647 case NETDEV_CHANGE:
648 case NETDEV_GOING_DOWN:
649 case NETDEV_CHANGEADDR:
650 case NETDEV_CHANGENAME:
651 case NETDEV_FEAT_CHANGE:
652 default:
653 pr_info("rxe: ignoring netdev event = %ld for %s\n",
654 event, ndev->name);
655 break;
656 }
657out:
658 return NOTIFY_OK;
659}
660
661static struct notifier_block rxe_net_notifier = {
662 .notifier_call = rxe_notify,
663};
664
665int rxe_net_init(void)
666{
667 int err;
668
669 spin_lock_init(&dev_list_lock);
670
671 recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
672 htons(ROCE_V2_UDP_DPORT), true);
673 if (IS_ERR(recv_sockets.sk6)) {
674 recv_sockets.sk6 = NULL;
675 pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
676 return -1;
677 }
678
679 recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
680 htons(ROCE_V2_UDP_DPORT), false);
681 if (IS_ERR(recv_sockets.sk4)) {
682 rxe_release_udp_tunnel(recv_sockets.sk6);
683 recv_sockets.sk4 = NULL;
684 recv_sockets.sk6 = NULL;
685 pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
686 return -1;
687 }
688
689 err = register_netdevice_notifier(&rxe_net_notifier);
690 if (err) {
691 rxe_release_udp_tunnel(recv_sockets.sk6);
692 rxe_release_udp_tunnel(recv_sockets.sk4);
693 pr_err("rxe: Failed to register netdev notifier\n");
694 }
695
696 return err;
697}
698
699void rxe_net_exit(void)
700{
701 if (recv_sockets.sk6)
702 rxe_release_udp_tunnel(recv_sockets.sk6);
703
704 if (recv_sockets.sk4)
705 rxe_release_udp_tunnel(recv_sockets.sk4);
706
707 unregister_netdevice_notifier(&rxe_net_notifier);
708}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
new file mode 100644
index 000000000000..7b06f76d16cc
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -0,0 +1,53 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_NET_H
35#define RXE_NET_H
36
37#include <net/sock.h>
38#include <net/if_inet6.h>
39#include <linux/module.h>
40
41struct rxe_recv_sockets {
42 struct socket *sk4;
43 struct socket *sk6;
44};
45
46extern struct rxe_recv_sockets recv_sockets;
47
48struct rxe_dev *rxe_net_add(struct net_device *ndev);
49
50int rxe_net_init(void);
51void rxe_net_exit(void);
52
53#endif /* RXE_NET_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
new file mode 100644
index 000000000000..61927c165b59
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -0,0 +1,961 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <rdma/ib_pack.h>
35#include "rxe_opcode.h"
36#include "rxe_hdr.h"
37
38/* useful information about work request opcodes and pkt opcodes in
39 * table form
40 */
41struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
42 [IB_WR_RDMA_WRITE] = {
43 .name = "IB_WR_RDMA_WRITE",
44 .mask = {
45 [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK,
46 [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK,
47 },
48 },
49 [IB_WR_RDMA_WRITE_WITH_IMM] = {
50 .name = "IB_WR_RDMA_WRITE_WITH_IMM",
51 .mask = {
52 [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK,
53 [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK,
54 },
55 },
56 [IB_WR_SEND] = {
57 .name = "IB_WR_SEND",
58 .mask = {
59 [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
60 [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
61 [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
62 [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
63 [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
64 },
65 },
66 [IB_WR_SEND_WITH_IMM] = {
67 .name = "IB_WR_SEND_WITH_IMM",
68 .mask = {
69 [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
70 [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
71 [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
72 [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
73 [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
74 },
75 },
76 [IB_WR_RDMA_READ] = {
77 .name = "IB_WR_RDMA_READ",
78 .mask = {
79 [IB_QPT_RC] = WR_READ_MASK,
80 },
81 },
82 [IB_WR_ATOMIC_CMP_AND_SWP] = {
83 .name = "IB_WR_ATOMIC_CMP_AND_SWP",
84 .mask = {
85 [IB_QPT_RC] = WR_ATOMIC_MASK,
86 },
87 },
88 [IB_WR_ATOMIC_FETCH_AND_ADD] = {
89 .name = "IB_WR_ATOMIC_FETCH_AND_ADD",
90 .mask = {
91 [IB_QPT_RC] = WR_ATOMIC_MASK,
92 },
93 },
94 [IB_WR_LSO] = {
95 .name = "IB_WR_LSO",
96 .mask = {
97 /* not supported */
98 },
99 },
100 [IB_WR_SEND_WITH_INV] = {
101 .name = "IB_WR_SEND_WITH_INV",
102 .mask = {
103 [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
104 [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
105 [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
106 },
107 },
108 [IB_WR_RDMA_READ_WITH_INV] = {
109 .name = "IB_WR_RDMA_READ_WITH_INV",
110 .mask = {
111 [IB_QPT_RC] = WR_READ_MASK,
112 },
113 },
114 [IB_WR_LOCAL_INV] = {
115 .name = "IB_WR_LOCAL_INV",
116 .mask = {
117 [IB_QPT_RC] = WR_REG_MASK,
118 },
119 },
120 [IB_WR_REG_MR] = {
121 .name = "IB_WR_REG_MR",
122 .mask = {
123 [IB_QPT_RC] = WR_REG_MASK,
124 },
125 },
126};
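
A hedged sketch of how a table like this can be consumed, not code taken from the driver: validating a posted work request reduces to one lookup of the mask for the (opcode, QP type) pair.

	/* hypothetical helper; a zero mask means the opcode is not valid on this QP type */
	static inline int wr_opcode_supported(enum ib_wr_opcode op, enum ib_qp_type qpt)
	{
		return qpt < WR_MAX_QPT && rxe_wr_opcode_info[op].mask[qpt] != 0;
	}
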
127
128struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
129 [IB_OPCODE_RC_SEND_FIRST] = {
130 .name = "IB_OPCODE_RC_SEND_FIRST",
131 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK
132 | RXE_SEND_MASK | RXE_START_MASK,
133 .length = RXE_BTH_BYTES,
134 .offset = {
135 [RXE_BTH] = 0,
136 [RXE_PAYLOAD] = RXE_BTH_BYTES,
137 }
138 },
139 [IB_OPCODE_RC_SEND_MIDDLE] = {
140 .name = "IB_OPCODE_RC_SEND_MIDDLE",
141 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
142 | RXE_MIDDLE_MASK,
143 .length = RXE_BTH_BYTES,
144 .offset = {
145 [RXE_BTH] = 0,
146 [RXE_PAYLOAD] = RXE_BTH_BYTES,
147 }
148 },
149 [IB_OPCODE_RC_SEND_LAST] = {
150 .name = "IB_OPCODE_RC_SEND_LAST",
151 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
152 | RXE_SEND_MASK | RXE_END_MASK,
153 .length = RXE_BTH_BYTES,
154 .offset = {
155 [RXE_BTH] = 0,
156 [RXE_PAYLOAD] = RXE_BTH_BYTES,
157 }
158 },
159 [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = {
160 .name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE",
161 .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
162 | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
163 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
164 .offset = {
165 [RXE_BTH] = 0,
166 [RXE_IMMDT] = RXE_BTH_BYTES,
167 [RXE_PAYLOAD] = RXE_BTH_BYTES
168 + RXE_IMMDT_BYTES,
169 }
170 },
171 [IB_OPCODE_RC_SEND_ONLY] = {
172 .name = "IB_OPCODE_RC_SEND_ONLY",
173 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
174 | RXE_RWR_MASK | RXE_SEND_MASK
175 | RXE_START_MASK | RXE_END_MASK,
176 .length = RXE_BTH_BYTES,
177 .offset = {
178 [RXE_BTH] = 0,
179 [RXE_PAYLOAD] = RXE_BTH_BYTES,
180 }
181 },
182 [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = {
183 .name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE",
184 .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
185 | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
186 | RXE_START_MASK | RXE_END_MASK,
187 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
188 .offset = {
189 [RXE_BTH] = 0,
190 [RXE_IMMDT] = RXE_BTH_BYTES,
191 [RXE_PAYLOAD] = RXE_BTH_BYTES
192 + RXE_IMMDT_BYTES,
193 }
194 },
195 [IB_OPCODE_RC_RDMA_WRITE_FIRST] = {
196 .name = "IB_OPCODE_RC_RDMA_WRITE_FIRST",
197 .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
198 | RXE_WRITE_MASK | RXE_START_MASK,
199 .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
200 .offset = {
201 [RXE_BTH] = 0,
202 [RXE_RETH] = RXE_BTH_BYTES,
203 [RXE_PAYLOAD] = RXE_BTH_BYTES
204 + RXE_RETH_BYTES,
205 }
206 },
207 [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = {
208 .name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE",
209 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
210 | RXE_MIDDLE_MASK,
211 .length = RXE_BTH_BYTES,
212 .offset = {
213 [RXE_BTH] = 0,
214 [RXE_PAYLOAD] = RXE_BTH_BYTES,
215 }
216 },
217 [IB_OPCODE_RC_RDMA_WRITE_LAST] = {
218 .name = "IB_OPCODE_RC_RDMA_WRITE_LAST",
219 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
220 | RXE_END_MASK,
221 .length = RXE_BTH_BYTES,
222 .offset = {
223 [RXE_BTH] = 0,
224 [RXE_PAYLOAD] = RXE_BTH_BYTES,
225 }
226 },
227 [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
228 .name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
229 .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
230 | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
231 | RXE_END_MASK,
232 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
233 .offset = {
234 [RXE_BTH] = 0,
235 [RXE_IMMDT] = RXE_BTH_BYTES,
236 [RXE_PAYLOAD] = RXE_BTH_BYTES
237 + RXE_IMMDT_BYTES,
238 }
239 },
240 [IB_OPCODE_RC_RDMA_WRITE_ONLY] = {
241 .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY",
242 .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
243 | RXE_WRITE_MASK | RXE_START_MASK
244 | RXE_END_MASK,
245 .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
246 .offset = {
247 [RXE_BTH] = 0,
248 [RXE_RETH] = RXE_BTH_BYTES,
249 [RXE_PAYLOAD] = RXE_BTH_BYTES
250 + RXE_RETH_BYTES,
251 }
252 },
253 [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
254 .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
255 .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
256 | RXE_REQ_MASK | RXE_WRITE_MASK
257 | RXE_COMP_MASK | RXE_RWR_MASK
258 | RXE_START_MASK | RXE_END_MASK,
259 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES,
260 .offset = {
261 [RXE_BTH] = 0,
262 [RXE_RETH] = RXE_BTH_BYTES,
263 [RXE_IMMDT] = RXE_BTH_BYTES
264 + RXE_RETH_BYTES,
265 [RXE_PAYLOAD] = RXE_BTH_BYTES
266 + RXE_RETH_BYTES
267 + RXE_IMMDT_BYTES,
268 }
269 },
270 [IB_OPCODE_RC_RDMA_READ_REQUEST] = {
271 .name = "IB_OPCODE_RC_RDMA_READ_REQUEST",
272 .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK
273 | RXE_START_MASK | RXE_END_MASK,
274 .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
275 .offset = {
276 [RXE_BTH] = 0,
277 [RXE_RETH] = RXE_BTH_BYTES,
278 [RXE_PAYLOAD] = RXE_BTH_BYTES
279 + RXE_RETH_BYTES,
280 }
281 },
282 [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = {
283 .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST",
284 .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
285 | RXE_START_MASK,
286 .length = RXE_BTH_BYTES + RXE_AETH_BYTES,
287 .offset = {
288 [RXE_BTH] = 0,
289 [RXE_AETH] = RXE_BTH_BYTES,
290 [RXE_PAYLOAD] = RXE_BTH_BYTES
291 + RXE_AETH_BYTES,
292 }
293 },
294 [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = {
295 .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE",
296 .mask = RXE_PAYLOAD_MASK | RXE_ACK_MASK | RXE_MIDDLE_MASK,
297 .length = RXE_BTH_BYTES,
298 .offset = {
299 [RXE_BTH] = 0,
300 [RXE_PAYLOAD] = RXE_BTH_BYTES,
301 }
302 },
303 [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = {
304 .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST",
305 .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
306 | RXE_END_MASK,
307 .length = RXE_BTH_BYTES + RXE_AETH_BYTES,
308 .offset = {
309 [RXE_BTH] = 0,
310 [RXE_AETH] = RXE_BTH_BYTES,
311 [RXE_PAYLOAD] = RXE_BTH_BYTES
312 + RXE_AETH_BYTES,
313 }
314 },
315 [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = {
316 .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY",
317 .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
318 | RXE_START_MASK | RXE_END_MASK,
319 .length = RXE_BTH_BYTES + RXE_AETH_BYTES,
320 .offset = {
321 [RXE_BTH] = 0,
322 [RXE_AETH] = RXE_BTH_BYTES,
323 [RXE_PAYLOAD] = RXE_BTH_BYTES
324 + RXE_AETH_BYTES,
325 }
326 },
327 [IB_OPCODE_RC_ACKNOWLEDGE] = {
328 .name = "IB_OPCODE_RC_ACKNOWLEDGE",
329 .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK
330 | RXE_END_MASK,
331 .length = RXE_BTH_BYTES + RXE_AETH_BYTES,
332 .offset = {
333 [RXE_BTH] = 0,
334 [RXE_AETH] = RXE_BTH_BYTES,
335 [RXE_PAYLOAD] = RXE_BTH_BYTES
336 + RXE_AETH_BYTES,
337 }
338 },
339 [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = {
340 .name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE",
341 .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK
342 | RXE_START_MASK | RXE_END_MASK,
343 .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES,
344 .offset = {
345 [RXE_BTH] = 0,
346 [RXE_AETH] = RXE_BTH_BYTES,
347 [RXE_ATMACK] = RXE_BTH_BYTES
348 + RXE_AETH_BYTES,
349 [RXE_PAYLOAD] = RXE_BTH_BYTES
350 + RXE_ATMACK_BYTES + RXE_AETH_BYTES,
351 }
352 },
353 [IB_OPCODE_RC_COMPARE_SWAP] = {
354 .name = "IB_OPCODE_RC_COMPARE_SWAP",
355 .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK
356 | RXE_START_MASK | RXE_END_MASK,
357 .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES,
358 .offset = {
359 [RXE_BTH] = 0,
360 [RXE_ATMETH] = RXE_BTH_BYTES,
361 [RXE_PAYLOAD] = RXE_BTH_BYTES
362 + RXE_ATMETH_BYTES,
363 }
364 },
365 [IB_OPCODE_RC_FETCH_ADD] = {
366 .name = "IB_OPCODE_RC_FETCH_ADD",
367 .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK
368 | RXE_START_MASK | RXE_END_MASK,
369 .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES,
370 .offset = {
371 [RXE_BTH] = 0,
372 [RXE_ATMETH] = RXE_BTH_BYTES,
373 [RXE_PAYLOAD] = RXE_BTH_BYTES
374 + RXE_ATMETH_BYTES,
375 }
376 },
377 [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = {
378 .name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE",
379 .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
380 | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
381 .length = RXE_BTH_BYTES + RXE_IETH_BYTES,
382 .offset = {
383 [RXE_BTH] = 0,
384 [RXE_IETH] = RXE_BTH_BYTES,
385 [RXE_PAYLOAD] = RXE_BTH_BYTES
386 + RXE_IETH_BYTES,
387 }
388 },
389 [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = {
390 .name = "IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE",
391 .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
392 | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
393 | RXE_END_MASK,
394 .length = RXE_BTH_BYTES + RXE_IETH_BYTES,
395 .offset = {
396 [RXE_BTH] = 0,
397 [RXE_IETH] = RXE_BTH_BYTES,
398 [RXE_PAYLOAD] = RXE_BTH_BYTES
399 + RXE_IETH_BYTES,
400 }
401 },
402
403 /* UC */
404 [IB_OPCODE_UC_SEND_FIRST] = {
405 .name = "IB_OPCODE_UC_SEND_FIRST",
406 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK
407 | RXE_SEND_MASK | RXE_START_MASK,
408 .length = RXE_BTH_BYTES,
409 .offset = {
410 [RXE_BTH] = 0,
411 [RXE_PAYLOAD] = RXE_BTH_BYTES,
412 }
413 },
414 [IB_OPCODE_UC_SEND_MIDDLE] = {
415 .name = "IB_OPCODE_UC_SEND_MIDDLE",
416 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
417 | RXE_MIDDLE_MASK,
418 .length = RXE_BTH_BYTES,
419 .offset = {
420 [RXE_BTH] = 0,
421 [RXE_PAYLOAD] = RXE_BTH_BYTES,
422 }
423 },
424 [IB_OPCODE_UC_SEND_LAST] = {
425 .name = "IB_OPCODE_UC_SEND_LAST",
426 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
427 | RXE_SEND_MASK | RXE_END_MASK,
428 .length = RXE_BTH_BYTES,
429 .offset = {
430 [RXE_BTH] = 0,
431 [RXE_PAYLOAD] = RXE_BTH_BYTES,
432 }
433 },
434 [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = {
435 .name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE",
436 .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
437 | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
438 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
439 .offset = {
440 [RXE_BTH] = 0,
441 [RXE_IMMDT] = RXE_BTH_BYTES,
442 [RXE_PAYLOAD] = RXE_BTH_BYTES
443 + RXE_IMMDT_BYTES,
444 }
445 },
446 [IB_OPCODE_UC_SEND_ONLY] = {
447 .name = "IB_OPCODE_UC_SEND_ONLY",
448 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
449 | RXE_RWR_MASK | RXE_SEND_MASK
450 | RXE_START_MASK | RXE_END_MASK,
451 .length = RXE_BTH_BYTES,
452 .offset = {
453 [RXE_BTH] = 0,
454 [RXE_PAYLOAD] = RXE_BTH_BYTES,
455 }
456 },
457 [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = {
458 .name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE",
459 .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
460 | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
461 | RXE_START_MASK | RXE_END_MASK,
462 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
463 .offset = {
464 [RXE_BTH] = 0,
465 [RXE_IMMDT] = RXE_BTH_BYTES,
466 [RXE_PAYLOAD] = RXE_BTH_BYTES
467 + RXE_IMMDT_BYTES,
468 }
469 },
470 [IB_OPCODE_UC_RDMA_WRITE_FIRST] = {
471 .name = "IB_OPCODE_UC_RDMA_WRITE_FIRST",
472 .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
473 | RXE_WRITE_MASK | RXE_START_MASK,
474 .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
475 .offset = {
476 [RXE_BTH] = 0,
477 [RXE_RETH] = RXE_BTH_BYTES,
478 [RXE_PAYLOAD] = RXE_BTH_BYTES
479 + RXE_RETH_BYTES,
480 }
481 },
482 [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = {
483 .name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE",
484 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
485 | RXE_MIDDLE_MASK,
486 .length = RXE_BTH_BYTES,
487 .offset = {
488 [RXE_BTH] = 0,
489 [RXE_PAYLOAD] = RXE_BTH_BYTES,
490 }
491 },
492 [IB_OPCODE_UC_RDMA_WRITE_LAST] = {
493 .name = "IB_OPCODE_UC_RDMA_WRITE_LAST",
494 .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
495 | RXE_END_MASK,
496 .length = RXE_BTH_BYTES,
497 .offset = {
498 [RXE_BTH] = 0,
499 [RXE_PAYLOAD] = RXE_BTH_BYTES,
500 }
501 },
502 [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
503 .name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
504 .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
505 | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
506 | RXE_END_MASK,
507 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
508 .offset = {
509 [RXE_BTH] = 0,
510 [RXE_IMMDT] = RXE_BTH_BYTES,
511 [RXE_PAYLOAD] = RXE_BTH_BYTES
512 + RXE_IMMDT_BYTES,
513 }
514 },
515 [IB_OPCODE_UC_RDMA_WRITE_ONLY] = {
516 .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY",
517 .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
518 | RXE_WRITE_MASK | RXE_START_MASK
519 | RXE_END_MASK,
520 .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
521 .offset = {
522 [RXE_BTH] = 0,
523 [RXE_RETH] = RXE_BTH_BYTES,
524 [RXE_PAYLOAD] = RXE_BTH_BYTES
525 + RXE_RETH_BYTES,
526 }
527 },
528 [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
529 .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
530 .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
531 | RXE_REQ_MASK | RXE_WRITE_MASK
532 | RXE_COMP_MASK | RXE_RWR_MASK
533 | RXE_START_MASK | RXE_END_MASK,
534 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES,
535 .offset = {
536 [RXE_BTH] = 0,
537 [RXE_RETH] = RXE_BTH_BYTES,
538 [RXE_IMMDT] = RXE_BTH_BYTES
539 + RXE_RETH_BYTES,
540 [RXE_PAYLOAD] = RXE_BTH_BYTES
541 + RXE_RETH_BYTES
542 + RXE_IMMDT_BYTES,
543 }
544 },
545
546 /* RD */
547 [IB_OPCODE_RD_SEND_FIRST] = {
548 .name = "IB_OPCODE_RD_SEND_FIRST",
549 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
550 | RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK
551 | RXE_START_MASK,
552 .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
553 .offset = {
554 [RXE_BTH] = 0,
555 [RXE_RDETH] = RXE_BTH_BYTES,
556 [RXE_DETH] = RXE_BTH_BYTES
557 + RXE_RDETH_BYTES,
558 [RXE_PAYLOAD] = RXE_BTH_BYTES
559 + RXE_RDETH_BYTES
560 + RXE_DETH_BYTES,
561 }
562 },
563 [IB_OPCODE_RD_SEND_MIDDLE] = {
564 .name = "IB_OPCODE_RD_SEND_MIDDLE",
565 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
566 | RXE_REQ_MASK | RXE_SEND_MASK
567 | RXE_MIDDLE_MASK,
568 .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
569 .offset = {
570 [RXE_BTH] = 0,
571 [RXE_RDETH] = RXE_BTH_BYTES,
572 [RXE_DETH] = RXE_BTH_BYTES
573 + RXE_RDETH_BYTES,
574 [RXE_PAYLOAD] = RXE_BTH_BYTES
575 + RXE_RDETH_BYTES
576 + RXE_DETH_BYTES,
577 }
578 },
579 [IB_OPCODE_RD_SEND_LAST] = {
580 .name = "IB_OPCODE_RD_SEND_LAST",
581 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
582 | RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK
583 | RXE_END_MASK,
584 .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
585 .offset = {
586 [RXE_BTH] = 0,
587 [RXE_RDETH] = RXE_BTH_BYTES,
588 [RXE_DETH] = RXE_BTH_BYTES
589 + RXE_RDETH_BYTES,
590 [RXE_PAYLOAD] = RXE_BTH_BYTES
591 + RXE_RDETH_BYTES
592 + RXE_DETH_BYTES,
593 }
594 },
595 [IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = {
596 .name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE",
597 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
598 | RXE_PAYLOAD_MASK | RXE_REQ_MASK
599 | RXE_COMP_MASK | RXE_SEND_MASK
600 | RXE_END_MASK,
601 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
602 + RXE_RDETH_BYTES,
603 .offset = {
604 [RXE_BTH] = 0,
605 [RXE_RDETH] = RXE_BTH_BYTES,
606 [RXE_DETH] = RXE_BTH_BYTES
607 + RXE_RDETH_BYTES,
608 [RXE_IMMDT] = RXE_BTH_BYTES
609 + RXE_RDETH_BYTES
610 + RXE_DETH_BYTES,
611 [RXE_PAYLOAD] = RXE_BTH_BYTES
612 + RXE_RDETH_BYTES
613 + RXE_DETH_BYTES
614 + RXE_IMMDT_BYTES,
615 }
616 },
617 [IB_OPCODE_RD_SEND_ONLY] = {
618 .name = "IB_OPCODE_RD_SEND_ONLY",
619 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
620 | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
621 | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
622 .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
623 .offset = {
624 [RXE_BTH] = 0,
625 [RXE_RDETH] = RXE_BTH_BYTES,
626 [RXE_DETH] = RXE_BTH_BYTES
627 + RXE_RDETH_BYTES,
628 [RXE_PAYLOAD] = RXE_BTH_BYTES
629 + RXE_RDETH_BYTES
630 + RXE_DETH_BYTES,
631 }
632 },
633 [IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = {
634 .name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE",
635 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
636 | RXE_PAYLOAD_MASK | RXE_REQ_MASK
637 | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
638 | RXE_START_MASK | RXE_END_MASK,
639 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
640 + RXE_RDETH_BYTES,
641 .offset = {
642 [RXE_BTH] = 0,
643 [RXE_RDETH] = RXE_BTH_BYTES,
644 [RXE_DETH] = RXE_BTH_BYTES
645 + RXE_RDETH_BYTES,
646 [RXE_IMMDT] = RXE_BTH_BYTES
647 + RXE_RDETH_BYTES
648 + RXE_DETH_BYTES,
649 [RXE_PAYLOAD] = RXE_BTH_BYTES
650 + RXE_RDETH_BYTES
651 + RXE_DETH_BYTES
652 + RXE_IMMDT_BYTES,
653 }
654 },
655 [IB_OPCODE_RD_RDMA_WRITE_FIRST] = {
656 .name = "IB_OPCODE_RD_RDMA_WRITE_FIRST",
657 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
658 | RXE_PAYLOAD_MASK | RXE_REQ_MASK
659 | RXE_WRITE_MASK | RXE_START_MASK,
660 .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
661 + RXE_RDETH_BYTES,
662 .offset = {
663 [RXE_BTH] = 0,
664 [RXE_RDETH] = RXE_BTH_BYTES,
665 [RXE_DETH] = RXE_BTH_BYTES
666 + RXE_RDETH_BYTES,
667 [RXE_RETH] = RXE_BTH_BYTES
668 + RXE_RDETH_BYTES
669 + RXE_DETH_BYTES,
670 [RXE_PAYLOAD] = RXE_BTH_BYTES
671 + RXE_RDETH_BYTES
672 + RXE_DETH_BYTES
673 + RXE_RETH_BYTES,
674 }
675 },
676 [IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = {
677 .name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE",
678 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
679 | RXE_REQ_MASK | RXE_WRITE_MASK
680 | RXE_MIDDLE_MASK,
681 .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
682 .offset = {
683 [RXE_BTH] = 0,
684 [RXE_RDETH] = RXE_BTH_BYTES,
685 [RXE_DETH] = RXE_BTH_BYTES
686 + RXE_RDETH_BYTES,
687 [RXE_PAYLOAD] = RXE_BTH_BYTES
688 + RXE_RDETH_BYTES
689 + RXE_DETH_BYTES,
690 }
691 },
692 [IB_OPCODE_RD_RDMA_WRITE_LAST] = {
693 .name = "IB_OPCODE_RD_RDMA_WRITE_LAST",
694 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
695 | RXE_REQ_MASK | RXE_WRITE_MASK
696 | RXE_END_MASK,
697 .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
698 .offset = {
699 [RXE_BTH] = 0,
700 [RXE_RDETH] = RXE_BTH_BYTES,
701 [RXE_DETH] = RXE_BTH_BYTES
702 + RXE_RDETH_BYTES,
703 [RXE_PAYLOAD] = RXE_BTH_BYTES
704 + RXE_RDETH_BYTES
705 + RXE_DETH_BYTES,
706 }
707 },
708 [IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
709 .name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE",
710 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
711 | RXE_PAYLOAD_MASK | RXE_REQ_MASK
712 | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
713 | RXE_END_MASK,
714 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
715 + RXE_RDETH_BYTES,
716 .offset = {
717 [RXE_BTH] = 0,
718 [RXE_RDETH] = RXE_BTH_BYTES,
719 [RXE_DETH] = RXE_BTH_BYTES
720 + RXE_RDETH_BYTES,
721 [RXE_IMMDT] = RXE_BTH_BYTES
722 + RXE_RDETH_BYTES
723 + RXE_DETH_BYTES,
724 [RXE_PAYLOAD] = RXE_BTH_BYTES
725 + RXE_RDETH_BYTES
726 + RXE_DETH_BYTES
727 + RXE_IMMDT_BYTES,
728 }
729 },
730 [IB_OPCODE_RD_RDMA_WRITE_ONLY] = {
731 .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY",
732 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
733 | RXE_PAYLOAD_MASK | RXE_REQ_MASK
734 | RXE_WRITE_MASK | RXE_START_MASK
735 | RXE_END_MASK,
736 .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
737 + RXE_RDETH_BYTES,
738 .offset = {
739 [RXE_BTH] = 0,
740 [RXE_RDETH] = RXE_BTH_BYTES,
741 [RXE_DETH] = RXE_BTH_BYTES
742 + RXE_RDETH_BYTES,
743 [RXE_RETH] = RXE_BTH_BYTES
744 + RXE_RDETH_BYTES
745 + RXE_DETH_BYTES,
746 [RXE_PAYLOAD] = RXE_BTH_BYTES
747 + RXE_RDETH_BYTES
748 + RXE_DETH_BYTES
749 + RXE_RETH_BYTES,
750 }
751 },
752 [IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
753 .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
754 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
755 | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
756 | RXE_REQ_MASK | RXE_WRITE_MASK
757 | RXE_COMP_MASK | RXE_RWR_MASK
758 | RXE_START_MASK | RXE_END_MASK,
759 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES
760 + RXE_DETH_BYTES + RXE_RDETH_BYTES,
761 .offset = {
762 [RXE_BTH] = 0,
763 [RXE_RDETH] = RXE_BTH_BYTES,
764 [RXE_DETH] = RXE_BTH_BYTES
765 + RXE_RDETH_BYTES,
766 [RXE_RETH] = RXE_BTH_BYTES
767 + RXE_RDETH_BYTES
768 + RXE_DETH_BYTES,
769 [RXE_IMMDT] = RXE_BTH_BYTES
770 + RXE_RDETH_BYTES
771 + RXE_DETH_BYTES
772 + RXE_RETH_BYTES,
773 [RXE_PAYLOAD] = RXE_BTH_BYTES
774 + RXE_RDETH_BYTES
775 + RXE_DETH_BYTES
776 + RXE_RETH_BYTES
777 + RXE_IMMDT_BYTES,
778 }
779 },
780 [IB_OPCODE_RD_RDMA_READ_REQUEST] = {
781 .name = "IB_OPCODE_RD_RDMA_READ_REQUEST",
782 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
783 | RXE_REQ_MASK | RXE_READ_MASK
784 | RXE_START_MASK | RXE_END_MASK,
785 .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
786 + RXE_RDETH_BYTES,
787 .offset = {
788 [RXE_BTH] = 0,
789 [RXE_RDETH] = RXE_BTH_BYTES,
790 [RXE_DETH] = RXE_BTH_BYTES
791 + RXE_RDETH_BYTES,
792 [RXE_RETH] = RXE_BTH_BYTES
793 + RXE_RDETH_BYTES
794 + RXE_DETH_BYTES,
795 [RXE_PAYLOAD] = RXE_BTH_BYTES
796 + RXE_RETH_BYTES
797 + RXE_DETH_BYTES
798 + RXE_RDETH_BYTES,
799 }
800 },
801 [IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = {
802 .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST",
803 .mask = RXE_RDETH_MASK | RXE_AETH_MASK
804 | RXE_PAYLOAD_MASK | RXE_ACK_MASK
805 | RXE_START_MASK,
806 .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
807 .offset = {
808 [RXE_BTH] = 0,
809 [RXE_RDETH] = RXE_BTH_BYTES,
810 [RXE_AETH] = RXE_BTH_BYTES
811 + RXE_RDETH_BYTES,
812 [RXE_PAYLOAD] = RXE_BTH_BYTES
813 + RXE_RDETH_BYTES
814 + RXE_AETH_BYTES,
815 }
816 },
817 [IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = {
818 .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE",
819 .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
820 | RXE_MIDDLE_MASK,
821 .length = RXE_BTH_BYTES + RXE_RDETH_BYTES,
822 .offset = {
823 [RXE_BTH] = 0,
824 [RXE_RDETH] = RXE_BTH_BYTES,
825 [RXE_PAYLOAD] = RXE_BTH_BYTES
826 + RXE_RDETH_BYTES,
827 }
828 },
829 [IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = {
830 .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST",
831 .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK
832 | RXE_ACK_MASK | RXE_END_MASK,
833 .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
834 .offset = {
835 [RXE_BTH] = 0,
836 [RXE_RDETH] = RXE_BTH_BYTES,
837 [RXE_AETH] = RXE_BTH_BYTES
838 + RXE_RDETH_BYTES,
839 [RXE_PAYLOAD] = RXE_BTH_BYTES
840 + RXE_RDETH_BYTES
841 + RXE_AETH_BYTES,
842 }
843 },
844 [IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = {
845 .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY",
846 .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK
847 | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
848 .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
849 .offset = {
850 [RXE_BTH] = 0,
851 [RXE_RDETH] = RXE_BTH_BYTES,
852 [RXE_AETH] = RXE_BTH_BYTES
853 + RXE_RDETH_BYTES,
854 [RXE_PAYLOAD] = RXE_BTH_BYTES
855 + RXE_RDETH_BYTES
856 + RXE_AETH_BYTES,
857 }
858 },
859 [IB_OPCODE_RD_ACKNOWLEDGE] = {
860 .name = "IB_OPCODE_RD_ACKNOWLEDGE",
861 .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK
862 | RXE_START_MASK | RXE_END_MASK,
863 .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
864 .offset = {
865 [RXE_BTH] = 0,
866 [RXE_RDETH] = RXE_BTH_BYTES,
867 [RXE_AETH] = RXE_BTH_BYTES
868 + RXE_RDETH_BYTES,
869 }
870 },
871 [IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = {
872 .name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE",
873 .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK
874 | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
875 .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES
876 + RXE_RDETH_BYTES,
877 .offset = {
878 [RXE_BTH] = 0,
879 [RXE_RDETH] = RXE_BTH_BYTES,
880 [RXE_AETH] = RXE_BTH_BYTES
881 + RXE_RDETH_BYTES,
882 [RXE_ATMACK] = RXE_BTH_BYTES
883 + RXE_RDETH_BYTES
884 + RXE_AETH_BYTES,
885 }
886 },
887 [IB_OPCODE_RD_COMPARE_SWAP] = {
888 .name = "IB_OPCODE_RD_COMPARE_SWAP",
889 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK
890 | RXE_REQ_MASK | RXE_ATOMIC_MASK
891 | RXE_START_MASK | RXE_END_MASK,
892 .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES
893 + RXE_RDETH_BYTES,
894 .offset = {
895 [RXE_BTH] = 0,
896 [RXE_RDETH] = RXE_BTH_BYTES,
897 [RXE_DETH] = RXE_BTH_BYTES
898 + RXE_RDETH_BYTES,
899 [RXE_ATMETH] = RXE_BTH_BYTES
900 + RXE_RDETH_BYTES
901 + RXE_DETH_BYTES,
902 [RXE_PAYLOAD] = RXE_BTH_BYTES
903 + RXE_ATMETH_BYTES
904 + RXE_DETH_BYTES
905 + RXE_RDETH_BYTES,
906 }
907 },
908 [IB_OPCODE_RD_FETCH_ADD] = {
909 .name = "IB_OPCODE_RD_FETCH_ADD",
910 .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK
911 | RXE_REQ_MASK | RXE_ATOMIC_MASK
912 | RXE_START_MASK | RXE_END_MASK,
913 .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES
914 + RXE_RDETH_BYTES,
915 .offset = {
916 [RXE_BTH] = 0,
917 [RXE_RDETH] = RXE_BTH_BYTES,
918 [RXE_DETH] = RXE_BTH_BYTES
919 + RXE_RDETH_BYTES,
920 [RXE_ATMETH] = RXE_BTH_BYTES
921 + RXE_RDETH_BYTES
922 + RXE_DETH_BYTES,
923 [RXE_PAYLOAD] = RXE_BTH_BYTES
924 + RXE_ATMETH_BYTES
925 + RXE_DETH_BYTES
926 + RXE_RDETH_BYTES,
927 }
928 },
929
930 /* UD */
931 [IB_OPCODE_UD_SEND_ONLY] = {
932 .name = "IB_OPCODE_UD_SEND_ONLY",
933 .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
934 | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
935 | RXE_START_MASK | RXE_END_MASK,
936 .length = RXE_BTH_BYTES + RXE_DETH_BYTES,
937 .offset = {
938 [RXE_BTH] = 0,
939 [RXE_DETH] = RXE_BTH_BYTES,
940 [RXE_PAYLOAD] = RXE_BTH_BYTES
941 + RXE_DETH_BYTES,
942 }
943 },
944 [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = {
945 .name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE",
946 .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
947 | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
948 | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
949 .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES,
950 .offset = {
951 [RXE_BTH] = 0,
952 [RXE_DETH] = RXE_BTH_BYTES,
953 [RXE_IMMDT] = RXE_BTH_BYTES
954 + RXE_DETH_BYTES,
955 [RXE_PAYLOAD] = RXE_BTH_BYTES
956 + RXE_DETH_BYTES
957 + RXE_IMMDT_BYTES,
958 }
959 },
960
961};
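
A hedged sketch of how the per-opcode offset table is meant to be used, not code copied from the driver: once the opcode of a packet is known, any extension header can be located with a single table lookup instead of reparsing the header chain.

	/* hypothetical helper: find a header inside a packet of a given opcode */
	static inline void *rxe_hdr_ptr(u8 *pkt_hdr, int opcode, enum rxe_hdr_type type)
	{
		return pkt_hdr + rxe_opcode[opcode].offset[type];
	}
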
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
new file mode 100644
index 000000000000..307604e9c78d
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -0,0 +1,129 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_OPCODE_H
35#define RXE_OPCODE_H
36
37/*
38 * contains the header bit mask definitions, header lengths, and the
39 * declarations of the rxe_opcode_info and
40 * rxe_wr_opcode_info structs
41 */
42
43enum rxe_wr_mask {
44 WR_INLINE_MASK = BIT(0),
45 WR_ATOMIC_MASK = BIT(1),
46 WR_SEND_MASK = BIT(2),
47 WR_READ_MASK = BIT(3),
48 WR_WRITE_MASK = BIT(4),
49 WR_LOCAL_MASK = BIT(5),
50 WR_REG_MASK = BIT(6),
51
52 WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
53 WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK,
54 WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,
55 WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK,
56};
57
58#define WR_MAX_QPT (8)
59
60struct rxe_wr_opcode_info {
61 char *name;
62 enum rxe_wr_mask mask[WR_MAX_QPT];
63};
64
65extern struct rxe_wr_opcode_info rxe_wr_opcode_info[];
66
67enum rxe_hdr_type {
68 RXE_LRH,
69 RXE_GRH,
70 RXE_BTH,
71 RXE_RETH,
72 RXE_AETH,
73 RXE_ATMETH,
74 RXE_ATMACK,
75 RXE_IETH,
76 RXE_RDETH,
77 RXE_DETH,
78 RXE_IMMDT,
79 RXE_PAYLOAD,
80 NUM_HDR_TYPES
81};
82
83enum rxe_hdr_mask {
84 RXE_LRH_MASK = BIT(RXE_LRH),
85 RXE_GRH_MASK = BIT(RXE_GRH),
86 RXE_BTH_MASK = BIT(RXE_BTH),
87 RXE_IMMDT_MASK = BIT(RXE_IMMDT),
88 RXE_RETH_MASK = BIT(RXE_RETH),
89 RXE_AETH_MASK = BIT(RXE_AETH),
90 RXE_ATMETH_MASK = BIT(RXE_ATMETH),
91 RXE_ATMACK_MASK = BIT(RXE_ATMACK),
92 RXE_IETH_MASK = BIT(RXE_IETH),
93 RXE_RDETH_MASK = BIT(RXE_RDETH),
94 RXE_DETH_MASK = BIT(RXE_DETH),
95 RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD),
96
97 RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0),
98 RXE_ACK_MASK = BIT(NUM_HDR_TYPES + 1),
99 RXE_SEND_MASK = BIT(NUM_HDR_TYPES + 2),
100 RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3),
101 RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4),
102 RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5),
103
104 RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6),
105 RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7),
106
107 RXE_START_MASK = BIT(NUM_HDR_TYPES + 8),
108 RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9),
109 RXE_END_MASK = BIT(NUM_HDR_TYPES + 10),
110
111 RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),
112
113 RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK),
114 RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK),
115};
116
117#define OPCODE_NONE (-1)
118#define RXE_NUM_OPCODE 256
119
120struct rxe_opcode_info {
121 char *name;
122 enum rxe_hdr_mask mask;
123 int length;
124 int offset[NUM_HDR_TYPES];
125};
126
127extern struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE];
128
129#endif /* RXE_OPCODE_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
new file mode 100644
index 000000000000..f459c43a77c8
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -0,0 +1,172 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_PARAM_H
35#define RXE_PARAM_H
36
37static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu)
38{
39 if (mtu < 256)
40 return 0;
41 else if (mtu < 512)
42 return IB_MTU_256;
43 else if (mtu < 1024)
44 return IB_MTU_512;
45 else if (mtu < 2048)
46 return IB_MTU_1024;
47 else if (mtu < 4096)
48 return IB_MTU_2048;
49 else
50 return IB_MTU_4096;
51}
52
53/* Find the IB MTU for a given network MTU. */
54static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
55{
56 mtu -= RXE_MAX_HDR_LENGTH;
57
58 return rxe_mtu_int_to_enum(mtu);
59}
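
As a worked example, not part of the patch: rxe_mtu_int_to_enum() picks the largest IB MTU that still fits, so rxe_mtu_int_to_enum(1500) yields IB_MTU_1024 (1024 <= 1500 < 2048) and rxe_mtu_int_to_enum(9000) yields IB_MTU_4096; eth_mtu_int_to_enum() first subtracts RXE_MAX_HDR_LENGTH so that an MTU-sized IB payload plus the stacked Ethernet/IP/UDP/RoCE headers still fits in one network frame.
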
60
61/* default/initial rxe device parameter settings */
62enum rxe_device_param {
63 RXE_FW_VER = 0,
64 RXE_MAX_MR_SIZE = -1ull,
65 RXE_PAGE_SIZE_CAP = 0xfffff000,
66 RXE_VENDOR_ID = 0,
67 RXE_VENDOR_PART_ID = 0,
68 RXE_HW_VER = 0,
69 RXE_MAX_QP = 0x10000,
70 RXE_MAX_QP_WR = 0x4000,
71 RXE_MAX_INLINE_DATA = 400,
72 RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR
73 | IB_DEVICE_BAD_QKEY_CNTR
74 | IB_DEVICE_AUTO_PATH_MIG
75 | IB_DEVICE_CHANGE_PHY_PORT
76 | IB_DEVICE_UD_AV_PORT_ENFORCE
77 | IB_DEVICE_PORT_ACTIVE_EVENT
78 | IB_DEVICE_SYS_IMAGE_GUID
79 | IB_DEVICE_RC_RNR_NAK_GEN
80 | IB_DEVICE_SRQ_RESIZE
81 | IB_DEVICE_MEM_MGT_EXTENSIONS,
82 RXE_MAX_SGE = 32,
83 RXE_MAX_SGE_RD = 32,
84 RXE_MAX_CQ = 16384,
85 RXE_MAX_LOG_CQE = 13,
86 RXE_MAX_MR = 2 * 1024,
87 RXE_MAX_PD = 0x7ffc,
88 RXE_MAX_QP_RD_ATOM = 128,
89 RXE_MAX_EE_RD_ATOM = 0,
90 RXE_MAX_RES_RD_ATOM = 0x3f000,
91 RXE_MAX_QP_INIT_RD_ATOM = 128,
92 RXE_MAX_EE_INIT_RD_ATOM = 0,
93 RXE_ATOMIC_CAP = 1,
94 RXE_MAX_EE = 0,
95 RXE_MAX_RDD = 0,
96 RXE_MAX_MW = 0,
97 RXE_MAX_RAW_IPV6_QP = 0,
98 RXE_MAX_RAW_ETHY_QP = 0,
99 RXE_MAX_MCAST_GRP = 8192,
100 RXE_MAX_MCAST_QP_ATTACH = 56,
101 RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000,
102 RXE_MAX_AH = 100,
103 RXE_MAX_FMR = 0,
104 RXE_MAX_MAP_PER_FMR = 0,
105 RXE_MAX_SRQ = 960,
106 RXE_MAX_SRQ_WR = 0x4000,
107 RXE_MIN_SRQ_WR = 1,
108 RXE_MAX_SRQ_SGE = 27,
109 RXE_MIN_SRQ_SGE = 1,
110 RXE_MAX_FMR_PAGE_LIST_LEN = 512,
111 RXE_MAX_PKEYS = 64,
112 RXE_LOCAL_CA_ACK_DELAY = 15,
113
114 RXE_MAX_UCONTEXT = 512,
115
116 RXE_NUM_PORT = 1,
117 RXE_NUM_COMP_VECTORS = 1,
118
119 RXE_MIN_QP_INDEX = 16,
120 RXE_MAX_QP_INDEX = 0x00020000,
121
122 RXE_MIN_SRQ_INDEX = 0x00020001,
123 RXE_MAX_SRQ_INDEX = 0x00040000,
124
125 RXE_MIN_MR_INDEX = 0x00000001,
126 RXE_MAX_MR_INDEX = 0x00040000,
127 RXE_MIN_MW_INDEX = 0x00040001,
128 RXE_MAX_MW_INDEX = 0x00060000,
129 RXE_MAX_PKT_PER_ACK = 64,
130
131 RXE_MAX_UNACKED_PSNS = 128,
132
133 /* Max inflight SKBs per queue pair */
134 RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
135 RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,
136
137 /* Delay before calling arbiter timer */
138 RXE_NSEC_ARB_TIMER_DELAY = 200,
139};
140
141/* default/initial rxe port parameters */
142enum rxe_port_param {
143 RXE_PORT_STATE = IB_PORT_DOWN,
144 RXE_PORT_MAX_MTU = IB_MTU_4096,
145 RXE_PORT_ACTIVE_MTU = IB_MTU_256,
146 RXE_PORT_GID_TBL_LEN = 1024,
147 RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
148 RXE_PORT_MAX_MSG_SZ = 0x800000,
149 RXE_PORT_BAD_PKEY_CNTR = 0,
150 RXE_PORT_QKEY_VIOL_CNTR = 0,
151 RXE_PORT_LID = 0,
152 RXE_PORT_SM_LID = 0,
153 RXE_PORT_SM_SL = 0,
154 RXE_PORT_LMC = 0,
155 RXE_PORT_MAX_VL_NUM = 1,
156 RXE_PORT_SUBNET_TIMEOUT = 0,
157 RXE_PORT_INIT_TYPE_REPLY = 0,
158 RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X,
159 RXE_PORT_ACTIVE_SPEED = 1,
160 RXE_PORT_PKEY_TBL_LEN = 64,
161 RXE_PORT_PHYS_STATE = 2,
162 RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL,
163};
164
165/* default/initial port info parameters */
166enum rxe_port_info_param {
167 RXE_PORT_INFO_VL_CAP = 4, /* 1-8 */
168 RXE_PORT_INFO_MTU_CAP = 5, /* 4096 */
169 RXE_PORT_INFO_OPER_VL = 1, /* 1 */
170};
171
172#endif /* RXE_PARAM_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
new file mode 100644
index 000000000000..6bac0717c540
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -0,0 +1,502 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36
37/* info about object pools
38 * note that mr and mw share a single index space
39 * so that one can map an lkey to the correct type of object
40 */
41struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
42 [RXE_TYPE_UC] = {
43 .name = "rxe-uc",
44 .size = sizeof(struct rxe_ucontext),
45 },
46 [RXE_TYPE_PD] = {
47 .name = "rxe-pd",
48 .size = sizeof(struct rxe_pd),
49 },
50 [RXE_TYPE_AH] = {
51 .name = "rxe-ah",
52 .size = sizeof(struct rxe_ah),
53 .flags = RXE_POOL_ATOMIC,
54 },
55 [RXE_TYPE_SRQ] = {
56 .name = "rxe-srq",
57 .size = sizeof(struct rxe_srq),
58 .flags = RXE_POOL_INDEX,
59 .min_index = RXE_MIN_SRQ_INDEX,
60 .max_index = RXE_MAX_SRQ_INDEX,
61 },
62 [RXE_TYPE_QP] = {
63 .name = "rxe-qp",
64 .size = sizeof(struct rxe_qp),
65 .cleanup = rxe_qp_cleanup,
66 .flags = RXE_POOL_INDEX,
67 .min_index = RXE_MIN_QP_INDEX,
68 .max_index = RXE_MAX_QP_INDEX,
69 },
70 [RXE_TYPE_CQ] = {
71 .name = "rxe-cq",
72 .size = sizeof(struct rxe_cq),
73 .cleanup = rxe_cq_cleanup,
74 },
75 [RXE_TYPE_MR] = {
76 .name = "rxe-mr",
77 .size = sizeof(struct rxe_mem),
78 .cleanup = rxe_mem_cleanup,
79 .flags = RXE_POOL_INDEX,
80 .max_index = RXE_MAX_MR_INDEX,
81 .min_index = RXE_MIN_MR_INDEX,
82 },
83 [RXE_TYPE_MW] = {
84 .name = "rxe-mw",
85 .size = sizeof(struct rxe_mem),
86 .flags = RXE_POOL_INDEX,
87 .max_index = RXE_MAX_MW_INDEX,
88 .min_index = RXE_MIN_MW_INDEX,
89 },
90 [RXE_TYPE_MC_GRP] = {
91 .name = "rxe-mc_grp",
92 .size = sizeof(struct rxe_mc_grp),
93 .cleanup = rxe_mc_cleanup,
94 .flags = RXE_POOL_KEY,
95 .key_offset = offsetof(struct rxe_mc_grp, mgid),
96 .key_size = sizeof(union ib_gid),
97 },
98 [RXE_TYPE_MC_ELEM] = {
99 .name = "rxe-mc_elem",
100 .size = sizeof(struct rxe_mc_elem),
101 .flags = RXE_POOL_ATOMIC,
102 },
103};
104
105static inline char *pool_name(struct rxe_pool *pool)
106{
107 return rxe_type_info[pool->type].name;
108}
109
110static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
111{
112 return rxe_type_info[pool->type].cache;
113}
114
115static inline enum rxe_elem_type rxe_type(void *arg)
116{
117 struct rxe_pool_entry *elem = arg;
118
119 return elem->pool->type;
120}
121
122int rxe_cache_init(void)
123{
124 int err;
125 int i;
126 size_t size;
127 struct rxe_type_info *type;
128
129 for (i = 0; i < RXE_NUM_TYPES; i++) {
130 type = &rxe_type_info[i];
131 size = ALIGN(type->size, RXE_POOL_ALIGN);
132 type->cache = kmem_cache_create(type->name, size,
133 RXE_POOL_ALIGN,
134 RXE_POOL_CACHE_FLAGS, NULL);
135 if (!type->cache) {
136 pr_err("Unable to init kmem cache for %s\n",
137 type->name);
138 err = -ENOMEM;
139 goto err1;
140 }
141 }
142
143 return 0;
144
145err1:
146 while (--i >= 0) {
147 kmem_cache_destroy(type->cache);
148 type->cache = NULL;
149 }
150
151 return err;
152}
153
154void rxe_cache_exit(void)
155{
156 int i;
157 struct rxe_type_info *type;
158
159 for (i = 0; i < RXE_NUM_TYPES; i++) {
160 type = &rxe_type_info[i];
161 kmem_cache_destroy(type->cache);
162 type->cache = NULL;
163 }
164}
165
166static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
167{
168 int err = 0;
169 size_t size;
170
171 if ((max - min + 1) < pool->max_elem) {
172 pr_warn("not enough indices for max_elem\n");
173 err = -EINVAL;
174 goto out;
175 }
176
177 pool->max_index = max;
178 pool->min_index = min;
179
180 size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
181 pool->table = kmalloc(size, GFP_KERNEL);
182 if (!pool->table) {
183 pr_warn("no memory for bit table\n");
184 err = -ENOMEM;
185 goto out;
186 }
187
188 pool->table_size = size;
189 bitmap_zero(pool->table, max - min + 1);
190
191out:
192 return err;
193}
194
195int rxe_pool_init(
196 struct rxe_dev *rxe,
197 struct rxe_pool *pool,
198 enum rxe_elem_type type,
199 unsigned max_elem)
200{
201 int err = 0;
202 size_t size = rxe_type_info[type].size;
203
204 memset(pool, 0, sizeof(*pool));
205
206 pool->rxe = rxe;
207 pool->type = type;
208 pool->max_elem = max_elem;
209 pool->elem_size = ALIGN(size, RXE_POOL_ALIGN);
210 pool->flags = rxe_type_info[type].flags;
211 pool->tree = RB_ROOT;
212 pool->cleanup = rxe_type_info[type].cleanup;
213
214 atomic_set(&pool->num_elem, 0);
215
216 kref_init(&pool->ref_cnt);
217
218 spin_lock_init(&pool->pool_lock);
219
220 if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
221 err = rxe_pool_init_index(pool,
222 rxe_type_info[type].max_index,
223 rxe_type_info[type].min_index);
224 if (err)
225 goto out;
226 }
227
228 if (rxe_type_info[type].flags & RXE_POOL_KEY) {
229 pool->key_offset = rxe_type_info[type].key_offset;
230 pool->key_size = rxe_type_info[type].key_size;
231 }
232
233 pool->state = rxe_pool_valid;
234
235out:
236 return err;
237}
238
239static void rxe_pool_release(struct kref *kref)
240{
241 struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
242
243 pool->state = rxe_pool_invalid;
244 kfree(pool->table);
245}
246
247static void rxe_pool_put(struct rxe_pool *pool)
248{
249 kref_put(&pool->ref_cnt, rxe_pool_release);
250}
251
252int rxe_pool_cleanup(struct rxe_pool *pool)
253{
254 unsigned long flags;
255
256 spin_lock_irqsave(&pool->pool_lock, flags);
257 pool->state = rxe_pool_invalid;
258 if (atomic_read(&pool->num_elem) > 0)
259 pr_warn("%s pool destroyed with unfree'd elem\n",
260 pool_name(pool));
261 spin_unlock_irqrestore(&pool->pool_lock, flags);
262
263 rxe_pool_put(pool);
264
265 return 0;
266}
267
268static u32 alloc_index(struct rxe_pool *pool)
269{
270 u32 index;
271 u32 range = pool->max_index - pool->min_index + 1;
272
273 index = find_next_zero_bit(pool->table, range, pool->last);
274 if (index >= range)
275 index = find_first_zero_bit(pool->table, range);
276
277 set_bit(index, pool->table);
278 pool->last = index;
279 return index + pool->min_index;
280}
281
282static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
283{
284 struct rb_node **link = &pool->tree.rb_node;
285 struct rb_node *parent = NULL;
286 struct rxe_pool_entry *elem;
287
288 while (*link) {
289 parent = *link;
290 elem = rb_entry(parent, struct rxe_pool_entry, node);
291
292 if (elem->index == new->index) {
293 pr_warn("element already exists!\n");
294 goto out;
295 }
296
297 if (elem->index > new->index)
298 link = &(*link)->rb_left;
299 else
300 link = &(*link)->rb_right;
301 }
302
303 rb_link_node(&new->node, parent, link);
304 rb_insert_color(&new->node, &pool->tree);
305out:
306 return;
307}
308
309static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
310{
311 struct rb_node **link = &pool->tree.rb_node;
312 struct rb_node *parent = NULL;
313 struct rxe_pool_entry *elem;
314 int cmp;
315
316 while (*link) {
317 parent = *link;
318 elem = rb_entry(parent, struct rxe_pool_entry, node);
319
320 cmp = memcmp((u8 *)elem + pool->key_offset,
321 (u8 *)new + pool->key_offset, pool->key_size);
322
323 if (cmp == 0) {
324 pr_warn("key already exists!\n");
325 goto out;
326 }
327
328 if (cmp > 0)
329 link = &(*link)->rb_left;
330 else
331 link = &(*link)->rb_right;
332 }
333
334 rb_link_node(&new->node, parent, link);
335 rb_insert_color(&new->node, &pool->tree);
336out:
337 return;
338}
339
340void rxe_add_key(void *arg, void *key)
341{
342 struct rxe_pool_entry *elem = arg;
343 struct rxe_pool *pool = elem->pool;
344 unsigned long flags;
345
346 spin_lock_irqsave(&pool->pool_lock, flags);
347 memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
348 insert_key(pool, elem);
349 spin_unlock_irqrestore(&pool->pool_lock, flags);
350}
351
352void rxe_drop_key(void *arg)
353{
354 struct rxe_pool_entry *elem = arg;
355 struct rxe_pool *pool = elem->pool;
356 unsigned long flags;
357
358 spin_lock_irqsave(&pool->pool_lock, flags);
359 rb_erase(&elem->node, &pool->tree);
360 spin_unlock_irqrestore(&pool->pool_lock, flags);
361}
362
363void rxe_add_index(void *arg)
364{
365 struct rxe_pool_entry *elem = arg;
366 struct rxe_pool *pool = elem->pool;
367 unsigned long flags;
368
369 spin_lock_irqsave(&pool->pool_lock, flags);
370 elem->index = alloc_index(pool);
371 insert_index(pool, elem);
372 spin_unlock_irqrestore(&pool->pool_lock, flags);
373}
374
375void rxe_drop_index(void *arg)
376{
377 struct rxe_pool_entry *elem = arg;
378 struct rxe_pool *pool = elem->pool;
379 unsigned long flags;
380
381 spin_lock_irqsave(&pool->pool_lock, flags);
382 clear_bit(elem->index - pool->min_index, pool->table);
383 rb_erase(&elem->node, &pool->tree);
384 spin_unlock_irqrestore(&pool->pool_lock, flags);
385}
386
387void *rxe_alloc(struct rxe_pool *pool)
388{
389 struct rxe_pool_entry *elem;
390 unsigned long flags;
391
392 might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
393
394 spin_lock_irqsave(&pool->pool_lock, flags);
395 if (pool->state != rxe_pool_valid) {
396 spin_unlock_irqrestore(&pool->pool_lock, flags);
397 return NULL;
398 }
399 kref_get(&pool->ref_cnt);
400 spin_unlock_irqrestore(&pool->pool_lock, flags);
401
402 kref_get(&pool->rxe->ref_cnt);
403
404 if (atomic_inc_return(&pool->num_elem) > pool->max_elem) {
405 atomic_dec(&pool->num_elem);
406 rxe_dev_put(pool->rxe);
407 rxe_pool_put(pool);
408 return NULL;
409 }
410
411 elem = kmem_cache_zalloc(pool_cache(pool),
412 (pool->flags & RXE_POOL_ATOMIC) ?
413 GFP_ATOMIC : GFP_KERNEL);
414
415 elem->pool = pool;
416 kref_init(&elem->ref_cnt);
417
418 return elem;
419}
420
421void rxe_elem_release(struct kref *kref)
422{
423 struct rxe_pool_entry *elem =
424 container_of(kref, struct rxe_pool_entry, ref_cnt);
425 struct rxe_pool *pool = elem->pool;
426
427 if (pool->cleanup)
428 pool->cleanup(elem);
429
430 kmem_cache_free(pool_cache(pool), elem);
431 atomic_dec(&pool->num_elem);
432 rxe_dev_put(pool->rxe);
433 rxe_pool_put(pool);
434}
435
436void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
437{
438 struct rb_node *node = NULL;
439 struct rxe_pool_entry *elem = NULL;
440 unsigned long flags;
441
442 spin_lock_irqsave(&pool->pool_lock, flags);
443
444 if (pool->state != rxe_pool_valid)
445 goto out;
446
447 node = pool->tree.rb_node;
448
449 while (node) {
450 elem = rb_entry(node, struct rxe_pool_entry, node);
451
452 if (elem->index > index)
453 node = node->rb_left;
454 else if (elem->index < index)
455 node = node->rb_right;
456 else
457 break;
458 }
459
460 if (node)
461 kref_get(&elem->ref_cnt);
462
463out:
464 spin_unlock_irqrestore(&pool->pool_lock, flags);
465 return node ? (void *)elem : NULL;
466}
467
468void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
469{
470 struct rb_node *node = NULL;
471 struct rxe_pool_entry *elem = NULL;
472 int cmp;
473 unsigned long flags;
474
475 spin_lock_irqsave(&pool->pool_lock, flags);
476
477 if (pool->state != rxe_pool_valid)
478 goto out;
479
480 node = pool->tree.rb_node;
481
482 while (node) {
483 elem = rb_entry(node, struct rxe_pool_entry, node);
484
485 cmp = memcmp((u8 *)elem + pool->key_offset,
486 key, pool->key_size);
487
488 if (cmp > 0)
489 node = node->rb_left;
490 else if (cmp < 0)
491 node = node->rb_right;
492 else
493 break;
494 }
495
496 if (node)
497 kref_get(&elem->ref_cnt);
498
499out:
500 spin_unlock_irqrestore(&pool->pool_lock, flags);
501 return node ? ((void *)elem) : NULL;
502}
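
For reference, a minimal sketch (not part of this patch) of the create side of the keyed-pool API defined above, modeled on how the multicast group pool is used elsewhere in the driver. The helper name is illustrative, locking is omitted, and the driver's real multicast code also initializes the group's own lock and QP list before publishing it.

static struct rxe_mc_grp *example_new_mcast_grp(struct rxe_dev *rxe,
						union ib_gid *mgid)
{
	struct rxe_mc_grp *grp;

	grp = rxe_alloc(&rxe->mc_grp_pool);	/* GFP_KERNEL, ref count starts at 1 */
	if (!grp)
		return NULL;

	/* copy the MGID into the element at key_offset and insert the
	 * element into the pool's rb tree, keyed by that MGID
	 */
	rxe_add_key(grp, mgid);

	return grp;
}

Lookups later go through rxe_pool_get_key(&rxe->mc_grp_pool, mgid), which memcmp()s the stored key while walking the rb tree and takes a reference that the caller drops with rxe_drop_ref().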
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
new file mode 100644
index 000000000000..4d04830adcae
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -0,0 +1,163 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_POOL_H
35#define RXE_POOL_H
36
37#define RXE_POOL_ALIGN (16)
38#define RXE_POOL_CACHE_FLAGS (0)
39
40enum rxe_pool_flags {
41 RXE_POOL_ATOMIC = BIT(0),
42 RXE_POOL_INDEX = BIT(1),
43 RXE_POOL_KEY = BIT(2),
44};
45
46enum rxe_elem_type {
47 RXE_TYPE_UC,
48 RXE_TYPE_PD,
49 RXE_TYPE_AH,
50 RXE_TYPE_SRQ,
51 RXE_TYPE_QP,
52 RXE_TYPE_CQ,
53 RXE_TYPE_MR,
54 RXE_TYPE_MW,
55 RXE_TYPE_MC_GRP,
56 RXE_TYPE_MC_ELEM,
57 RXE_NUM_TYPES, /* keep me last */
58};
59
60struct rxe_type_info {
61 char *name;
62 size_t size;
63 void (*cleanup)(void *obj);
64 enum rxe_pool_flags flags;
65 u32 max_index;
66 u32 min_index;
67 size_t key_offset;
68 size_t key_size;
69 struct kmem_cache *cache;
70};
71
72extern struct rxe_type_info rxe_type_info[];
73
74enum rxe_pool_state {
75 rxe_pool_invalid,
76 rxe_pool_valid,
77};
78
79struct rxe_pool_entry {
80 struct rxe_pool *pool;
81 struct kref ref_cnt;
82 struct list_head list;
83
84 /* only used if indexed or keyed */
85 struct rb_node node;
86 u32 index;
87};
88
89struct rxe_pool {
90 struct rxe_dev *rxe;
91 spinlock_t pool_lock; /* pool spinlock */
92 size_t elem_size;
93 struct kref ref_cnt;
94 void (*cleanup)(void *obj);
95 enum rxe_pool_state state;
96 enum rxe_pool_flags flags;
97 enum rxe_elem_type type;
98
99 unsigned int max_elem;
100 atomic_t num_elem;
101
102 /* only used if indexed or keyed */
103 struct rb_root tree;
104 unsigned long *table;
105 size_t table_size;
106 u32 max_index;
107 u32 min_index;
108 u32 last;
109 size_t key_offset;
110 size_t key_size;
111};
112
113/* initialize slab caches for managed objects */
114int rxe_cache_init(void);
115
116/* cleanup slab caches for managed objects */
117void rxe_cache_exit(void);
118
119/* initialize a pool of objects with a given limit on the
120 * number of elements. Gets parameters from rxe_type_info;
121 * pool elements will be allocated out of a slab cache.
122 */
123int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
124 enum rxe_elem_type type, u32 max_elem);
125
126/* free resources from object pool */
127int rxe_pool_cleanup(struct rxe_pool *pool);
128
129/* allocate an object from pool */
130void *rxe_alloc(struct rxe_pool *pool);
131
132/* assign an index to an indexed object and insert object into
133 * pool's rb tree
134 */
135void rxe_add_index(void *elem);
136
137/* drop an index and remove object from rb tree */
138void rxe_drop_index(void *elem);
139
140/* assign a key to a keyed object and insert object into
141 * pool's rb tree
142 */
143void rxe_add_key(void *elem, void *key);
144
145/* remove elem from rb tree */
146void rxe_drop_key(void *elem);
147
148/* lookup an indexed object from index. takes a reference on object */
149void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
150
151/* lookup keyed object from key. takes a reference on the object */
152void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
153
154/* cleanup an object when all references are dropped */
155void rxe_elem_release(struct kref *kref);
156
157/* take a reference on an object */
158#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt)
159
160/* drop a reference on an object */
161#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release)
162
163#endif /* RXE_POOL_H */
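
A minimal sketch (not part of this patch) of the indexed-object lookup this header describes, mirroring what the receive path does to resolve a QPN; the helper name is illustrative and error handling is reduced to the essentials.

/* Objects created through the verbs layer are placed in their pool
 * with rxe_alloc() followed by rxe_add_index(), which assigns
 * obj->pelem.index and inserts the entry into the pool's rb tree.
 * Packet processing then resolves an index back to the object:
 */
static struct rxe_qp *example_lookup_qp(struct rxe_dev *rxe, u32 index)
{
	struct rxe_qp *qp;

	qp = rxe_pool_get_index(&rxe->qp_pool, index);
	if (!qp)
		return NULL;		/* no element with this index */

	/* rxe_pool_get_index() took a reference; the caller owns it
	 * and must release it with rxe_drop_ref(qp) when done
	 */
	return qp;
}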
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
new file mode 100644
index 000000000000..22ba24f2a2c1
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -0,0 +1,851 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/skbuff.h>
35#include <linux/delay.h>
36#include <linux/sched.h>
37
38#include "rxe.h"
39#include "rxe_loc.h"
40#include "rxe_queue.h"
41#include "rxe_task.h"
42
43char *rxe_qp_state_name[] = {
44 [QP_STATE_RESET] = "RESET",
45 [QP_STATE_INIT] = "INIT",
46 [QP_STATE_READY] = "READY",
47 [QP_STATE_DRAIN] = "DRAIN",
48 [QP_STATE_DRAINED] = "DRAINED",
49 [QP_STATE_ERROR] = "ERROR",
50};
51
52static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
53 int has_srq)
54{
55 if (cap->max_send_wr > rxe->attr.max_qp_wr) {
56 pr_warn("invalid send wr = %d > %d\n",
57 cap->max_send_wr, rxe->attr.max_qp_wr);
58 goto err1;
59 }
60
61 if (cap->max_send_sge > rxe->attr.max_sge) {
62 pr_warn("invalid send sge = %d > %d\n",
63 cap->max_send_sge, rxe->attr.max_sge);
64 goto err1;
65 }
66
67 if (!has_srq) {
68 if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
69 pr_warn("invalid recv wr = %d > %d\n",
70 cap->max_recv_wr, rxe->attr.max_qp_wr);
71 goto err1;
72 }
73
74 if (cap->max_recv_sge > rxe->attr.max_sge) {
75 pr_warn("invalid recv sge = %d > %d\n",
76 cap->max_recv_sge, rxe->attr.max_sge);
77 goto err1;
78 }
79 }
80
81 if (cap->max_inline_data > rxe->max_inline_data) {
82 pr_warn("invalid max inline data = %d > %d\n",
83 cap->max_inline_data, rxe->max_inline_data);
84 goto err1;
85 }
86
87 return 0;
88
89err1:
90 return -EINVAL;
91}
92
93int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
94{
95 struct ib_qp_cap *cap = &init->cap;
96 struct rxe_port *port;
97 int port_num = init->port_num;
98
99 if (!init->recv_cq || !init->send_cq) {
100 pr_warn("missing cq\n");
101 goto err1;
102 }
103
104 if (rxe_qp_chk_cap(rxe, cap, !!init->srq))
105 goto err1;
106
107 if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
108 if (port_num != 1) {
109 pr_warn("invalid port = %d\n", port_num);
110 goto err1;
111 }
112
113 port = &rxe->port;
114
115 if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {
116 pr_warn("SMI QP exists for port %d\n", port_num);
117 goto err1;
118 }
119
120 if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
121 pr_warn("GSI QP exists for port %d\n", port_num);
122 goto err1;
123 }
124 }
125
126 return 0;
127
128err1:
129 return -EINVAL;
130}
131
132static int alloc_rd_atomic_resources(struct rxe_qp *qp, unsigned int n)
133{
134 qp->resp.res_head = 0;
135 qp->resp.res_tail = 0;
136 qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL);
137
138 if (!qp->resp.resources)
139 return -ENOMEM;
140
141 return 0;
142}
143
144static void free_rd_atomic_resources(struct rxe_qp *qp)
145{
146 if (qp->resp.resources) {
147 int i;
148
149 for (i = 0; i < qp->attr.max_rd_atomic; i++) {
150 struct resp_res *res = &qp->resp.resources[i];
151
152 free_rd_atomic_resource(qp, res);
153 }
154 kfree(qp->resp.resources);
155 qp->resp.resources = NULL;
156 }
157}
158
159void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
160{
161 if (res->type == RXE_ATOMIC_MASK) {
162 rxe_drop_ref(qp);
163 kfree_skb(res->atomic.skb);
164 } else if (res->type == RXE_READ_MASK) {
165 if (res->read.mr)
166 rxe_drop_ref(res->read.mr);
167 }
168 res->type = 0;
169}
170
171static void cleanup_rd_atomic_resources(struct rxe_qp *qp)
172{
173 int i;
174 struct resp_res *res;
175
176 if (qp->resp.resources) {
177 for (i = 0; i < qp->attr.max_rd_atomic; i++) {
178 res = &qp->resp.resources[i];
179 free_rd_atomic_resource(qp, res);
180 }
181 }
182}
183
184static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
185 struct ib_qp_init_attr *init)
186{
187 struct rxe_port *port;
188 u32 qpn;
189
190 qp->sq_sig_type = init->sq_sig_type;
191 qp->attr.path_mtu = 1;
192 qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu);
193
194 qpn = qp->pelem.index;
195 port = &rxe->port;
196
197 switch (init->qp_type) {
198 case IB_QPT_SMI:
199 qp->ibqp.qp_num = 0;
200 port->qp_smi_index = qpn;
201 qp->attr.port_num = init->port_num;
202 break;
203
204 case IB_QPT_GSI:
205 qp->ibqp.qp_num = 1;
206 port->qp_gsi_index = qpn;
207 qp->attr.port_num = init->port_num;
208 break;
209
210 default:
211 qp->ibqp.qp_num = qpn;
212 break;
213 }
214
215 INIT_LIST_HEAD(&qp->grp_list);
216
217 skb_queue_head_init(&qp->send_pkts);
218
219 spin_lock_init(&qp->grp_lock);
220 spin_lock_init(&qp->state_lock);
221
222 atomic_set(&qp->ssn, 0);
223 atomic_set(&qp->skb_out, 0);
224}
225
226static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
227 struct ib_qp_init_attr *init,
228 struct ib_ucontext *context, struct ib_udata *udata)
229{
230 int err;
231 int wqe_size;
232
233 err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
234 if (err < 0)
235 return err;
236 qp->sk->sk->sk_user_data = qp;
237
238 qp->sq.max_wr = init->cap.max_send_wr;
239 qp->sq.max_sge = init->cap.max_send_sge;
240 qp->sq.max_inline = init->cap.max_inline_data;
241
242 wqe_size = max_t(int, sizeof(struct rxe_send_wqe) +
243 qp->sq.max_sge * sizeof(struct ib_sge),
244 sizeof(struct rxe_send_wqe) +
245 qp->sq.max_inline);
246
247 qp->sq.queue = rxe_queue_init(rxe,
248 &qp->sq.max_wr,
249 wqe_size);
250 if (!qp->sq.queue)
251 return -ENOMEM;
252
253 err = do_mmap_info(rxe, udata, true,
254 context, qp->sq.queue->buf,
255 qp->sq.queue->buf_size, &qp->sq.queue->ip);
256
257 if (err) {
258 kvfree(qp->sq.queue->buf);
259 kfree(qp->sq.queue);
260 return err;
261 }
262
263 qp->req.wqe_index = producer_index(qp->sq.queue);
264 qp->req.state = QP_STATE_RESET;
265 qp->req.opcode = -1;
266 qp->comp.opcode = -1;
267
268 spin_lock_init(&qp->sq.sq_lock);
269 skb_queue_head_init(&qp->req_pkts);
270
271 rxe_init_task(rxe, &qp->req.task, qp,
272 rxe_requester, "req");
273 rxe_init_task(rxe, &qp->comp.task, qp,
274 rxe_completer, "comp");
275
276 init_timer(&qp->rnr_nak_timer);
277 qp->rnr_nak_timer.function = rnr_nak_timer;
278 qp->rnr_nak_timer.data = (unsigned long)qp;
279
280 init_timer(&qp->retrans_timer);
281 qp->retrans_timer.function = retransmit_timer;
282 qp->retrans_timer.data = (unsigned long)qp;
283 qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
284
285 return 0;
286}
287
288static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
289 struct ib_qp_init_attr *init,
290 struct ib_ucontext *context, struct ib_udata *udata)
291{
292 int err;
293 int wqe_size;
294
295 if (!qp->srq) {
296 qp->rq.max_wr = init->cap.max_recv_wr;
297 qp->rq.max_sge = init->cap.max_recv_sge;
298
299 wqe_size = rcv_wqe_size(qp->rq.max_sge);
300
301 pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n",
302 qp->rq.max_wr, qp->rq.max_sge, wqe_size);
303
304 qp->rq.queue = rxe_queue_init(rxe,
305 &qp->rq.max_wr,
306 wqe_size);
307 if (!qp->rq.queue)
308 return -ENOMEM;
309
310 err = do_mmap_info(rxe, udata, false, context,
311 qp->rq.queue->buf,
312 qp->rq.queue->buf_size,
313 &qp->rq.queue->ip);
314 if (err) {
315 kvfree(qp->rq.queue->buf);
316 kfree(qp->rq.queue);
317 return err;
318 }
319 }
320
321 spin_lock_init(&qp->rq.producer_lock);
322 spin_lock_init(&qp->rq.consumer_lock);
323
324 skb_queue_head_init(&qp->resp_pkts);
325
326 rxe_init_task(rxe, &qp->resp.task, qp,
327 rxe_responder, "resp");
328
329 qp->resp.opcode = OPCODE_NONE;
330 qp->resp.msn = 0;
331 qp->resp.state = QP_STATE_RESET;
332
333 return 0;
334}
335
336/* called by the create qp verb */
337int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
338 struct ib_qp_init_attr *init, struct ib_udata *udata,
339 struct ib_pd *ibpd)
340{
341 int err;
342 struct rxe_cq *rcq = to_rcq(init->recv_cq);
343 struct rxe_cq *scq = to_rcq(init->send_cq);
344 struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
345 struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
346
347 rxe_add_ref(pd);
348 rxe_add_ref(rcq);
349 rxe_add_ref(scq);
350 if (srq)
351 rxe_add_ref(srq);
352
353 qp->pd = pd;
354 qp->rcq = rcq;
355 qp->scq = scq;
356 qp->srq = srq;
357
358 rxe_qp_init_misc(rxe, qp, init);
359
360 err = rxe_qp_init_req(rxe, qp, init, context, udata);
361 if (err)
362 goto err1;
363
364 err = rxe_qp_init_resp(rxe, qp, init, context, udata);
365 if (err)
366 goto err2;
367
368 qp->attr.qp_state = IB_QPS_RESET;
369 qp->valid = 1;
370
371 return 0;
372
373err2:
374 rxe_queue_cleanup(qp->sq.queue);
375err1:
376 if (srq)
377 rxe_drop_ref(srq);
378 rxe_drop_ref(scq);
379 rxe_drop_ref(rcq);
380 rxe_drop_ref(pd);
381
382 return err;
383}
384
385/* called by the query qp verb */
386int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
387{
388 init->event_handler = qp->ibqp.event_handler;
389 init->qp_context = qp->ibqp.qp_context;
390 init->send_cq = qp->ibqp.send_cq;
391 init->recv_cq = qp->ibqp.recv_cq;
392 init->srq = qp->ibqp.srq;
393
394 init->cap.max_send_wr = qp->sq.max_wr;
395 init->cap.max_send_sge = qp->sq.max_sge;
396 init->cap.max_inline_data = qp->sq.max_inline;
397
398 if (!qp->srq) {
399 init->cap.max_recv_wr = qp->rq.max_wr;
400 init->cap.max_recv_sge = qp->rq.max_sge;
401 }
402
403 init->sq_sig_type = qp->sq_sig_type;
404
405 init->qp_type = qp->ibqp.qp_type;
406 init->port_num = 1;
407
408 return 0;
409}
410
411/* called by the modify qp verb, this routine checks all the parameters before
412 * making any changes
413 */
414int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
415 struct ib_qp_attr *attr, int mask)
416{
417 enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
418 attr->cur_qp_state : qp->attr.qp_state;
419 enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
420 attr->qp_state : cur_state;
421
422 if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask,
423 IB_LINK_LAYER_ETHERNET)) {
424 pr_warn("invalid mask or state for qp\n");
425 goto err1;
426 }
427
428 if (mask & IB_QP_STATE) {
429 if (cur_state == IB_QPS_SQD) {
430 if (qp->req.state == QP_STATE_DRAIN &&
431 new_state != IB_QPS_ERR)
432 goto err1;
433 }
434 }
435
436 if (mask & IB_QP_PORT) {
437 if (attr->port_num != 1) {
438 pr_warn("invalid port %d\n", attr->port_num);
439 goto err1;
440 }
441 }
442
443 if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
444 goto err1;
445
446 if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))
447 goto err1;
448
449 if (mask & IB_QP_ALT_PATH) {
450 if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
451 goto err1;
452 if (attr->alt_port_num != 1) {
453 pr_warn("invalid alt port %d\n", attr->alt_port_num);
454 goto err1;
455 }
456 if (attr->alt_timeout > 31) {
457 pr_warn("invalid QP alt timeout %d > 31\n",
458 attr->alt_timeout);
459 goto err1;
460 }
461 }
462
463 if (mask & IB_QP_PATH_MTU) {
464 struct rxe_port *port = &rxe->port;
465
466 enum ib_mtu max_mtu = port->attr.max_mtu;
467 enum ib_mtu mtu = attr->path_mtu;
468
469 if (mtu > max_mtu) {
470 pr_debug("invalid mtu (%d) > (%d)\n",
471 ib_mtu_enum_to_int(mtu),
472 ib_mtu_enum_to_int(max_mtu));
473 goto err1;
474 }
475 }
476
477 if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
478 if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
479 pr_warn("invalid max_rd_atomic %d > %d\n",
480 attr->max_rd_atomic,
481 rxe->attr.max_qp_rd_atom);
482 goto err1;
483 }
484 }
485
486 if (mask & IB_QP_TIMEOUT) {
487 if (attr->timeout > 31) {
488 pr_warn("invalid QP timeout %d > 31\n",
489 attr->timeout);
490 goto err1;
491 }
492 }
493
494 return 0;
495
496err1:
497 return -EINVAL;
498}
499
500/* move the qp to the reset state */
501static void rxe_qp_reset(struct rxe_qp *qp)
502{
503 /* stop tasks from running */
504 rxe_disable_task(&qp->resp.task);
505
506 /* stop request/comp */
507 if (qp->sq.queue) {
508 if (qp_type(qp) == IB_QPT_RC)
509 rxe_disable_task(&qp->comp.task);
510 rxe_disable_task(&qp->req.task);
511 }
512
513 /* move qp to the reset state */
514 qp->req.state = QP_STATE_RESET;
515 qp->resp.state = QP_STATE_RESET;
516
517 /* let state machines reset themselves, drain work and packet queues
518 * etc.
519 */
520 __rxe_do_task(&qp->resp.task);
521
522 if (qp->sq.queue) {
523 __rxe_do_task(&qp->comp.task);
524 __rxe_do_task(&qp->req.task);
525 }
526
527 /* cleanup attributes */
528 atomic_set(&qp->ssn, 0);
529 qp->req.opcode = -1;
530 qp->req.need_retry = 0;
531 qp->req.noack_pkts = 0;
532 qp->resp.msn = 0;
533 qp->resp.opcode = -1;
534 qp->resp.drop_msg = 0;
535 qp->resp.goto_error = 0;
536 qp->resp.sent_psn_nak = 0;
537
538 if (qp->resp.mr) {
539 rxe_drop_ref(qp->resp.mr);
540 qp->resp.mr = NULL;
541 }
542
543 cleanup_rd_atomic_resources(qp);
544
545 /* reenable tasks */
546 rxe_enable_task(&qp->resp.task);
547
548 if (qp->sq.queue) {
549 if (qp_type(qp) == IB_QPT_RC)
550 rxe_enable_task(&qp->comp.task);
551
552 rxe_enable_task(&qp->req.task);
553 }
554}
555
556/* drain the send queue */
557static void rxe_qp_drain(struct rxe_qp *qp)
558{
559 if (qp->sq.queue) {
560 if (qp->req.state != QP_STATE_DRAINED) {
561 qp->req.state = QP_STATE_DRAIN;
562 if (qp_type(qp) == IB_QPT_RC)
563 rxe_run_task(&qp->comp.task, 1);
564 else
565 __rxe_do_task(&qp->comp.task);
566 rxe_run_task(&qp->req.task, 1);
567 }
568 }
569}
570
571/* move the qp to the error state */
572void rxe_qp_error(struct rxe_qp *qp)
573{
574 qp->req.state = QP_STATE_ERROR;
575 qp->resp.state = QP_STATE_ERROR;
576
577 /* drain work and packet queues */
578 rxe_run_task(&qp->resp.task, 1);
579
580 if (qp_type(qp) == IB_QPT_RC)
581 rxe_run_task(&qp->comp.task, 1);
582 else
583 __rxe_do_task(&qp->comp.task);
584 rxe_run_task(&qp->req.task, 1);
585}
586
587/* called by the modify qp verb */
588int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
589 struct ib_udata *udata)
590{
591 int err;
592 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
593 union ib_gid sgid;
594 struct ib_gid_attr sgid_attr;
595
596 if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
597 int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
598
599 free_rd_atomic_resources(qp);
600
601 err = alloc_rd_atomic_resources(qp, max_rd_atomic);
602 if (err)
603 return err;
604
605 qp->attr.max_rd_atomic = max_rd_atomic;
606 atomic_set(&qp->req.rd_atomic, max_rd_atomic);
607 }
608
609 if (mask & IB_QP_CUR_STATE)
610 qp->attr.cur_qp_state = attr->qp_state;
611
612 if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
613 qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify;
614
615 if (mask & IB_QP_ACCESS_FLAGS)
616 qp->attr.qp_access_flags = attr->qp_access_flags;
617
618 if (mask & IB_QP_PKEY_INDEX)
619 qp->attr.pkey_index = attr->pkey_index;
620
621 if (mask & IB_QP_PORT)
622 qp->attr.port_num = attr->port_num;
623
624 if (mask & IB_QP_QKEY)
625 qp->attr.qkey = attr->qkey;
626
627 if (mask & IB_QP_AV) {
628 ib_get_cached_gid(&rxe->ib_dev, 1,
629 attr->ah_attr.grh.sgid_index, &sgid,
630 &sgid_attr);
631 rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av,
632 &attr->ah_attr);
633 rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr,
634 &sgid_attr, &sgid);
635 if (sgid_attr.ndev)
636 dev_put(sgid_attr.ndev);
637 }
638
639 if (mask & IB_QP_ALT_PATH) {
640 ib_get_cached_gid(&rxe->ib_dev, 1,
641 attr->alt_ah_attr.grh.sgid_index, &sgid,
642 &sgid_attr);
643
644 rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av,
645 &attr->alt_ah_attr);
646 rxe_av_fill_ip_info(rxe, &qp->alt_av, &attr->alt_ah_attr,
647 &sgid_attr, &sgid);
648 if (sgid_attr.ndev)
649 dev_put(sgid_attr.ndev);
650
651 qp->attr.alt_port_num = attr->alt_port_num;
652 qp->attr.alt_pkey_index = attr->alt_pkey_index;
653 qp->attr.alt_timeout = attr->alt_timeout;
654 }
655
656 if (mask & IB_QP_PATH_MTU) {
657 qp->attr.path_mtu = attr->path_mtu;
658 qp->mtu = ib_mtu_enum_to_int(attr->path_mtu);
659 }
660
661 if (mask & IB_QP_TIMEOUT) {
662 qp->attr.timeout = attr->timeout;
663 if (attr->timeout == 0) {
664 qp->qp_timeout_jiffies = 0;
665 } else {
666 /* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */
667 int j = nsecs_to_jiffies(4096ULL << attr->timeout);
668
669 qp->qp_timeout_jiffies = j ? j : 1;
670 }
671 }
672
673 if (mask & IB_QP_RETRY_CNT) {
674 qp->attr.retry_cnt = attr->retry_cnt;
675 qp->comp.retry_cnt = attr->retry_cnt;
676 pr_debug("set retry count = %d\n", attr->retry_cnt);
677 }
678
679 if (mask & IB_QP_RNR_RETRY) {
680 qp->attr.rnr_retry = attr->rnr_retry;
681 qp->comp.rnr_retry = attr->rnr_retry;
682 pr_debug("set rnr retry count = %d\n", attr->rnr_retry);
683 }
684
685 if (mask & IB_QP_RQ_PSN) {
686 qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
687 qp->resp.psn = qp->attr.rq_psn;
688 pr_debug("set resp psn = 0x%x\n", qp->resp.psn);
689 }
690
691 if (mask & IB_QP_MIN_RNR_TIMER) {
692 qp->attr.min_rnr_timer = attr->min_rnr_timer;
693 pr_debug("set min rnr timer = 0x%x\n",
694 attr->min_rnr_timer);
695 }
696
697 if (mask & IB_QP_SQ_PSN) {
698 qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
699 qp->req.psn = qp->attr.sq_psn;
700 qp->comp.psn = qp->attr.sq_psn;
701 pr_debug("set req psn = 0x%x\n", qp->req.psn);
702 }
703
704 if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {
705 qp->attr.max_dest_rd_atomic =
706 __roundup_pow_of_two(attr->max_dest_rd_atomic);
707 }
708
709 if (mask & IB_QP_PATH_MIG_STATE)
710 qp->attr.path_mig_state = attr->path_mig_state;
711
712 if (mask & IB_QP_DEST_QPN)
713 qp->attr.dest_qp_num = attr->dest_qp_num;
714
715 if (mask & IB_QP_STATE) {
716 qp->attr.qp_state = attr->qp_state;
717
718 switch (attr->qp_state) {
719 case IB_QPS_RESET:
720 pr_debug("qp state -> RESET\n");
721 rxe_qp_reset(qp);
722 break;
723
724 case IB_QPS_INIT:
725 pr_debug("qp state -> INIT\n");
726 qp->req.state = QP_STATE_INIT;
727 qp->resp.state = QP_STATE_INIT;
728 break;
729
730 case IB_QPS_RTR:
731 pr_debug("qp state -> RTR\n");
732 qp->resp.state = QP_STATE_READY;
733 break;
734
735 case IB_QPS_RTS:
736 pr_debug("qp state -> RTS\n");
737 qp->req.state = QP_STATE_READY;
738 break;
739
740 case IB_QPS_SQD:
741 pr_debug("qp state -> SQD\n");
742 rxe_qp_drain(qp);
743 break;
744
745 case IB_QPS_SQE:
746 pr_warn("qp state -> SQE !!?\n");
747 /* Not possible from modify_qp. */
748 break;
749
750 case IB_QPS_ERR:
751 pr_debug("qp state -> ERR\n");
752 rxe_qp_error(qp);
753 break;
754 }
755 }
756
757 return 0;
758}
759
760/* called by the query qp verb */
761int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
762{
763 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
764
765 *attr = qp->attr;
766
767 attr->rq_psn = qp->resp.psn;
768 attr->sq_psn = qp->req.psn;
769
770 attr->cap.max_send_wr = qp->sq.max_wr;
771 attr->cap.max_send_sge = qp->sq.max_sge;
772 attr->cap.max_inline_data = qp->sq.max_inline;
773
774 if (!qp->srq) {
775 attr->cap.max_recv_wr = qp->rq.max_wr;
776 attr->cap.max_recv_sge = qp->rq.max_sge;
777 }
778
779 rxe_av_to_attr(rxe, &qp->pri_av, &attr->ah_attr);
780 rxe_av_to_attr(rxe, &qp->alt_av, &attr->alt_ah_attr);
781
782 if (qp->req.state == QP_STATE_DRAIN) {
783 attr->sq_draining = 1;
784 /* applications that get this state
785 * typically spin on it. yield the
786 * processor
787 */
788 cond_resched();
789 } else {
790 attr->sq_draining = 0;
791 }
792
793 pr_debug("attr->sq_draining = %d\n", attr->sq_draining);
794
795 return 0;
796}
797
798/* called by the destroy qp verb */
799void rxe_qp_destroy(struct rxe_qp *qp)
800{
801 qp->valid = 0;
802 qp->qp_timeout_jiffies = 0;
803 rxe_cleanup_task(&qp->resp.task);
804
805 del_timer_sync(&qp->retrans_timer);
806 del_timer_sync(&qp->rnr_nak_timer);
807
808 rxe_cleanup_task(&qp->req.task);
809 if (qp_type(qp) == IB_QPT_RC)
810 rxe_cleanup_task(&qp->comp.task);
811
812 /* flush out any receive wr's or pending requests */
813 __rxe_do_task(&qp->req.task);
814 if (qp->sq.queue) {
815 __rxe_do_task(&qp->comp.task);
816 __rxe_do_task(&qp->req.task);
817 }
818}
819
820/* called when the last reference to the qp is dropped */
821void rxe_qp_cleanup(void *arg)
822{
823 struct rxe_qp *qp = arg;
824
825 rxe_drop_all_mcast_groups(qp);
826
827 if (qp->sq.queue)
828 rxe_queue_cleanup(qp->sq.queue);
829
830 if (qp->srq)
831 rxe_drop_ref(qp->srq);
832
833 if (qp->rq.queue)
834 rxe_queue_cleanup(qp->rq.queue);
835
836 if (qp->scq)
837 rxe_drop_ref(qp->scq);
838 if (qp->rcq)
839 rxe_drop_ref(qp->rcq);
840 if (qp->pd)
841 rxe_drop_ref(qp->pd);
842
843 if (qp->resp.mr) {
844 rxe_drop_ref(qp->resp.mr);
845 qp->resp.mr = NULL;
846 }
847
848 free_rd_atomic_resources(qp);
849
850 kernel_sock_shutdown(qp->sk, SHUT_RDWR);
851}
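
A short worked example (not part of this patch) of the IB_QP_TIMEOUT conversion done in rxe_qp_from_attr() above, shown in isolation; the helper name is illustrative.

/* attr->timeout encodes a local ACK timeout of 4.096 us * 2^timeout,
 * which the code computes as 4096 ns << timeout.  For example,
 * timeout == 14 gives 4096 ns << 14 = 67,108,864 ns, roughly 67 ms.
 * A timeout of 0 means no local ACK timeout and is stored as 0 jiffies.
 */
static unsigned long example_qp_timeout_jiffies(u8 timeout)
{
	unsigned long j;

	if (timeout == 0)
		return 0;

	j = nsecs_to_jiffies(4096ULL << timeout);
	return j ? j : 1;	/* never round a real timeout down to 0 */
}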
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
new file mode 100644
index 000000000000..08274254eb88
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -0,0 +1,217 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/vmalloc.h>
35#include "rxe.h"
36#include "rxe_loc.h"
37#include "rxe_queue.h"
38
39int do_mmap_info(struct rxe_dev *rxe,
40 struct ib_udata *udata,
41 bool is_req,
42 struct ib_ucontext *context,
43 struct rxe_queue_buf *buf,
44 size_t buf_size,
45 struct rxe_mmap_info **ip_p)
46{
47 int err;
48 u32 len, offset;
49 struct rxe_mmap_info *ip = NULL;
50
51 if (udata) {
52 if (is_req) {
53 len = udata->outlen - sizeof(struct mminfo);
54 offset = sizeof(struct mminfo);
55 } else {
56 len = udata->outlen;
57 offset = 0;
58 }
59
60 if (len < sizeof(ip->info))
61 goto err1;
62
63 ip = rxe_create_mmap_info(rxe, buf_size, context, buf);
64 if (!ip)
65 goto err1;
66
67 err = copy_to_user(udata->outbuf + offset, &ip->info,
68 sizeof(ip->info));
69 if (err)
70 goto err2;
71
72 spin_lock_bh(&rxe->pending_lock);
73 list_add(&ip->pending_mmaps, &rxe->pending_mmaps);
74 spin_unlock_bh(&rxe->pending_lock);
75 }
76
77 *ip_p = ip;
78
79 return 0;
80
81err2:
82 kfree(ip);
83err1:
84 return -EINVAL;
85}
86
87struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
88 int *num_elem,
89 unsigned int elem_size)
90{
91 struct rxe_queue *q;
92 size_t buf_size;
93 unsigned int num_slots;
94
95 /* num_elem == 0 is allowed, but uninteresting */
96 if (*num_elem < 0)
97 goto err1;
98
99 q = kmalloc(sizeof(*q), GFP_KERNEL);
100 if (!q)
101 goto err1;
102
103 q->rxe = rxe;
104
105 /* used in resize, only need to copy used part of queue */
106 q->elem_size = elem_size;
107
108 /* pad element up to at least a cacheline and always a power of 2 */
109 if (elem_size < cache_line_size())
110 elem_size = cache_line_size();
111 elem_size = roundup_pow_of_two(elem_size);
112
113 q->log2_elem_size = order_base_2(elem_size);
114
115 num_slots = *num_elem + 1;
116 num_slots = roundup_pow_of_two(num_slots);
117 q->index_mask = num_slots - 1;
118
119 buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size;
120
121 q->buf = vmalloc_user(buf_size);
122 if (!q->buf)
123 goto err2;
124
125 q->buf->log2_elem_size = q->log2_elem_size;
126 q->buf->index_mask = q->index_mask;
127
128 q->buf_size = buf_size;
129
130 *num_elem = num_slots - 1;
131 return q;
132
133err2:
134 kfree(q);
135err1:
136 return NULL;
137}
138
139/* copies elements from original q to new q and then swaps the contents of the
140 * two q headers. This is so that if anyone is holding a pointer to q it will
141 * still work
142 */
143static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
144 unsigned int num_elem)
145{
146 if (!queue_empty(q) && (num_elem < queue_count(q)))
147 return -EINVAL;
148
149 while (!queue_empty(q)) {
150 memcpy(producer_addr(new_q), consumer_addr(q),
151 new_q->elem_size);
152 advance_producer(new_q);
153 advance_consumer(q);
154 }
155
156 swap(*q, *new_q);
157
158 return 0;
159}
160
161int rxe_queue_resize(struct rxe_queue *q,
162 unsigned int *num_elem_p,
163 unsigned int elem_size,
164 struct ib_ucontext *context,
165 struct ib_udata *udata,
166 spinlock_t *producer_lock,
167 spinlock_t *consumer_lock)
168{
169 struct rxe_queue *new_q;
170 unsigned int num_elem = *num_elem_p;
171 int err;
172 unsigned long flags = 0, flags1;
173
174 new_q = rxe_queue_init(q->rxe, &num_elem, elem_size);
175 if (!new_q)
176 return -ENOMEM;
177
178 err = do_mmap_info(new_q->rxe, udata, false, context, new_q->buf,
179 new_q->buf_size, &new_q->ip);
180 if (err) {
181 vfree(new_q->buf);
182 kfree(new_q);
183 goto err1;
184 }
185
186 spin_lock_irqsave(consumer_lock, flags1);
187
188 if (producer_lock) {
189 spin_lock_irqsave(producer_lock, flags);
190 err = resize_finish(q, new_q, num_elem);
191 spin_unlock_irqrestore(producer_lock, flags);
192 } else {
193 err = resize_finish(q, new_q, num_elem);
194 }
195
196 spin_unlock_irqrestore(consumer_lock, flags1);
197
198 rxe_queue_cleanup(new_q); /* frees the old queue, or the unused new one on error */
199 if (err)
200 goto err1;
201
202 *num_elem_p = num_elem;
203 return 0;
204
205err1:
206 return err;
207}
208
209void rxe_queue_cleanup(struct rxe_queue *q)
210{
211 if (q->ip)
212 kref_put(&q->ip->ref, rxe_mmap_release);
213 else
214 vfree(q->buf);
215
216 kfree(q);
217}
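
A short worked example (not part of this patch) of the sizing that rxe_queue_init() performs; the concrete numbers assume a 64-byte cache line.

/* Requesting 100 elements of 24 bytes each:
 *
 *   elem_size:  24 -> 64  (padded to the cache line size, already a
 *                          power of 2), so log2_elem_size = 6
 *   num_slots:  100 + 1 = 101 -> roundup_pow_of_two() -> 128,
 *                          so index_mask = 127
 *   *num_elem reported back to the caller: 128 - 1 = 127
 *
 * One slot is always left unused so that producer_index ==
 * consumer_index can unambiguously mean "queue empty".
 */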
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
new file mode 100644
index 000000000000..239fd609c31e
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -0,0 +1,178 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_QUEUE_H
35#define RXE_QUEUE_H
36
37/* implements a simple circular buffer that can optionally be
38 * shared between user space and the kernel and can be resized
39 *
40 * the requested element size is rounded up to a power of 2
41 * and the number of elements in the buffer is also rounded
42 * up to a power of 2. Since the queue is empty when the
43 * producer and consumer indices match, the maximum capacity
44 * of the queue is one less than the number of element slots.
45 */
46
47/* this data structure is shared between user space and kernel
48 * space for those cases where the queue is shared. It contains
49 * the producer and consumer indices. It also contains a copy
50 * of the queue size parameters for user space to use, but the
51 * kernel must use the parameters in the rxe_queue struct.
52 * This MUST MATCH the corresponding librxe struct.
53 * For performance reasons, arrange to have the producer and
54 * consumer pointers in separate cache lines.
55 * The kernel should always mask the indices to avoid accessing
56 * memory outside of the data area.
57 */
58struct rxe_queue_buf {
59 __u32 log2_elem_size;
60 __u32 index_mask;
61 __u32 pad_1[30];
62 __u32 producer_index;
63 __u32 pad_2[31];
64 __u32 consumer_index;
65 __u32 pad_3[31];
66 __u8 data[0];
67};
68
69struct rxe_queue {
70 struct rxe_dev *rxe;
71 struct rxe_queue_buf *buf;
72 struct rxe_mmap_info *ip;
73 size_t buf_size;
74 size_t elem_size;
75 unsigned int log2_elem_size;
76 unsigned int index_mask;
77};
78
79int do_mmap_info(struct rxe_dev *rxe,
80 struct ib_udata *udata,
81 bool is_req,
82 struct ib_ucontext *context,
83 struct rxe_queue_buf *buf,
84 size_t buf_size,
85 struct rxe_mmap_info **ip_p);
86
87struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
88 int *num_elem,
89 unsigned int elem_size);
90
91int rxe_queue_resize(struct rxe_queue *q,
92 unsigned int *num_elem_p,
93 unsigned int elem_size,
94 struct ib_ucontext *context,
95 struct ib_udata *udata,
96 /* Protect producers while resizing queue */
97 spinlock_t *producer_lock,
98 /* Protect consumers while resizing queue */
99 spinlock_t *consumer_lock);
100
101void rxe_queue_cleanup(struct rxe_queue *queue);
102
103static inline int next_index(struct rxe_queue *q, int index)
104{
105 return (index + 1) & q->buf->index_mask;
106}
107
108static inline int queue_empty(struct rxe_queue *q)
109{
110 return ((q->buf->producer_index - q->buf->consumer_index)
111 & q->index_mask) == 0;
112}
113
114static inline int queue_full(struct rxe_queue *q)
115{
116 return ((q->buf->producer_index + 1 - q->buf->consumer_index)
117 & q->index_mask) == 0;
118}
119
120static inline void advance_producer(struct rxe_queue *q)
121{
122 q->buf->producer_index = (q->buf->producer_index + 1)
123 & q->index_mask;
124}
125
126static inline void advance_consumer(struct rxe_queue *q)
127{
128 q->buf->consumer_index = (q->buf->consumer_index + 1)
129 & q->index_mask;
130}
131
132static inline void *producer_addr(struct rxe_queue *q)
133{
134 return q->buf->data + ((q->buf->producer_index & q->index_mask)
135 << q->log2_elem_size);
136}
137
138static inline void *consumer_addr(struct rxe_queue *q)
139{
140 return q->buf->data + ((q->buf->consumer_index & q->index_mask)
141 << q->log2_elem_size);
142}
143
144static inline unsigned int producer_index(struct rxe_queue *q)
145{
146 return q->buf->producer_index;
147}
148
149static inline unsigned int consumer_index(struct rxe_queue *q)
150{
151 return q->buf->consumer_index;
152}
153
154static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
155{
156 return q->buf->data + ((index & q->index_mask)
157 << q->buf->log2_elem_size);
158}
159
160static inline unsigned int index_from_addr(const struct rxe_queue *q,
161 const void *addr)
162{
163 return (((u8 *)addr - q->buf->data) >> q->log2_elem_size)
164 & q->index_mask;
165}
166
167static inline unsigned int queue_count(const struct rxe_queue *q)
168{
169 return (q->buf->producer_index - q->buf->consumer_index)
170 & q->index_mask;
171}
172
173static inline void *queue_head(struct rxe_queue *q)
174{
175 return queue_empty(q) ? NULL : consumer_addr(q);
176}
177
178#endif /* RXE_QUEUE_H */
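
A minimal sketch (not part of this patch) of how the inline accessors above move one element through a queue. The helper name is illustrative, len is assumed not to exceed the element size the queue was created with, and no locking is shown (the driver serializes producers and consumers with the per-queue spinlocks set up in rxe_qp.c).

static int example_queue_roundtrip(struct rxe_queue *q,
				   const void *in, void *out, size_t len)
{
	if (queue_full(q))
		return -ENOSPC;

	memcpy(producer_addr(q), in, len);	/* fill the next free slot */
	advance_producer(q);			/* publish it to the consumer */

	memcpy(out, consumer_addr(q), len);	/* read the oldest element */
	advance_consumer(q);			/* retire it */

	return 0;
}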
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
new file mode 100644
index 000000000000..3d464c23e08b
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -0,0 +1,420 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/skbuff.h>
35
36#include "rxe.h"
37#include "rxe_loc.h"
38
39static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
40 struct rxe_qp *qp)
41{
42 if (unlikely(!qp->valid))
43 goto err1;
44
45 switch (qp_type(qp)) {
46 case IB_QPT_RC:
47 if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) {
48 pr_warn_ratelimited("bad qp type\n");
49 goto err1;
50 }
51 break;
52 case IB_QPT_UC:
53 if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) {
54 pr_warn_ratelimited("bad qp type\n");
55 goto err1;
56 }
57 break;
58 case IB_QPT_UD:
59 case IB_QPT_SMI:
60 case IB_QPT_GSI:
61 if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) {
62 pr_warn_ratelimited("bad qp type\n");
63 goto err1;
64 }
65 break;
66 default:
67 pr_warn_ratelimited("unsupported qp type\n");
68 goto err1;
69 }
70
71 if (pkt->mask & RXE_REQ_MASK) {
72 if (unlikely(qp->resp.state != QP_STATE_READY))
73 goto err1;
74 } else if (unlikely(qp->req.state < QP_STATE_READY ||
75 qp->req.state > QP_STATE_DRAINED)) {
76 goto err1;
77 }
78
79 return 0;
80
81err1:
82 return -EINVAL;
83}
84
85static void set_bad_pkey_cntr(struct rxe_port *port)
86{
87 spin_lock_bh(&port->port_lock);
88 port->attr.bad_pkey_cntr = min((u32)0xffff,
89 port->attr.bad_pkey_cntr + 1);
90 spin_unlock_bh(&port->port_lock);
91}
92
93static void set_qkey_viol_cntr(struct rxe_port *port)
94{
95 spin_lock_bh(&port->port_lock);
96 port->attr.qkey_viol_cntr = min((u32)0xffff,
97 port->attr.qkey_viol_cntr + 1);
98 spin_unlock_bh(&port->port_lock);
99}
100
101static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
102 u32 qpn, struct rxe_qp *qp)
103{
104 int i;
105 int found_pkey = 0;
106 struct rxe_port *port = &rxe->port;
107 u16 pkey = bth_pkey(pkt);
108
109 pkt->pkey_index = 0;
110
111 if (qpn == 1) {
112 for (i = 0; i < port->attr.pkey_tbl_len; i++) {
113 if (pkey_match(pkey, port->pkey_tbl[i])) {
114 pkt->pkey_index = i;
115 found_pkey = 1;
116 break;
117 }
118 }
119
120 if (!found_pkey) {
121 pr_warn_ratelimited("bad pkey = 0x%x\n", pkey);
122 set_bad_pkey_cntr(port);
123 goto err1;
124 }
125 } else if (qpn != 0) {
126 if (unlikely(!pkey_match(pkey,
127 port->pkey_tbl[qp->attr.pkey_index]
128 ))) {
129 pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey);
130 set_bad_pkey_cntr(port);
131 goto err1;
132 }
133 pkt->pkey_index = qp->attr.pkey_index;
134 }
135
136 if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&
137 qpn != 0 && pkt->mask) {
138 u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey;
139
140 if (unlikely(deth_qkey(pkt) != qkey)) {
141 pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n",
142 deth_qkey(pkt), qkey, qpn);
143 set_qkey_viol_cntr(port);
144 goto err1;
145 }
146 }
147
148 return 0;
149
150err1:
151 return -EINVAL;
152}
153
154static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
155 struct rxe_qp *qp)
156{
157 struct sk_buff *skb = PKT_TO_SKB(pkt);
158
159 if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC)
160 goto done;
161
162 if (unlikely(pkt->port_num != qp->attr.port_num)) {
163 pr_warn_ratelimited("port %d != qp port %d\n",
164 pkt->port_num, qp->attr.port_num);
165 goto err1;
166 }
167
168 if (skb->protocol == htons(ETH_P_IP)) {
169 struct in_addr *saddr =
170 &qp->pri_av.sgid_addr._sockaddr_in.sin_addr;
171 struct in_addr *daddr =
172 &qp->pri_av.dgid_addr._sockaddr_in.sin_addr;
173
174 if (ip_hdr(skb)->daddr != saddr->s_addr) {
175 pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n",
176 &ip_hdr(skb)->daddr,
177 &saddr->s_addr);
178 goto err1;
179 }
180
181 if (ip_hdr(skb)->saddr != daddr->s_addr) {
182 pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n",
183 &ip_hdr(skb)->saddr,
184 &daddr->s_addr);
185 goto err1;
186 }
187
188 } else if (skb->protocol == htons(ETH_P_IPV6)) {
189 struct in6_addr *saddr =
190 &qp->pri_av.sgid_addr._sockaddr_in6.sin6_addr;
191 struct in6_addr *daddr =
192 &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr;
193
194 if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) {
195 pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n",
196 &ipv6_hdr(skb)->daddr, saddr);
197 goto err1;
198 }
199
200 if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) {
201 pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n",
202 &ipv6_hdr(skb)->saddr, daddr);
203 goto err1;
204 }
205 }
206
207done:
208 return 0;
209
210err1:
211 return -EINVAL;
212}
213
214static int hdr_check(struct rxe_pkt_info *pkt)
215{
216 struct rxe_dev *rxe = pkt->rxe;
217 struct rxe_port *port = &rxe->port;
218 struct rxe_qp *qp = NULL;
219 u32 qpn = bth_qpn(pkt);
220 int index;
221 int err;
222
223 if (unlikely(bth_tver(pkt) != BTH_TVER)) {
224 pr_warn_ratelimited("bad tver\n");
225 goto err1;
226 }
227
228 if (qpn != IB_MULTICAST_QPN) {
229 index = (qpn == 0) ? port->qp_smi_index :
230 ((qpn == 1) ? port->qp_gsi_index : qpn);
231 qp = rxe_pool_get_index(&rxe->qp_pool, index);
232 if (unlikely(!qp)) {
233 pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn);
234 goto err1;
235 }
236
237 err = check_type_state(rxe, pkt, qp);
238 if (unlikely(err))
239 goto err2;
240
241 err = check_addr(rxe, pkt, qp);
242 if (unlikely(err))
243 goto err2;
244
245 err = check_keys(rxe, pkt, qpn, qp);
246 if (unlikely(err))
247 goto err2;
248 } else {
249 if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) {
250 pr_warn_ratelimited("no grh for mcast qpn\n");
251 goto err1;
252 }
253 }
254
255 pkt->qp = qp;
256 return 0;
257
258err2:
259 if (qp)
260 rxe_drop_ref(qp);
261err1:
262 return -EINVAL;
263}
264
265static inline void rxe_rcv_pkt(struct rxe_dev *rxe,
266 struct rxe_pkt_info *pkt,
267 struct sk_buff *skb)
268{
269 if (pkt->mask & RXE_REQ_MASK)
270 rxe_resp_queue_pkt(rxe, pkt->qp, skb);
271 else
272 rxe_comp_queue_pkt(rxe, pkt->qp, skb);
273}
274
275static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
276{
277 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
278 struct rxe_mc_grp *mcg;
279 struct sk_buff *skb_copy;
280 struct rxe_mc_elem *mce;
281 struct rxe_qp *qp;
282 union ib_gid dgid;
283 int err;
284
285 if (skb->protocol == htons(ETH_P_IP))
286 ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
287 (struct in6_addr *)&dgid);
288 else if (skb->protocol == htons(ETH_P_IPV6))
289 memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid));
290
291 /* lookup mcast group corresponding to mgid, takes a ref */
292 mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
293 if (!mcg)
294 goto err1; /* mcast group not registered */
295
296 spin_lock_bh(&mcg->mcg_lock);
297
298 list_for_each_entry(mce, &mcg->qp_list, qp_list) {
299 qp = mce->qp;
300 pkt = SKB_TO_PKT(skb);
301
302 /* validate qp for incoming packet */
303 err = check_type_state(rxe, pkt, qp);
304 if (err)
305 continue;
306
307 err = check_keys(rxe, pkt, bth_qpn(pkt), qp);
308 if (err)
309 continue;
310
311 /* if *not* the last qp in the list
312 * make a copy of the skb to post to the next qp
313 */
314 skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
315 skb_clone(skb, GFP_KERNEL) : NULL;
316
317 pkt->qp = qp;
318 rxe_add_ref(qp);
319 rxe_rcv_pkt(rxe, pkt, skb);
320
321 skb = skb_copy;
322 if (!skb)
323 break;
324 }
325
326 spin_unlock_bh(&mcg->mcg_lock);
327
328 rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
329
330err1:
331 if (skb)
332 kfree_skb(skb);
333}
334
335static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
336{
337 union ib_gid dgid;
338 union ib_gid *pdgid;
339 u16 index;
340
341 if (skb->protocol == htons(ETH_P_IP)) {
342 ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
343 (struct in6_addr *)&dgid);
344 pdgid = &dgid;
345 } else {
346 pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
347 }
348
349 return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
350 IB_GID_TYPE_ROCE_UDP_ENCAP,
351 1, rxe->ndev, &index);
352}
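For context on the lookup above: with RoCEv2, an IPv4 destination is carried as an IPv4-mapped GID (::ffff:a.b.c.d), which is what ipv6_addr_set_v4mapped() builds before ib_find_cached_gid_by_port() is consulted. A minimal standalone sketch of that mapping (userspace code, not part of the patch; v4mapped_gid is a made-up helper name):

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <arpa/inet.h>

	/* Build the 16-byte IPv4-mapped GID ::ffff:a.b.c.d from a
	 * network-order IPv4 address, mirroring what
	 * ipv6_addr_set_v4mapped() does in the kernel.
	 */
	static void v4mapped_gid(uint32_t daddr_be, uint8_t gid[16])
	{
		memset(gid, 0, 16);
		gid[10] = 0xff;
		gid[11] = 0xff;
		memcpy(&gid[12], &daddr_be, sizeof(daddr_be));
	}

	int main(void)
	{
		uint8_t gid[16];
		int i;

		v4mapped_gid(inet_addr("192.168.1.7"), gid);
		for (i = 0; i < 16; i++)
			printf("%02x%s", gid[i], (i < 15 && (i & 1)) ? ":" : "");
		printf("\n");	/* 0000:0000:0000:0000:0000:ffff:c0a8:0107 */
		return 0;
	}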
353
354/* rxe_rcv is called from the interface driver */
355int rxe_rcv(struct sk_buff *skb)
356{
357 int err;
358 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
359 struct rxe_dev *rxe = pkt->rxe;
360 __be32 *icrcp;
361 u32 calc_icrc, pack_icrc;
362
363 pkt->offset = 0;
364
365 if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
366 goto drop;
367
368 if (unlikely(rxe_match_dgid(rxe, skb) < 0)) {
369 pr_warn_ratelimited("failed matching dgid\n");
370 goto drop;
371 }
372
373 pkt->opcode = bth_opcode(pkt);
374 pkt->psn = bth_psn(pkt);
375 pkt->qp = NULL;
376 pkt->mask |= rxe_opcode[pkt->opcode].mask;
377
378 if (unlikely(skb->len < header_size(pkt)))
379 goto drop;
380
381 err = hdr_check(pkt);
382 if (unlikely(err))
383 goto drop;
384
385 /* Verify ICRC */
386 icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);
387 pack_icrc = be32_to_cpu(*icrcp);
388
389 calc_icrc = rxe_icrc_hdr(pkt, skb);
390 calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt));
391 calc_icrc = cpu_to_be32(~calc_icrc);
392 if (unlikely(calc_icrc != pack_icrc)) {
393 char saddr[sizeof(struct in6_addr)];
394
395 if (skb->protocol == htons(ETH_P_IPV6))
396 sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr);
397 else if (skb->protocol == htons(ETH_P_IP))
398 sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr);
399 else
400 sprintf(saddr, "unknown");
401
402 pr_warn_ratelimited("bad ICRC from %s\n", saddr);
403 goto drop;
404 }
405
406 if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
407 rxe_rcv_mcast_pkt(rxe, skb);
408 else
409 rxe_rcv_pkt(rxe, pkt, skb);
410
411 return 0;
412
413drop:
414 if (pkt->qp)
415 rxe_drop_ref(pkt->qp);
416
417 kfree_skb(skb);
418 return 0;
419}
420EXPORT_SYMBOL(rxe_rcv);
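The ICRC verification in rxe_rcv() above follows a common trailing-checksum pattern: run a CRC32 over the headers (rxe_icrc_hdr() supplies the seed and masks the fields the IBA spec excludes) and the payload, invert the result, and compare it against the last four bytes of the packet. A simplified userspace sketch of just that pattern, using zlib's crc32() in place of the kernel's crc32_le() and deliberately ignoring the spec-mandated seeding and field masking:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <zlib.h>		/* link with -lz */

	int main(void)
	{
		uint8_t pkt[64 + 4];
		uint32_t crc, trailer;

		memset(pkt, 0xab, 64);			/* stand-in header + payload */
		crc = ~(uint32_t)crc32(0, pkt, 64);
		memcpy(pkt + 64, &crc, 4);		/* sender appends ~crc */

		memcpy(&trailer, pkt + 64, 4);		/* receiver recomputes */
		crc = ~(uint32_t)crc32(0, pkt, 64);
		printf("icrc %s\n", crc == trailer ? "ok" : "bad");
		return 0;
	}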
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
new file mode 100644
index 000000000000..33b2d9d77021
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -0,0 +1,726 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/skbuff.h>
35
36#include "rxe.h"
37#include "rxe_loc.h"
38#include "rxe_queue.h"
39
40static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
41 unsigned opcode);
42
43static inline void retry_first_write_send(struct rxe_qp *qp,
44 struct rxe_send_wqe *wqe,
45 unsigned mask, int npsn)
46{
47 int i;
48
49 for (i = 0; i < npsn; i++) {
50 int to_send = (wqe->dma.resid > qp->mtu) ?
51 qp->mtu : wqe->dma.resid;
52
53 qp->req.opcode = next_opcode(qp, wqe,
54 wqe->wr.opcode);
55
56 if (wqe->wr.send_flags & IB_SEND_INLINE) {
57 wqe->dma.resid -= to_send;
58 wqe->dma.sge_offset += to_send;
59 } else {
60 advance_dma_data(&wqe->dma, to_send);
61 }
62 if (mask & WR_WRITE_MASK)
63 wqe->iova += qp->mtu;
64 }
65}
66
67static void req_retry(struct rxe_qp *qp)
68{
69 struct rxe_send_wqe *wqe;
70 unsigned int wqe_index;
71 unsigned int mask;
72 int npsn;
73 int first = 1;
74
75 wqe = queue_head(qp->sq.queue);
76 npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
77
78 qp->req.wqe_index = consumer_index(qp->sq.queue);
79 qp->req.psn = qp->comp.psn;
80 qp->req.opcode = -1;
81
82 for (wqe_index = consumer_index(qp->sq.queue);
83 wqe_index != producer_index(qp->sq.queue);
84 wqe_index = next_index(qp->sq.queue, wqe_index)) {
85 wqe = addr_from_index(qp->sq.queue, wqe_index);
86 mask = wr_opcode_mask(wqe->wr.opcode, qp);
87
88 if (wqe->state == wqe_state_posted)
89 break;
90
91 if (wqe->state == wqe_state_done)
92 continue;
93
94 wqe->iova = (mask & WR_ATOMIC_MASK) ?
95 wqe->wr.wr.atomic.remote_addr :
96 (mask & WR_READ_OR_WRITE_MASK) ?
97 wqe->wr.wr.rdma.remote_addr :
98 0;
99
100 if (!first || (mask & WR_READ_MASK) == 0) {
101 wqe->dma.resid = wqe->dma.length;
102 wqe->dma.cur_sge = 0;
103 wqe->dma.sge_offset = 0;
104 }
105
106 if (first) {
107 first = 0;
108
109 if (mask & WR_WRITE_OR_SEND_MASK)
110 retry_first_write_send(qp, wqe, mask, npsn);
111
112 if (mask & WR_READ_MASK)
113 wqe->iova += npsn * qp->mtu;
114 }
115
116 wqe->state = wqe_state_posted;
117 }
118}
119
120void rnr_nak_timer(unsigned long data)
121{
122 struct rxe_qp *qp = (struct rxe_qp *)data;
123
124 pr_debug("rnr nak timer fired\n");
125 rxe_run_task(&qp->req.task, 1);
126}
127
128static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
129{
130 struct rxe_send_wqe *wqe = queue_head(qp->sq.queue);
131 unsigned long flags;
132
133 if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
134 /* check to see if we are drained;
135 * state_lock used by requester and completer
136 */
137 spin_lock_irqsave(&qp->state_lock, flags);
138 do {
139 if (qp->req.state != QP_STATE_DRAIN) {
140 /* comp just finished */
141 spin_unlock_irqrestore(&qp->state_lock,
142 flags);
143 break;
144 }
145
146 if (wqe && ((qp->req.wqe_index !=
147 consumer_index(qp->sq.queue)) ||
148 (wqe->state != wqe_state_posted))) {
149 /* comp not done yet */
150 spin_unlock_irqrestore(&qp->state_lock,
151 flags);
152 break;
153 }
154
155 qp->req.state = QP_STATE_DRAINED;
156 spin_unlock_irqrestore(&qp->state_lock, flags);
157
158 if (qp->ibqp.event_handler) {
159 struct ib_event ev;
160
161 ev.device = qp->ibqp.device;
162 ev.element.qp = &qp->ibqp;
163 ev.event = IB_EVENT_SQ_DRAINED;
164 qp->ibqp.event_handler(&ev,
165 qp->ibqp.qp_context);
166 }
167 } while (0);
168 }
169
170 if (qp->req.wqe_index == producer_index(qp->sq.queue))
171 return NULL;
172
173 wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index);
174
175 if (unlikely((qp->req.state == QP_STATE_DRAIN ||
176 qp->req.state == QP_STATE_DRAINED) &&
177 (wqe->state != wqe_state_processing)))
178 return NULL;
179
180 if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
181 (qp->req.wqe_index != consumer_index(qp->sq.queue)))) {
182 qp->req.wait_fence = 1;
183 return NULL;
184 }
185
186 wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
187 return wqe;
188}
189
190static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits)
191{
192 switch (opcode) {
193 case IB_WR_RDMA_WRITE:
194 if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
195 qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
196 return fits ?
197 IB_OPCODE_RC_RDMA_WRITE_LAST :
198 IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
199 else
200 return fits ?
201 IB_OPCODE_RC_RDMA_WRITE_ONLY :
202 IB_OPCODE_RC_RDMA_WRITE_FIRST;
203
204 case IB_WR_RDMA_WRITE_WITH_IMM:
205 if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
206 qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
207 return fits ?
208 IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
209 IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
210 else
211 return fits ?
212 IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
213 IB_OPCODE_RC_RDMA_WRITE_FIRST;
214
215 case IB_WR_SEND:
216 if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
217 qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
218 return fits ?
219 IB_OPCODE_RC_SEND_LAST :
220 IB_OPCODE_RC_SEND_MIDDLE;
221 else
222 return fits ?
223 IB_OPCODE_RC_SEND_ONLY :
224 IB_OPCODE_RC_SEND_FIRST;
225
226 case IB_WR_SEND_WITH_IMM:
227 if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
228 qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
229 return fits ?
230 IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
231 IB_OPCODE_RC_SEND_MIDDLE;
232 else
233 return fits ?
234 IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
235 IB_OPCODE_RC_SEND_FIRST;
236
237 case IB_WR_RDMA_READ:
238 return IB_OPCODE_RC_RDMA_READ_REQUEST;
239
240 case IB_WR_ATOMIC_CMP_AND_SWP:
241 return IB_OPCODE_RC_COMPARE_SWAP;
242
243 case IB_WR_ATOMIC_FETCH_AND_ADD:
244 return IB_OPCODE_RC_FETCH_ADD;
245
246 case IB_WR_SEND_WITH_INV:
247 if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
248 qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
249 return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
250 IB_OPCODE_RC_SEND_MIDDLE;
251 else
252 return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
253 IB_OPCODE_RC_SEND_FIRST;
254 case IB_WR_REG_MR:
255 case IB_WR_LOCAL_INV:
256 return opcode;
257 }
258
259 return -EINVAL;
260}
261
262static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits)
263{
264 switch (opcode) {
265 case IB_WR_RDMA_WRITE:
266 if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
267 qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
268 return fits ?
269 IB_OPCODE_UC_RDMA_WRITE_LAST :
270 IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
271 else
272 return fits ?
273 IB_OPCODE_UC_RDMA_WRITE_ONLY :
274 IB_OPCODE_UC_RDMA_WRITE_FIRST;
275
276 case IB_WR_RDMA_WRITE_WITH_IMM:
277 if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
278 qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
279 return fits ?
280 IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
281 IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
282 else
283 return fits ?
284 IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
285 IB_OPCODE_UC_RDMA_WRITE_FIRST;
286
287 case IB_WR_SEND:
288 if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
289 qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
290 return fits ?
291 IB_OPCODE_UC_SEND_LAST :
292 IB_OPCODE_UC_SEND_MIDDLE;
293 else
294 return fits ?
295 IB_OPCODE_UC_SEND_ONLY :
296 IB_OPCODE_UC_SEND_FIRST;
297
298 case IB_WR_SEND_WITH_IMM:
299 if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
300 qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
301 return fits ?
302 IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
303 IB_OPCODE_UC_SEND_MIDDLE;
304 else
305 return fits ?
306 IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
307 IB_OPCODE_UC_SEND_FIRST;
308 }
309
310 return -EINVAL;
311}
312
313static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
314 unsigned opcode)
315{
316 int fits = (wqe->dma.resid <= qp->mtu);
317
318 switch (qp_type(qp)) {
319 case IB_QPT_RC:
320 return next_opcode_rc(qp, opcode, fits);
321
322 case IB_QPT_UC:
323 return next_opcode_uc(qp, opcode, fits);
324
325 case IB_QPT_SMI:
326 case IB_QPT_UD:
327 case IB_QPT_GSI:
328 switch (opcode) {
329 case IB_WR_SEND:
330 return IB_OPCODE_UD_SEND_ONLY;
331
332 case IB_WR_SEND_WITH_IMM:
333 return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
334 }
335 break;
336
337 default:
338 break;
339 }
340
341 return -EINVAL;
342}
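The helpers above implement the usual FIRST/MIDDLE/LAST segmentation: while the residual payload still exceeds the MTU the WQE keeps producing MIDDLE packets, the final fragment becomes LAST, and a message that fits in a single MTU collapses to ONLY. A toy standalone sketch of that walk, with strings standing in for the IB_OPCODE_RC_* constants and purely hypothetical sizes:

	#include <stdio.h>

	int main(void)
	{
		unsigned mtu = 1024, resid = 2500;	/* hypothetical message */
		int started = 0;

		while (1) {
			int fits = resid <= mtu;

			if (!started)
				printf("%s\n", fits ? "SEND_ONLY" : "SEND_FIRST");
			else
				printf("%s\n", fits ? "SEND_LAST" : "SEND_MIDDLE");
			started = 1;
			if (fits)
				break;
			resid -= mtu;
		}
		return 0;	/* 2500 bytes at MTU 1024 -> FIRST, MIDDLE, LAST */
	}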
343
344static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
345{
346 int depth;
347
348 if (wqe->has_rd_atomic)
349 return 0;
350
351 qp->req.need_rd_atomic = 1;
352 depth = atomic_dec_return(&qp->req.rd_atomic);
353
354 if (depth >= 0) {
355 qp->req.need_rd_atomic = 0;
356 wqe->has_rd_atomic = 1;
357 return 0;
358 }
359
360 atomic_inc(&qp->req.rd_atomic);
361 return -EAGAIN;
362}
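check_init_depth() is a small credit counter: decrement the outstanding read/atomic budget and, if the counter went negative, give the credit back and ask the caller to retry later. A rough sketch of the same pattern (C11 atomics standing in for the kernel atomic_t API; take_credit is a made-up name):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int rd_atomic = 2;	/* max outstanding read/atomic ops */

	static int take_credit(void)
	{
		if (atomic_fetch_sub(&rd_atomic, 1) - 1 >= 0)
			return 0;			/* credit taken */
		atomic_fetch_add(&rd_atomic, 1);	/* undo, like the atomic_inc() above */
		return -1;				/* -EAGAIN in the driver */
	}

	int main(void)
	{
		int i;

		for (i = 0; i < 4; i++)
			printf("op %d: %s\n", i, take_credit() ? "wait" : "go");
		return 0;				/* first two go, last two wait */
	}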
363
364static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
365{
366 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
367 struct rxe_port *port;
368 struct rxe_av *av;
369
370 if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
371 return qp->mtu;
372
373 av = &wqe->av;
374 port = &rxe->port;
375
376 return port->mtu_cap;
377}
378
379static struct sk_buff *init_req_packet(struct rxe_qp *qp,
380 struct rxe_send_wqe *wqe,
381 int opcode, int payload,
382 struct rxe_pkt_info *pkt)
383{
384 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
385 struct rxe_port *port = &rxe->port;
386 struct sk_buff *skb;
387 struct rxe_send_wr *ibwr = &wqe->wr;
388 struct rxe_av *av;
389 int pad = (-payload) & 0x3;
390 int paylen;
391 int solicited;
392 u16 pkey;
393 u32 qp_num;
394 int ack_req;
395
396 /* length from start of bth to end of icrc */
397 paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
398
399 /* pkt->hdr, rxe, port_num and mask are initialized in ifc
400 * layer
401 */
402 pkt->opcode = opcode;
403 pkt->qp = qp;
404 pkt->psn = qp->req.psn;
405 pkt->mask = rxe_opcode[opcode].mask;
406 pkt->paylen = paylen;
407 pkt->offset = 0;
408 pkt->wqe = wqe;
409
410 /* init skb */
411 av = rxe_get_av(pkt);
412 skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt);
413 if (unlikely(!skb))
414 return NULL;
415
416 /* init bth */
417 solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
418 (pkt->mask & RXE_END_MASK) &&
419 ((pkt->mask & (RXE_SEND_MASK)) ||
420 (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
421 (RXE_WRITE_MASK | RXE_IMMDT_MASK));
422
423 pkey = (qp_type(qp) == IB_QPT_GSI) ?
424 port->pkey_tbl[ibwr->wr.ud.pkey_index] :
425 port->pkey_tbl[qp->attr.pkey_index];
426
427 qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
428 qp->attr.dest_qp_num;
429
430 ack_req = ((pkt->mask & RXE_END_MASK) ||
431 (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
432 if (ack_req)
433 qp->req.noack_pkts = 0;
434
435 bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
436 ack_req, pkt->psn);
437
438 /* init optional headers */
439 if (pkt->mask & RXE_RETH_MASK) {
440 reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
441 reth_set_va(pkt, wqe->iova);
442 reth_set_len(pkt, wqe->dma.length);
443 }
444
445 if (pkt->mask & RXE_IMMDT_MASK)
446 immdt_set_imm(pkt, ibwr->ex.imm_data);
447
448 if (pkt->mask & RXE_IETH_MASK)
449 ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);
450
451 if (pkt->mask & RXE_ATMETH_MASK) {
452 atmeth_set_va(pkt, wqe->iova);
453 if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
454 opcode == IB_OPCODE_RD_COMPARE_SWAP) {
455 atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
456 atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
457 } else {
458 atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
459 }
460 atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
461 }
462
463 if (pkt->mask & RXE_DETH_MASK) {
464 if (qp->ibqp.qp_num == 1)
465 deth_set_qkey(pkt, GSI_QKEY);
466 else
467 deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
468 deth_set_sqp(pkt, qp->ibqp.qp_num);
469 }
470
471 return skb;
472}
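A quick worked check of the "(-payload) & 0x3" rule used above for the pad count (the same expression appears in prepare_ack_packet() in rxe_resp.c): it yields the number of bytes needed to round the payload up to the next 4-byte boundary, which is what the BTH pad field carries.

	#include <stdio.h>

	int main(void)
	{
		int payload;

		for (payload = 0; payload < 6; payload++)
			printf("payload=%d pad=%d\n", payload, (-payload) & 0x3);
		/* 0->0, 1->3, 2->2, 3->1, 4->0, 5->3 */
		return 0;
	}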
473
474static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
475 struct rxe_pkt_info *pkt, struct sk_buff *skb,
476 int paylen)
477{
478 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
479 u32 crc = 0;
480 u32 *p;
481 int err;
482
483 err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc);
484 if (err)
485 return err;
486
487 if (pkt->mask & RXE_WRITE_OR_SEND) {
488 if (wqe->wr.send_flags & IB_SEND_INLINE) {
489 u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
490
491 crc = crc32_le(crc, tmp, paylen);
492
493 memcpy(payload_addr(pkt), tmp, paylen);
494
495 wqe->dma.resid -= paylen;
496 wqe->dma.sge_offset += paylen;
497 } else {
498 err = copy_data(rxe, qp->pd, 0, &wqe->dma,
499 payload_addr(pkt), paylen,
500 from_mem_obj,
501 &crc);
502 if (err)
503 return err;
504 }
505 }
506 p = payload_addr(pkt) + paylen + bth_pad(pkt);
507
508 *p = ~crc;
509
510 return 0;
511}
512
513static void update_wqe_state(struct rxe_qp *qp,
514 struct rxe_send_wqe *wqe,
515 struct rxe_pkt_info *pkt,
516 enum wqe_state *prev_state)
517{
518 enum wqe_state prev_state_ = wqe->state;
519
520 if (pkt->mask & RXE_END_MASK) {
521 if (qp_type(qp) == IB_QPT_RC)
522 wqe->state = wqe_state_pending;
523 } else {
524 wqe->state = wqe_state_processing;
525 }
526
527 *prev_state = prev_state_;
528}
529
530static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
531 struct rxe_pkt_info *pkt, int payload)
532{
533 /* number of packets left to send including current one */
534 int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
535
536 /* handle zero length packet case */
537 if (num_pkt == 0)
538 num_pkt = 1;
539
540 if (pkt->mask & RXE_START_MASK) {
541 wqe->first_psn = qp->req.psn;
542 wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
543 }
544
545 if (pkt->mask & RXE_READ_MASK)
546 qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
547 else
548 qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
549
550 qp->req.opcode = pkt->opcode;
551
552
553 if (pkt->mask & RXE_END_MASK)
554 qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
555
556 qp->need_req_skb = 0;
557
558 if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
559 mod_timer(&qp->retrans_timer,
560 jiffies + qp->qp_timeout_jiffies);
561}
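A worked example (with hypothetical numbers) of the PSN bookkeeping in update_state(): the total message length rounded up to whole MTUs gives num_pkt, so a WQE spans first_psn .. first_psn + num_pkt - 1, with all arithmetic confined to the 24-bit PSN space.

	#include <stdio.h>

	#define BTH_PSN_MASK	0xffffff	/* 24-bit PSN space, as in the driver */

	int main(void)
	{
		unsigned mtu = 4096, length = 10000, psn = 0xfffffe;
		unsigned num_pkt = (length + mtu - 1) / mtu;	/* = 3 packets */

		printf("first_psn=0x%x last_psn=0x%x\n",
		       psn, (psn + num_pkt - 1) & BTH_PSN_MASK);
		return 0;	/* prints first_psn=0xfffffe last_psn=0x0 (wraps) */
	}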
562
563int rxe_requester(void *arg)
564{
565 struct rxe_qp *qp = (struct rxe_qp *)arg;
566 struct rxe_pkt_info pkt;
567 struct sk_buff *skb;
568 struct rxe_send_wqe *wqe;
569 unsigned mask;
570 int payload;
571 int mtu;
572 int opcode;
573 int ret;
574 enum wqe_state prev_state;
575
576next_wqe:
577 if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
578 goto exit;
579
580 if (unlikely(qp->req.state == QP_STATE_RESET)) {
581 qp->req.wqe_index = consumer_index(qp->sq.queue);
582 qp->req.opcode = -1;
583 qp->req.need_rd_atomic = 0;
584 qp->req.wait_psn = 0;
585 qp->req.need_retry = 0;
586 goto exit;
587 }
588
589 if (unlikely(qp->req.need_retry)) {
590 req_retry(qp);
591 qp->req.need_retry = 0;
592 }
593
594 wqe = req_next_wqe(qp);
595 if (unlikely(!wqe))
596 goto exit;
597
598 if (wqe->mask & WR_REG_MASK) {
599 if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
600 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
601 struct rxe_mem *rmr;
602
603 rmr = rxe_pool_get_index(&rxe->mr_pool,
604 wqe->wr.ex.invalidate_rkey >> 8);
605 if (!rmr) {
606 pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey);
607 wqe->state = wqe_state_error;
608 wqe->status = IB_WC_MW_BIND_ERR;
609 goto exit;
610 }
611 rmr->state = RXE_MEM_STATE_FREE;
612 wqe->state = wqe_state_done;
613 wqe->status = IB_WC_SUCCESS;
614 } else if (wqe->wr.opcode == IB_WR_REG_MR) {
615 struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr);
616
617 rmr->state = RXE_MEM_STATE_VALID;
618 rmr->access = wqe->wr.wr.reg.access;
619 rmr->lkey = wqe->wr.wr.reg.key;
620 rmr->rkey = wqe->wr.wr.reg.key;
621 wqe->state = wqe_state_done;
622 wqe->status = IB_WC_SUCCESS;
623 } else {
624 goto exit;
625 }
626 qp->req.wqe_index = next_index(qp->sq.queue,
627 qp->req.wqe_index);
628 goto next_wqe;
629 }
630
631 if (unlikely(qp_type(qp) == IB_QPT_RC &&
632 qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) {
633 qp->req.wait_psn = 1;
634 goto exit;
635 }
636
637 /* Limit the number of inflight SKBs per QP */
638 if (unlikely(atomic_read(&qp->skb_out) >
639 RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {
640 qp->need_req_skb = 1;
641 goto exit;
642 }
643
644 opcode = next_opcode(qp, wqe, wqe->wr.opcode);
645 if (unlikely(opcode < 0)) {
646 wqe->status = IB_WC_LOC_QP_OP_ERR;
647 goto exit;
648 }
649
650 mask = rxe_opcode[opcode].mask;
651 if (unlikely(mask & RXE_READ_OR_ATOMIC)) {
652 if (check_init_depth(qp, wqe))
653 goto exit;
654 }
655
656 mtu = get_mtu(qp, wqe);
657 payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
658 if (payload > mtu) {
659 if (qp_type(qp) == IB_QPT_UD) {
660 /* C10-93.1.1: If the total sum of all the buffer lengths specified for a
661 * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
662 * shall not emit any packets for this message. Further, the CI shall not
663 * generate an error due to this condition.
664 */
665
666 /* fake a successful UD send */
667 wqe->first_psn = qp->req.psn;
668 wqe->last_psn = qp->req.psn;
669 qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
670 qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
671 qp->req.wqe_index = next_index(qp->sq.queue,
672 qp->req.wqe_index);
673 wqe->state = wqe_state_done;
674 wqe->status = IB_WC_SUCCESS;
675 goto complete;
676 }
677 payload = mtu;
678 }
679
680 skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
681 if (unlikely(!skb)) {
682 pr_err("Failed allocating skb\n");
683 goto err;
684 }
685
686 if (fill_packet(qp, wqe, &pkt, skb, payload)) {
687 pr_debug("Error during fill packet\n");
688 goto err;
689 }
690
691 update_wqe_state(qp, wqe, &pkt, &prev_state);
692 ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
693 if (ret) {
694 qp->need_req_skb = 1;
695 kfree_skb(skb);
696
697 wqe->state = prev_state;
698
699 if (ret == -EAGAIN) {
700 rxe_run_task(&qp->req.task, 1);
701 goto exit;
702 }
703
704 goto err;
705 }
706
707 update_state(qp, wqe, &pkt, payload);
708
709 goto next_wqe;
710
711err:
712 kfree_skb(skb);
713 wqe->status = IB_WC_LOC_PROT_ERR;
714 wqe->state = wqe_state_error;
715
716complete:
717 if (qp_type(qp) != IB_QPT_RC) {
718 while (rxe_completer(qp) == 0)
719 ;
720 }
721
722 return 0;
723
724exit:
725 return -EAGAIN;
726}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
new file mode 100644
index 000000000000..ebb03b46e2ad
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -0,0 +1,1380 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/skbuff.h>
35
36#include "rxe.h"
37#include "rxe_loc.h"
38#include "rxe_queue.h"
39
40enum resp_states {
41 RESPST_NONE,
42 RESPST_GET_REQ,
43 RESPST_CHK_PSN,
44 RESPST_CHK_OP_SEQ,
45 RESPST_CHK_OP_VALID,
46 RESPST_CHK_RESOURCE,
47 RESPST_CHK_LENGTH,
48 RESPST_CHK_RKEY,
49 RESPST_EXECUTE,
50 RESPST_READ_REPLY,
51 RESPST_COMPLETE,
52 RESPST_ACKNOWLEDGE,
53 RESPST_CLEANUP,
54 RESPST_DUPLICATE_REQUEST,
55 RESPST_ERR_MALFORMED_WQE,
56 RESPST_ERR_UNSUPPORTED_OPCODE,
57 RESPST_ERR_MISALIGNED_ATOMIC,
58 RESPST_ERR_PSN_OUT_OF_SEQ,
59 RESPST_ERR_MISSING_OPCODE_FIRST,
60 RESPST_ERR_MISSING_OPCODE_LAST_C,
61 RESPST_ERR_MISSING_OPCODE_LAST_D1E,
62 RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
63 RESPST_ERR_RNR,
64 RESPST_ERR_RKEY_VIOLATION,
65 RESPST_ERR_LENGTH,
66 RESPST_ERR_CQ_OVERFLOW,
67 RESPST_ERROR,
68 RESPST_RESET,
69 RESPST_DONE,
70 RESPST_EXIT,
71};
72
73static char *resp_state_name[] = {
74 [RESPST_NONE] = "NONE",
75 [RESPST_GET_REQ] = "GET_REQ",
76 [RESPST_CHK_PSN] = "CHK_PSN",
77 [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
78 [RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
79 [RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
80 [RESPST_CHK_LENGTH] = "CHK_LENGTH",
81 [RESPST_CHK_RKEY] = "CHK_RKEY",
82 [RESPST_EXECUTE] = "EXECUTE",
83 [RESPST_READ_REPLY] = "READ_REPLY",
84 [RESPST_COMPLETE] = "COMPLETE",
85 [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
86 [RESPST_CLEANUP] = "CLEANUP",
87 [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
88 [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
89 [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
90 [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
91 [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
92 [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
93 [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
94 [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
95 [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
96 [RESPST_ERR_RNR] = "ERR_RNR",
97 [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
98 [RESPST_ERR_LENGTH] = "ERR_LENGTH",
99 [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
100 [RESPST_ERROR] = "ERROR",
101 [RESPST_RESET] = "RESET",
102 [RESPST_DONE] = "DONE",
103 [RESPST_EXIT] = "EXIT",
104};
105
106/* rxe_rcv calls here to add a request packet to the input queue */
107void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
108 struct sk_buff *skb)
109{
110 int must_sched;
111 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
112
113 skb_queue_tail(&qp->req_pkts, skb);
114
115 must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
116 (skb_queue_len(&qp->req_pkts) > 1);
117
118 rxe_run_task(&qp->resp.task, must_sched);
119}
120
121static inline enum resp_states get_req(struct rxe_qp *qp,
122 struct rxe_pkt_info **pkt_p)
123{
124 struct sk_buff *skb;
125
126 if (qp->resp.state == QP_STATE_ERROR) {
127 skb = skb_dequeue(&qp->req_pkts);
128 if (skb) {
129 /* drain request packet queue */
130 rxe_drop_ref(qp);
131 kfree_skb(skb);
132 return RESPST_GET_REQ;
133 }
134
135 /* go drain recv wr queue */
136 return RESPST_CHK_RESOURCE;
137 }
138
139 skb = skb_peek(&qp->req_pkts);
140 if (!skb)
141 return RESPST_EXIT;
142
143 *pkt_p = SKB_TO_PKT(skb);
144
145 return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
146}
147
148static enum resp_states check_psn(struct rxe_qp *qp,
149 struct rxe_pkt_info *pkt)
150{
151 int diff = psn_compare(pkt->psn, qp->resp.psn);
152
153 switch (qp_type(qp)) {
154 case IB_QPT_RC:
155 if (diff > 0) {
156 if (qp->resp.sent_psn_nak)
157 return RESPST_CLEANUP;
158
159 qp->resp.sent_psn_nak = 1;
160 return RESPST_ERR_PSN_OUT_OF_SEQ;
161
162 } else if (diff < 0) {
163 return RESPST_DUPLICATE_REQUEST;
164 }
165
166 if (qp->resp.sent_psn_nak)
167 qp->resp.sent_psn_nak = 0;
168
169 break;
170
171 case IB_QPT_UC:
172 if (qp->resp.drop_msg || diff != 0) {
173 if (pkt->mask & RXE_START_MASK) {
174 qp->resp.drop_msg = 0;
175 return RESPST_CHK_OP_SEQ;
176 }
177
178 qp->resp.drop_msg = 1;
179 return RESPST_CLEANUP;
180 }
181 break;
182 default:
183 break;
184 }
185
186 return RESPST_CHK_OP_SEQ;
187}
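check_psn() leans on the driver's psn_compare() helper, defined elsewhere in this patch. The sketch below is a standalone reimplementation (an assumption, not copied from this diff) of the usual wrap-aware comparison: the 24-bit difference is shifted up into the sign bit so ordering still holds across the 0xffffff -> 0 wrap.

	#include <stdio.h>
	#include <stdint.h>

	/* > 0: a is after b, < 0: a is before b, 0: equal (mod 2^24) */
	static int psn_cmp(uint32_t a, uint32_t b)
	{
		return (int32_t)((a - b) << 8);
	}

	int main(void)
	{
		printf("%d\n", psn_cmp(0x000002, 0xfffffe) > 0);	/* 1: 2 follows 0xfffffe */
		printf("%d\n", psn_cmp(0x000002, 0x000005) > 0);	/* 0: 2 precedes 5 */
		return 0;
	}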
188
189static enum resp_states check_op_seq(struct rxe_qp *qp,
190 struct rxe_pkt_info *pkt)
191{
192 switch (qp_type(qp)) {
193 case IB_QPT_RC:
194 switch (qp->resp.opcode) {
195 case IB_OPCODE_RC_SEND_FIRST:
196 case IB_OPCODE_RC_SEND_MIDDLE:
197 switch (pkt->opcode) {
198 case IB_OPCODE_RC_SEND_MIDDLE:
199 case IB_OPCODE_RC_SEND_LAST:
200 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
201 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
202 return RESPST_CHK_OP_VALID;
203 default:
204 return RESPST_ERR_MISSING_OPCODE_LAST_C;
205 }
206
207 case IB_OPCODE_RC_RDMA_WRITE_FIRST:
208 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
209 switch (pkt->opcode) {
210 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
211 case IB_OPCODE_RC_RDMA_WRITE_LAST:
212 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
213 return RESPST_CHK_OP_VALID;
214 default:
215 return RESPST_ERR_MISSING_OPCODE_LAST_C;
216 }
217
218 default:
219 switch (pkt->opcode) {
220 case IB_OPCODE_RC_SEND_MIDDLE:
221 case IB_OPCODE_RC_SEND_LAST:
222 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
223 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
224 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
225 case IB_OPCODE_RC_RDMA_WRITE_LAST:
226 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
227 return RESPST_ERR_MISSING_OPCODE_FIRST;
228 default:
229 return RESPST_CHK_OP_VALID;
230 }
231 }
232 break;
233
234 case IB_QPT_UC:
235 switch (qp->resp.opcode) {
236 case IB_OPCODE_UC_SEND_FIRST:
237 case IB_OPCODE_UC_SEND_MIDDLE:
238 switch (pkt->opcode) {
239 case IB_OPCODE_UC_SEND_MIDDLE:
240 case IB_OPCODE_UC_SEND_LAST:
241 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
242 return RESPST_CHK_OP_VALID;
243 default:
244 return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
245 }
246
247 case IB_OPCODE_UC_RDMA_WRITE_FIRST:
248 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
249 switch (pkt->opcode) {
250 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
251 case IB_OPCODE_UC_RDMA_WRITE_LAST:
252 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
253 return RESPST_CHK_OP_VALID;
254 default:
255 return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
256 }
257
258 default:
259 switch (pkt->opcode) {
260 case IB_OPCODE_UC_SEND_MIDDLE:
261 case IB_OPCODE_UC_SEND_LAST:
262 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
263 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
264 case IB_OPCODE_UC_RDMA_WRITE_LAST:
265 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
266 qp->resp.drop_msg = 1;
267 return RESPST_CLEANUP;
268 default:
269 return RESPST_CHK_OP_VALID;
270 }
271 }
272 break;
273
274 default:
275 return RESPST_CHK_OP_VALID;
276 }
277}
278
279static enum resp_states check_op_valid(struct rxe_qp *qp,
280 struct rxe_pkt_info *pkt)
281{
282 switch (qp_type(qp)) {
283 case IB_QPT_RC:
284 if (((pkt->mask & RXE_READ_MASK) &&
285 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
286 ((pkt->mask & RXE_WRITE_MASK) &&
287 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
288 ((pkt->mask & RXE_ATOMIC_MASK) &&
289 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
290 return RESPST_ERR_UNSUPPORTED_OPCODE;
291 }
292
293 break;
294
295 case IB_QPT_UC:
296 if ((pkt->mask & RXE_WRITE_MASK) &&
297 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
298 qp->resp.drop_msg = 1;
299 return RESPST_CLEANUP;
300 }
301
302 break;
303
304 case IB_QPT_UD:
305 case IB_QPT_SMI:
306 case IB_QPT_GSI:
307 break;
308
309 default:
310 WARN_ON(1);
311 break;
312 }
313
314 return RESPST_CHK_RESOURCE;
315}
316
317static enum resp_states get_srq_wqe(struct rxe_qp *qp)
318{
319 struct rxe_srq *srq = qp->srq;
320 struct rxe_queue *q = srq->rq.queue;
321 struct rxe_recv_wqe *wqe;
322 struct ib_event ev;
323
324 if (srq->error)
325 return RESPST_ERR_RNR;
326
327 spin_lock_bh(&srq->rq.consumer_lock);
328
329 wqe = queue_head(q);
330 if (!wqe) {
331 spin_unlock_bh(&srq->rq.consumer_lock);
332 return RESPST_ERR_RNR;
333 }
334
335 /* note kernel and user space recv wqes have same size */
336 memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe));
337
338 qp->resp.wqe = &qp->resp.srq_wqe.wqe;
339 advance_consumer(q);
340
341 if (srq->limit && srq->ibsrq.event_handler &&
342 (queue_count(q) < srq->limit)) {
343 srq->limit = 0;
344 goto event;
345 }
346
347 spin_unlock_bh(&srq->rq.consumer_lock);
348 return RESPST_CHK_LENGTH;
349
350event:
351 spin_unlock_bh(&srq->rq.consumer_lock);
352 ev.device = qp->ibqp.device;
353 ev.element.srq = qp->ibqp.srq;
354 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
355 srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
356 return RESPST_CHK_LENGTH;
357}
358
359static enum resp_states check_resource(struct rxe_qp *qp,
360 struct rxe_pkt_info *pkt)
361{
362 struct rxe_srq *srq = qp->srq;
363
364 if (qp->resp.state == QP_STATE_ERROR) {
365 if (qp->resp.wqe) {
366 qp->resp.status = IB_WC_WR_FLUSH_ERR;
367 return RESPST_COMPLETE;
368 } else if (!srq) {
369 qp->resp.wqe = queue_head(qp->rq.queue);
370 if (qp->resp.wqe) {
371 qp->resp.status = IB_WC_WR_FLUSH_ERR;
372 return RESPST_COMPLETE;
373 } else {
374 return RESPST_EXIT;
375 }
376 } else {
377 return RESPST_EXIT;
378 }
379 }
380
381 if (pkt->mask & RXE_READ_OR_ATOMIC) {
382		/* it is the requester's job not to send
383		 * too many read/atomic ops; we just
384		 * recycle the responder resource queue
385 */
386 if (likely(qp->attr.max_rd_atomic > 0))
387 return RESPST_CHK_LENGTH;
388 else
389 return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
390 }
391
392 if (pkt->mask & RXE_RWR_MASK) {
393 if (srq)
394 return get_srq_wqe(qp);
395
396 qp->resp.wqe = queue_head(qp->rq.queue);
397 return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
398 }
399
400 return RESPST_CHK_LENGTH;
401}
402
403static enum resp_states check_length(struct rxe_qp *qp,
404 struct rxe_pkt_info *pkt)
405{
406 switch (qp_type(qp)) {
407 case IB_QPT_RC:
408 return RESPST_CHK_RKEY;
409
410 case IB_QPT_UC:
411 return RESPST_CHK_RKEY;
412
413 default:
414 return RESPST_CHK_RKEY;
415 }
416}
417
418static enum resp_states check_rkey(struct rxe_qp *qp,
419 struct rxe_pkt_info *pkt)
420{
421 struct rxe_mem *mem;
422 u64 va;
423 u32 rkey;
424 u32 resid;
425 u32 pktlen;
426 int mtu = qp->mtu;
427 enum resp_states state;
428 int access;
429
430 if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
431 if (pkt->mask & RXE_RETH_MASK) {
432 qp->resp.va = reth_va(pkt);
433 qp->resp.rkey = reth_rkey(pkt);
434 qp->resp.resid = reth_len(pkt);
435 }
436 access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
437 : IB_ACCESS_REMOTE_WRITE;
438 } else if (pkt->mask & RXE_ATOMIC_MASK) {
439 qp->resp.va = atmeth_va(pkt);
440 qp->resp.rkey = atmeth_rkey(pkt);
441 qp->resp.resid = sizeof(u64);
442 access = IB_ACCESS_REMOTE_ATOMIC;
443 } else {
444 return RESPST_EXECUTE;
445 }
446
447 va = qp->resp.va;
448 rkey = qp->resp.rkey;
449 resid = qp->resp.resid;
450 pktlen = payload_size(pkt);
451
452 mem = lookup_mem(qp->pd, access, rkey, lookup_remote);
453 if (!mem) {
454 state = RESPST_ERR_RKEY_VIOLATION;
455 goto err1;
456 }
457
458 if (unlikely(mem->state == RXE_MEM_STATE_FREE)) {
459 state = RESPST_ERR_RKEY_VIOLATION;
460 goto err1;
461 }
462
463 if (mem_check_range(mem, va, resid)) {
464 state = RESPST_ERR_RKEY_VIOLATION;
465 goto err2;
466 }
467
468 if (pkt->mask & RXE_WRITE_MASK) {
469 if (resid > mtu) {
470 if (pktlen != mtu || bth_pad(pkt)) {
471 state = RESPST_ERR_LENGTH;
472 goto err2;
473 }
474
475 resid = mtu;
476 } else {
477 if (pktlen != resid) {
478 state = RESPST_ERR_LENGTH;
479 goto err2;
480 }
481 if ((bth_pad(pkt) != (0x3 & (-resid)))) {
482			/* This case may not map exactly to a
483			 * spec error class, but nothing else fits.
484			 */
485 state = RESPST_ERR_LENGTH;
486 goto err2;
487 }
488 }
489 }
490
491 WARN_ON(qp->resp.mr);
492
493 qp->resp.mr = mem;
494 return RESPST_EXECUTE;
495
496err2:
497 rxe_drop_ref(mem);
498err1:
499 return state;
500}
501
502static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
503 int data_len)
504{
505 int err;
506 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
507
508 err = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
509 data_addr, data_len, to_mem_obj, NULL);
510 if (unlikely(err))
511 return (err == -ENOSPC) ? RESPST_ERR_LENGTH
512 : RESPST_ERR_MALFORMED_WQE;
513
514 return RESPST_NONE;
515}
516
517static enum resp_states write_data_in(struct rxe_qp *qp,
518 struct rxe_pkt_info *pkt)
519{
520 enum resp_states rc = RESPST_NONE;
521 int err;
522 int data_len = payload_size(pkt);
523
524 err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt),
525 data_len, to_mem_obj, NULL);
526 if (err) {
527 rc = RESPST_ERR_RKEY_VIOLATION;
528 goto out;
529 }
530
531 qp->resp.va += data_len;
532 qp->resp.resid -= data_len;
533
534out:
535 return rc;
536}
537
538/* Guarantee atomicity of atomic operations at the machine level. */
539static DEFINE_SPINLOCK(atomic_ops_lock);
540
541static enum resp_states process_atomic(struct rxe_qp *qp,
542 struct rxe_pkt_info *pkt)
543{
544 u64 iova = atmeth_va(pkt);
545 u64 *vaddr;
546 enum resp_states ret;
547 struct rxe_mem *mr = qp->resp.mr;
548
549 if (mr->state != RXE_MEM_STATE_VALID) {
550 ret = RESPST_ERR_RKEY_VIOLATION;
551 goto out;
552 }
553
554 vaddr = iova_to_vaddr(mr, iova, sizeof(u64));
555
556 /* check vaddr is 8 bytes aligned. */
557 if (!vaddr || (uintptr_t)vaddr & 7) {
558 ret = RESPST_ERR_MISALIGNED_ATOMIC;
559 goto out;
560 }
561
562 spin_lock_bh(&atomic_ops_lock);
563
564 qp->resp.atomic_orig = *vaddr;
565
566 if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
567 pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
568 if (*vaddr == atmeth_comp(pkt))
569 *vaddr = atmeth_swap_add(pkt);
570 } else {
571 *vaddr += atmeth_swap_add(pkt);
572 }
573
574 spin_unlock_bh(&atomic_ops_lock);
575
576 ret = RESPST_NONE;
577out:
578 return ret;
579}
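Set the locking aside and the two atomic verbs handled by process_atomic() have simple semantics: capture the original 64-bit value, then either swap it in place when it equals the compare operand or add the operand to it. A plain userspace sketch (locking omitted; the function names are made up):

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t cmp_swap(uint64_t *vaddr, uint64_t cmp, uint64_t swap)
	{
		uint64_t orig = *vaddr;

		if (orig == cmp)
			*vaddr = swap;
		return orig;		/* reported back in the ATOMIC ACK */
	}

	static uint64_t fetch_add(uint64_t *vaddr, uint64_t add)
	{
		uint64_t orig = *vaddr;

		*vaddr += add;
		return orig;
	}

	int main(void)
	{
		uint64_t mem = 5;
		uint64_t orig;

		orig = cmp_swap(&mem, 5, 9);
		printf("cswap -> %llu, mem=%llu\n",
		       (unsigned long long)orig, (unsigned long long)mem);	/* 5, 9 */
		orig = fetch_add(&mem, 3);
		printf("fadd  -> %llu, mem=%llu\n",
		       (unsigned long long)orig, (unsigned long long)mem);	/* 9, 12 */
		return 0;
	}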
580
581static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
582 struct rxe_pkt_info *pkt,
583 struct rxe_pkt_info *ack,
584 int opcode,
585 int payload,
586 u32 psn,
587 u8 syndrome,
588 u32 *crcp)
589{
590 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
591 struct sk_buff *skb;
592 u32 crc = 0;
593 u32 *p;
594 int paylen;
595 int pad;
596 int err;
597
598 /*
599 * allocate packet
600 */
601 pad = (-payload) & 0x3;
602 paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
603
604 skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack);
605 if (!skb)
606 return NULL;
607
608 ack->qp = qp;
609 ack->opcode = opcode;
610 ack->mask = rxe_opcode[opcode].mask;
611 ack->offset = pkt->offset;
612 ack->paylen = paylen;
613
614 /* fill in bth using the request packet headers */
615 memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES);
616
617 bth_set_opcode(ack, opcode);
618 bth_set_qpn(ack, qp->attr.dest_qp_num);
619 bth_set_pad(ack, pad);
620 bth_set_se(ack, 0);
621 bth_set_psn(ack, psn);
622 bth_set_ack(ack, 0);
623 ack->psn = psn;
624
625 if (ack->mask & RXE_AETH_MASK) {
626 aeth_set_syn(ack, syndrome);
627 aeth_set_msn(ack, qp->resp.msn);
628 }
629
630 if (ack->mask & RXE_ATMACK_MASK)
631 atmack_set_orig(ack, qp->resp.atomic_orig);
632
633 err = rxe->ifc_ops->prepare(rxe, ack, skb, &crc);
634 if (err) {
635 kfree_skb(skb);
636 return NULL;
637 }
638
639 if (crcp) {
640 /* CRC computation will be continued by the caller */
641 *crcp = crc;
642 } else {
643 p = payload_addr(ack) + payload + bth_pad(ack);
644 *p = ~crc;
645 }
646
647 return skb;
648}
649
650/* RDMA read response. If res is not NULL, then we have a current RDMA request
651 * being processed or replayed.
652 */
653static enum resp_states read_reply(struct rxe_qp *qp,
654 struct rxe_pkt_info *req_pkt)
655{
656 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
657 struct rxe_pkt_info ack_pkt;
658 struct sk_buff *skb;
659 int mtu = qp->mtu;
660 enum resp_states state;
661 int payload;
662 int opcode;
663 int err;
664 struct resp_res *res = qp->resp.res;
665 u32 icrc;
666 u32 *p;
667
668 if (!res) {
669		/* This is the first time we process this request. Get a
670 * resource
671 */
672 res = &qp->resp.resources[qp->resp.res_head];
673
674 free_rd_atomic_resource(qp, res);
675 rxe_advance_resp_resource(qp);
676
677 res->type = RXE_READ_MASK;
678
679 res->read.va = qp->resp.va;
680 res->read.va_org = qp->resp.va;
681
682 res->first_psn = req_pkt->psn;
683 res->last_psn = req_pkt->psn +
684 (reth_len(req_pkt) + mtu - 1) /
685 mtu - 1;
686 res->cur_psn = req_pkt->psn;
687
688 res->read.resid = qp->resp.resid;
689 res->read.length = qp->resp.resid;
690 res->read.rkey = qp->resp.rkey;
691
692 /* note res inherits the reference to mr from qp */
693 res->read.mr = qp->resp.mr;
694 qp->resp.mr = NULL;
695
696 qp->resp.res = res;
697 res->state = rdatm_res_state_new;
698 }
699
700 if (res->state == rdatm_res_state_new) {
701 if (res->read.resid <= mtu)
702 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
703 else
704 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
705 } else {
706 if (res->read.resid > mtu)
707 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
708 else
709 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
710 }
711
712 res->state = rdatm_res_state_next;
713
714 payload = min_t(int, res->read.resid, mtu);
715
716 skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
717 res->cur_psn, AETH_ACK_UNLIMITED, &icrc);
718 if (!skb)
719 return RESPST_ERR_RNR;
720
721 err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
722 payload, from_mem_obj, &icrc);
723 if (err)
724 pr_err("Failed copying memory\n");
725
726 p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
727 *p = ~icrc;
728
729 err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
730 if (err) {
731 pr_err("Failed sending RDMA reply.\n");
732 kfree_skb(skb);
733 return RESPST_ERR_RNR;
734 }
735
736 res->read.va += payload;
737 res->read.resid -= payload;
738 res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;
739
740 if (res->read.resid > 0) {
741 state = RESPST_DONE;
742 } else {
743 qp->resp.res = NULL;
744 qp->resp.opcode = -1;
745 qp->resp.psn = res->cur_psn;
746 state = RESPST_CLEANUP;
747 }
748
749 return state;
750}
751
752/* Executes a new request. A retried request never reaches this function
753 * (sends and writes are discarded; reads and atomics are retried elsewhere).
754 */
755static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
756{
757 enum resp_states err;
758
759 if (pkt->mask & RXE_SEND_MASK) {
760 if (qp_type(qp) == IB_QPT_UD ||
761 qp_type(qp) == IB_QPT_SMI ||
762 qp_type(qp) == IB_QPT_GSI) {
763 union rdma_network_hdr hdr;
764 struct sk_buff *skb = PKT_TO_SKB(pkt);
765
766 memset(&hdr, 0, sizeof(hdr));
767 if (skb->protocol == htons(ETH_P_IP))
768 memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh));
769 else if (skb->protocol == htons(ETH_P_IPV6))
770 memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh));
771
772 err = send_data_in(qp, &hdr, sizeof(hdr));
773 if (err)
774 return err;
775 }
776 err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
777 if (err)
778 return err;
779 } else if (pkt->mask & RXE_WRITE_MASK) {
780 err = write_data_in(qp, pkt);
781 if (err)
782 return err;
783 } else if (pkt->mask & RXE_READ_MASK) {
784 /* For RDMA Read we can increment the msn now. See C9-148. */
785 qp->resp.msn++;
786 return RESPST_READ_REPLY;
787 } else if (pkt->mask & RXE_ATOMIC_MASK) {
788 err = process_atomic(qp, pkt);
789 if (err)
790 return err;
791 } else
792 /* Unreachable */
793 WARN_ON(1);
794
795 /* We successfully processed this new request. */
796 qp->resp.msn++;
797
798 /* next expected psn, read handles this separately */
799 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
800
801 qp->resp.opcode = pkt->opcode;
802 qp->resp.status = IB_WC_SUCCESS;
803
804 if (pkt->mask & RXE_COMP_MASK)
805 return RESPST_COMPLETE;
806 else if (qp_type(qp) == IB_QPT_RC)
807 return RESPST_ACKNOWLEDGE;
808 else
809 return RESPST_CLEANUP;
810}
811
812static enum resp_states do_complete(struct rxe_qp *qp,
813 struct rxe_pkt_info *pkt)
814{
815 struct rxe_cqe cqe;
816 struct ib_wc *wc = &cqe.ibwc;
817 struct ib_uverbs_wc *uwc = &cqe.uibwc;
818 struct rxe_recv_wqe *wqe = qp->resp.wqe;
819
820 if (unlikely(!wqe))
821 return RESPST_CLEANUP;
822
823 memset(&cqe, 0, sizeof(cqe));
824
825 wc->wr_id = wqe->wr_id;
826 wc->status = qp->resp.status;
827 wc->qp = &qp->ibqp;
828
829 /* fields after status are not required for errors */
830 if (wc->status == IB_WC_SUCCESS) {
831 wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
832 pkt->mask & RXE_WRITE_MASK) ?
833 IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
834 wc->vendor_err = 0;
835 wc->byte_len = wqe->dma.length - wqe->dma.resid;
836
837 /* fields after byte_len are different between kernel and user
838 * space
839 */
840 if (qp->rcq->is_user) {
841 uwc->wc_flags = IB_WC_GRH;
842
843 if (pkt->mask & RXE_IMMDT_MASK) {
844 uwc->wc_flags |= IB_WC_WITH_IMM;
845 uwc->ex.imm_data =
846 (__u32 __force)immdt_imm(pkt);
847 }
848
849 if (pkt->mask & RXE_IETH_MASK) {
850 uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
851 uwc->ex.invalidate_rkey = ieth_rkey(pkt);
852 }
853
854 uwc->qp_num = qp->ibqp.qp_num;
855
856 if (pkt->mask & RXE_DETH_MASK)
857 uwc->src_qp = deth_sqp(pkt);
858
859 uwc->port_num = qp->attr.port_num;
860 } else {
861 struct sk_buff *skb = PKT_TO_SKB(pkt);
862
863 wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
864 if (skb->protocol == htons(ETH_P_IP))
865 wc->network_hdr_type = RDMA_NETWORK_IPV4;
866 else
867 wc->network_hdr_type = RDMA_NETWORK_IPV6;
868
869 if (pkt->mask & RXE_IMMDT_MASK) {
870 wc->wc_flags |= IB_WC_WITH_IMM;
871 wc->ex.imm_data = immdt_imm(pkt);
872 }
873
874 if (pkt->mask & RXE_IETH_MASK) {
875 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
876 struct rxe_mem *rmr;
877
878 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
879 wc->ex.invalidate_rkey = ieth_rkey(pkt);
880
881 rmr = rxe_pool_get_index(&rxe->mr_pool,
882 wc->ex.invalidate_rkey >> 8);
883 if (unlikely(!rmr)) {
884 pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey);
885 return RESPST_ERROR;
886 }
887 rmr->state = RXE_MEM_STATE_FREE;
888 }
889
890 wc->qp = &qp->ibqp;
891
892 if (pkt->mask & RXE_DETH_MASK)
893 wc->src_qp = deth_sqp(pkt);
894
895 wc->port_num = qp->attr.port_num;
896 }
897 }
898
899 /* have copy for srq and reference for !srq */
900 if (!qp->srq)
901 advance_consumer(qp->rq.queue);
902
903 qp->resp.wqe = NULL;
904
905 if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
906 return RESPST_ERR_CQ_OVERFLOW;
907
908 if (qp->resp.state == QP_STATE_ERROR)
909 return RESPST_CHK_RESOURCE;
910
911 if (!pkt)
912 return RESPST_DONE;
913 else if (qp_type(qp) == IB_QPT_RC)
914 return RESPST_ACKNOWLEDGE;
915 else
916 return RESPST_CLEANUP;
917}
918
919static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
920 u8 syndrome, u32 psn)
921{
922 int err = 0;
923 struct rxe_pkt_info ack_pkt;
924 struct sk_buff *skb;
925 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
926
927 skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
928 0, psn, syndrome, NULL);
929 if (!skb) {
930 err = -ENOMEM;
931 goto err1;
932 }
933
934 err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
935 if (err) {
936 pr_err_ratelimited("Failed sending ack\n");
937 kfree_skb(skb);
938 }
939
940err1:
941 return err;
942}
943
944static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
945 u8 syndrome)
946{
947 int rc = 0;
948 struct rxe_pkt_info ack_pkt;
949 struct sk_buff *skb;
950 struct sk_buff *skb_copy;
951 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
952 struct resp_res *res;
953
954 skb = prepare_ack_packet(qp, pkt, &ack_pkt,
955 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
956 syndrome, NULL);
957 if (!skb) {
958 rc = -ENOMEM;
959 goto out;
960 }
961
962 skb_copy = skb_clone(skb, GFP_ATOMIC);
963 if (skb_copy)
964 rxe_add_ref(qp); /* for the new SKB */
965 else {
966 pr_warn("Could not clone atomic response\n");
967 rc = -ENOMEM;
968 goto out;
969 }
970
971 res = &qp->resp.resources[qp->resp.res_head];
972 free_rd_atomic_resource(qp, res);
973 rxe_advance_resp_resource(qp);
974
975 res->type = RXE_ATOMIC_MASK;
976 res->atomic.skb = skb;
977 res->first_psn = qp->resp.psn;
978 res->last_psn = qp->resp.psn;
979 res->cur_psn = qp->resp.psn;
980
981 rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
982 if (rc) {
983 pr_err_ratelimited("Failed sending ack\n");
984 rxe_drop_ref(qp);
985 kfree_skb(skb_copy);
986 }
987
988out:
989 return rc;
990}
991
992static enum resp_states acknowledge(struct rxe_qp *qp,
993 struct rxe_pkt_info *pkt)
994{
995 if (qp_type(qp) != IB_QPT_RC)
996 return RESPST_CLEANUP;
997
998 if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
999 send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
1000 else if (pkt->mask & RXE_ATOMIC_MASK)
1001 send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
1002 else if (bth_ack(pkt))
1003 send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);
1004
1005 return RESPST_CLEANUP;
1006}
1007
1008static enum resp_states cleanup(struct rxe_qp *qp,
1009 struct rxe_pkt_info *pkt)
1010{
1011 struct sk_buff *skb;
1012
1013 if (pkt) {
1014 skb = skb_dequeue(&qp->req_pkts);
1015 rxe_drop_ref(qp);
1016 kfree_skb(skb);
1017 }
1018
1019 if (qp->resp.mr) {
1020 rxe_drop_ref(qp->resp.mr);
1021 qp->resp.mr = NULL;
1022 }
1023
1024 return RESPST_DONE;
1025}
1026
1027static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
1028{
1029 int i;
1030
1031 for (i = 0; i < qp->attr.max_rd_atomic; i++) {
1032 struct resp_res *res = &qp->resp.resources[i];
1033
1034 if (res->type == 0)
1035 continue;
1036
1037 if (psn_compare(psn, res->first_psn) >= 0 &&
1038 psn_compare(psn, res->last_psn) <= 0) {
1039 return res;
1040 }
1041 }
1042
1043 return NULL;
1044}
1045
1046static enum resp_states duplicate_request(struct rxe_qp *qp,
1047 struct rxe_pkt_info *pkt)
1048{
1049 enum resp_states rc;
1050
1051 if (pkt->mask & RXE_SEND_MASK ||
1052 pkt->mask & RXE_WRITE_MASK) {
1053 /* SEND. Ack again and cleanup. C9-105. */
1054 if (bth_ack(pkt))
1055 send_ack(qp, pkt, AETH_ACK_UNLIMITED, qp->resp.psn - 1);
1056 rc = RESPST_CLEANUP;
1057 goto out;
1058 } else if (pkt->mask & RXE_READ_MASK) {
1059 struct resp_res *res;
1060
1061 res = find_resource(qp, pkt->psn);
1062 if (!res) {
1063 /* Resource not found. Class D error. Drop the
1064 * request.
1065 */
1066 rc = RESPST_CLEANUP;
1067 goto out;
1068 } else {
1069 /* Ensure this new request is the same as the previous
1070 * one or a subset of it.
1071 */
1072 u64 iova = reth_va(pkt);
1073 u32 resid = reth_len(pkt);
1074
1075 if (iova < res->read.va_org ||
1076 resid > res->read.length ||
1077 (iova + resid) > (res->read.va_org +
1078 res->read.length)) {
1079 rc = RESPST_CLEANUP;
1080 goto out;
1081 }
1082
1083 if (reth_rkey(pkt) != res->read.rkey) {
1084 rc = RESPST_CLEANUP;
1085 goto out;
1086 }
1087
1088 res->cur_psn = pkt->psn;
1089 res->state = (pkt->psn == res->first_psn) ?
1090 rdatm_res_state_new :
1091 rdatm_res_state_replay;
1092
1093 /* Reset the resource, except length. */
1094 res->read.va_org = iova;
1095 res->read.va = iova;
1096 res->read.resid = resid;
1097
1098 /* Replay the RDMA read reply. */
1099 qp->resp.res = res;
1100 rc = RESPST_READ_REPLY;
1101 goto out;
1102 }
1103 } else {
1104 struct resp_res *res;
1105
1106 /* Find the operation in our list of responder resources. */
1107 res = find_resource(qp, pkt->psn);
1108 if (res) {
1109 struct sk_buff *skb_copy;
1110
1111 skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC);
1112 if (skb_copy) {
1113 rxe_add_ref(qp); /* for the new SKB */
1114 } else {
1115 pr_warn("Couldn't clone atomic resp\n");
1116 rc = RESPST_CLEANUP;
1117 goto out;
1118 }
1119 bth_set_psn(SKB_TO_PKT(skb_copy),
1120 qp->resp.psn - 1);
1121 /* Resend the result. */
1122 rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
1123 pkt, skb_copy);
1124 if (rc) {
1125 pr_err("Failed resending result. This flow is not handled - skb ignored\n");
1126 kfree_skb(skb_copy);
1127 rc = RESPST_CLEANUP;
1128 goto out;
1129 }
1130 }
1131
1132 /* Resource not found. Class D error. Drop the request. */
1133 rc = RESPST_CLEANUP;
1134 goto out;
1135 }
1136out:
1137 return rc;
1138}
1139
1140/* Process a class A or C error. Both are treated the same in this implementation. */
1141static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
1142 enum ib_wc_status status)
1143{
1144 qp->resp.aeth_syndrome = syndrome;
1145 qp->resp.status = status;
1146
1147 /* indicate that we should go through the ERROR state */
1148 qp->resp.goto_error = 1;
1149}
1150
1151static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
1152{
1153 /* UC */
1154 if (qp->srq) {
1155 /* Class E */
1156 qp->resp.drop_msg = 1;
1157 if (qp->resp.wqe) {
1158 qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1159 return RESPST_COMPLETE;
1160 } else {
1161 return RESPST_CLEANUP;
1162 }
1163 } else {
1164 /* Class D1. This packet may be the start of a
1165 * new message and could be valid. The previous
1166		 * message is invalid and ignored. Reset the
1167 * recv wr to its original state
1168 */
1169 if (qp->resp.wqe) {
1170 qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
1171 qp->resp.wqe->dma.cur_sge = 0;
1172 qp->resp.wqe->dma.sge_offset = 0;
1173 qp->resp.opcode = -1;
1174 }
1175
1176 if (qp->resp.mr) {
1177 rxe_drop_ref(qp->resp.mr);
1178 qp->resp.mr = NULL;
1179 }
1180
1181 return RESPST_CLEANUP;
1182 }
1183}
1184
1185int rxe_responder(void *arg)
1186{
1187 struct rxe_qp *qp = (struct rxe_qp *)arg;
1188 enum resp_states state;
1189 struct rxe_pkt_info *pkt = NULL;
1190 int ret = 0;
1191
1192 qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
1193
1194 if (!qp->valid) {
1195 ret = -EINVAL;
1196 goto done;
1197 }
1198
1199 switch (qp->resp.state) {
1200 case QP_STATE_RESET:
1201 state = RESPST_RESET;
1202 break;
1203
1204 default:
1205 state = RESPST_GET_REQ;
1206 break;
1207 }
1208
1209 while (1) {
1210 pr_debug("state = %s\n", resp_state_name[state]);
1211 switch (state) {
1212 case RESPST_GET_REQ:
1213 state = get_req(qp, &pkt);
1214 break;
1215 case RESPST_CHK_PSN:
1216 state = check_psn(qp, pkt);
1217 break;
1218 case RESPST_CHK_OP_SEQ:
1219 state = check_op_seq(qp, pkt);
1220 break;
1221 case RESPST_CHK_OP_VALID:
1222 state = check_op_valid(qp, pkt);
1223 break;
1224 case RESPST_CHK_RESOURCE:
1225 state = check_resource(qp, pkt);
1226 break;
1227 case RESPST_CHK_LENGTH:
1228 state = check_length(qp, pkt);
1229 break;
1230 case RESPST_CHK_RKEY:
1231 state = check_rkey(qp, pkt);
1232 break;
1233 case RESPST_EXECUTE:
1234 state = execute(qp, pkt);
1235 break;
1236 case RESPST_COMPLETE:
1237 state = do_complete(qp, pkt);
1238 break;
1239 case RESPST_READ_REPLY:
1240 state = read_reply(qp, pkt);
1241 break;
1242 case RESPST_ACKNOWLEDGE:
1243 state = acknowledge(qp, pkt);
1244 break;
1245 case RESPST_CLEANUP:
1246 state = cleanup(qp, pkt);
1247 break;
1248 case RESPST_DUPLICATE_REQUEST:
1249 state = duplicate_request(qp, pkt);
1250 break;
1251 case RESPST_ERR_PSN_OUT_OF_SEQ:
1252 /* RC only - Class B. Drop packet. */
1253 send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
1254 state = RESPST_CLEANUP;
1255 break;
1256
1257 case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
1258 case RESPST_ERR_MISSING_OPCODE_FIRST:
1259 case RESPST_ERR_MISSING_OPCODE_LAST_C:
1260 case RESPST_ERR_UNSUPPORTED_OPCODE:
1261 case RESPST_ERR_MISALIGNED_ATOMIC:
1262 /* RC Only - Class C. */
1263 do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1264 IB_WC_REM_INV_REQ_ERR);
1265 state = RESPST_COMPLETE;
1266 break;
1267
1268 case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
1269 state = do_class_d1e_error(qp);
1270 break;
1271 case RESPST_ERR_RNR:
1272 if (qp_type(qp) == IB_QPT_RC) {
1273 /* RC - class B */
1274 send_ack(qp, pkt, AETH_RNR_NAK |
1275 (~AETH_TYPE_MASK &
1276 qp->attr.min_rnr_timer),
1277 pkt->psn);
1278 } else {
1279 /* UD/UC - class D */
1280 qp->resp.drop_msg = 1;
1281 }
1282 state = RESPST_CLEANUP;
1283 break;
1284
1285 case RESPST_ERR_RKEY_VIOLATION:
1286 if (qp_type(qp) == IB_QPT_RC) {
1287 /* Class C */
1288 do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
1289 IB_WC_REM_ACCESS_ERR);
1290 state = RESPST_COMPLETE;
1291 } else {
1292 qp->resp.drop_msg = 1;
1293 if (qp->srq) {
1294 /* UC/SRQ Class D */
1295 qp->resp.status = IB_WC_REM_ACCESS_ERR;
1296 state = RESPST_COMPLETE;
1297 } else {
1298 /* UC/non-SRQ Class E. */
1299 state = RESPST_CLEANUP;
1300 }
1301 }
1302 break;
1303
1304 case RESPST_ERR_LENGTH:
1305 if (qp_type(qp) == IB_QPT_RC) {
1306 /* Class C */
1307 do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1308 IB_WC_REM_INV_REQ_ERR);
1309 state = RESPST_COMPLETE;
1310 } else if (qp->srq) {
1311 /* UC/UD - class E */
1312 qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1313 state = RESPST_COMPLETE;
1314 } else {
1315 /* UC/UD - class D */
1316 qp->resp.drop_msg = 1;
1317 state = RESPST_CLEANUP;
1318 }
1319 break;
1320
1321 case RESPST_ERR_MALFORMED_WQE:
1322 /* All, Class A. */
1323 do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
1324 IB_WC_LOC_QP_OP_ERR);
1325 state = RESPST_COMPLETE;
1326 break;
1327
1328 case RESPST_ERR_CQ_OVERFLOW:
1329 /* All - Class G */
1330 state = RESPST_ERROR;
1331 break;
1332
1333 case RESPST_DONE:
1334 if (qp->resp.goto_error) {
1335 state = RESPST_ERROR;
1336 break;
1337 }
1338
1339 goto done;
1340
1341 case RESPST_EXIT:
1342 if (qp->resp.goto_error) {
1343 state = RESPST_ERROR;
1344 break;
1345 }
1346
1347 goto exit;
1348
1349 case RESPST_RESET: {
1350 struct sk_buff *skb;
1351
1352 while ((skb = skb_dequeue(&qp->req_pkts))) {
1353 rxe_drop_ref(qp);
1354 kfree_skb(skb);
1355 }
1356
1357 while (!qp->srq && qp->rq.queue &&
1358 queue_head(qp->rq.queue))
1359 advance_consumer(qp->rq.queue);
1360
1361 qp->resp.wqe = NULL;
1362 goto exit;
1363 }
1364
1365 case RESPST_ERROR:
1366 qp->resp.goto_error = 0;
1367 pr_warn("qp#%d moved to error state\n", qp_num(qp));
1368 rxe_qp_error(qp);
1369 goto exit;
1370
1371 default:
1372 WARN_ON(1);
1373 }
1374 }
1375
1376exit:
1377 ret = -EAGAIN;
1378done:
1379 return ret;
1380}
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
new file mode 100644
index 000000000000..2a6e3cd2d4e8
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -0,0 +1,193 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36#include "rxe_queue.h"
37
38int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
39 struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
40{
41 if (srq && srq->error) {
42 pr_warn("srq in error state\n");
43 goto err1;
44 }
45
46 if (mask & IB_SRQ_MAX_WR) {
47 if (attr->max_wr > rxe->attr.max_srq_wr) {
48 pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
49 attr->max_wr, rxe->attr.max_srq_wr);
50 goto err1;
51 }
52
53 if (attr->max_wr <= 0) {
54 pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
55 goto err1;
56 }
57
58 if (srq && srq->limit && (attr->max_wr < srq->limit)) {
59 pr_warn("max_wr (%d) < srq->limit (%d)\n",
60 attr->max_wr, srq->limit);
61 goto err1;
62 }
63
64 if (attr->max_wr < RXE_MIN_SRQ_WR)
65 attr->max_wr = RXE_MIN_SRQ_WR;
66 }
67
68 if (mask & IB_SRQ_LIMIT) {
69 if (attr->srq_limit > rxe->attr.max_srq_wr) {
70 pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
71 attr->srq_limit, rxe->attr.max_srq_wr);
72 goto err1;
73 }
74
75 if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {
76 pr_warn("srq_limit (%d) > cur limit(%d)\n",
77 attr->srq_limit,
78 srq->rq.queue->buf->index_mask);
79 goto err1;
80 }
81 }
82
83 if (mask == IB_SRQ_INIT_MASK) {
84 if (attr->max_sge > rxe->attr.max_srq_sge) {
85 pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
86 attr->max_sge, rxe->attr.max_srq_sge);
87 goto err1;
88 }
89
90 if (attr->max_sge < RXE_MIN_SRQ_SGE)
91 attr->max_sge = RXE_MIN_SRQ_SGE;
92 }
93
94 return 0;
95
96err1:
97 return -EINVAL;
98}
99
100int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
101 struct ib_srq_init_attr *init,
102 struct ib_ucontext *context, struct ib_udata *udata)
103{
104 int err;
105 int srq_wqe_size;
106 struct rxe_queue *q;
107
108 srq->ibsrq.event_handler = init->event_handler;
109 srq->ibsrq.srq_context = init->srq_context;
110 srq->limit = init->attr.srq_limit;
111 srq->srq_num = srq->pelem.index;
112 srq->rq.max_wr = init->attr.max_wr;
113 srq->rq.max_sge = init->attr.max_sge;
114
115 srq_wqe_size = rcv_wqe_size(srq->rq.max_sge);
116
117 spin_lock_init(&srq->rq.producer_lock);
118 spin_lock_init(&srq->rq.consumer_lock);
119
120 q = rxe_queue_init(rxe, &srq->rq.max_wr,
121 srq_wqe_size);
122 if (!q) {
123 pr_warn("unable to allocate queue for srq\n");
124 return -ENOMEM;
125 }
126
127 srq->rq.queue = q;
128
129 err = do_mmap_info(rxe, udata, false, context, q->buf,
130 q->buf_size, &q->ip);
131 if (err)
132 return err;
133
134 if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32)) {
135 if (copy_to_user(udata->outbuf + sizeof(struct mminfo),
136 &srq->srq_num, sizeof(u32)))
137 return -EFAULT;
138 }
139 return 0;
140}
141
142int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
143 struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
144 struct ib_udata *udata)
145{
146 int err;
147 struct rxe_queue *q = srq->rq.queue;
148 struct mminfo mi = { .offset = 1, .size = 0};
149
150 if (mask & IB_SRQ_MAX_WR) {
151 /* Check that we can write the mminfo struct to user space */
152 if (udata && udata->inlen >= sizeof(__u64)) {
153 __u64 mi_addr;
154
155 /* Get address of user space mminfo struct */
156 err = ib_copy_from_udata(&mi_addr, udata,
157 sizeof(mi_addr));
158 if (err)
159 goto err1;
160
161 udata->outbuf = (void __user *)(unsigned long)mi_addr;
162 udata->outlen = sizeof(mi);
163
164 if (!access_ok(VERIFY_WRITE,
165 (void __user *)udata->outbuf,
166 udata->outlen)) {
167 err = -EFAULT;
168 goto err1;
169 }
170 }
171
172 err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr,
173 rcv_wqe_size(srq->rq.max_sge),
174 srq->rq.queue->ip ?
175 srq->rq.queue->ip->context :
176 NULL,
177 udata, &srq->rq.producer_lock,
178 &srq->rq.consumer_lock);
179 if (err)
180 goto err2;
181 }
182
183 if (mask & IB_SRQ_LIMIT)
184 srq->limit = attr->srq_limit;
185
186 return 0;
187
188err2:
189 rxe_queue_cleanup(q);
190 srq->rq.queue = NULL;
191err1:
192 return err;
193}
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
new file mode 100644
index 000000000000..cf8e77800046
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -0,0 +1,157 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_net.h"
36
37/* Copy argument and remove trailing newline. Return the new length. */
38static int sanitize_arg(const char *val, char *intf, int intf_len)
39{
40 int len;
41
42 if (!val)
43 return 0;
44
45 /* Remove newline. */
46 for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++)
47 intf[len] = val[len];
48 intf[len] = 0;
49
50 if (len == 0 || (val[len] != 0 && val[len] != '\n'))
51 return 0;
52
53 return len;
54}
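
As a minimal illustration of the helper above (a userspace restatement, assuming only the C standard library), sanitize_arg() copies at most intf_len - 1 characters, strips a trailing newline, and rejects empty or oversized interface names by returning 0:

#include <assert.h>
#include <string.h>

static int sanitize_arg(const char *val, char *intf, int intf_len)
{
	int len;

	if (!val)
		return 0;

	/* Copy up to intf_len - 1 characters, stopping at NUL or newline. */
	for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++)
		intf[len] = val[len];
	intf[len] = 0;

	/* Reject empty input and input that did not fit in the buffer. */
	if (len == 0 || (val[len] != 0 && val[len] != '\n'))
		return 0;

	return len;
}

int main(void)
{
	char buf[32];

	assert(sanitize_arg("eth0\n", buf, sizeof(buf)) == 4);	/* newline stripped */
	assert(strcmp(buf, "eth0") == 0);
	assert(sanitize_arg("\n", buf, sizeof(buf)) == 0);	/* empty name rejected */
	assert(sanitize_arg("0123456789", buf, 4) == 0);	/* longer than the buffer */
	return 0;
}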
55
56static void rxe_set_port_state(struct net_device *ndev)
57{
58 struct rxe_dev *rxe = net_to_rxe(ndev);
59 bool is_up = netif_running(ndev) && netif_carrier_ok(ndev);
60
61 if (!rxe)
62 goto out;
63
64 if (is_up)
65 rxe_port_up(rxe);
66 else
67 rxe_port_down(rxe); /* down for unknown state */
68out:
69 return;
70}
71
72static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
73{
74 int len;
75 int err = 0;
76 char intf[32];
77 struct net_device *ndev = NULL;
78 struct rxe_dev *rxe;
79
80 len = sanitize_arg(val, intf, sizeof(intf));
81 if (!len) {
82 pr_err("rxe: add: invalid interface name\n");
83 err = -EINVAL;
84 goto err;
85 }
86
87 ndev = dev_get_by_name(&init_net, intf);
88 if (!ndev) {
89 pr_err("interface %s not found\n", intf);
90 err = -EINVAL;
91 goto err;
92 }
93
94 if (net_to_rxe(ndev)) {
95 pr_err("rxe: already configured on %s\n", intf);
96 err = -EINVAL;
97 goto err;
98 }
99
100 rxe = rxe_net_add(ndev);
101 if (!rxe) {
102 pr_err("rxe: failed to add %s\n", intf);
103 err = -EINVAL;
104 goto err;
105 }
106
107 rxe_set_port_state(ndev);
108 pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf);
109err:
110 if (ndev)
111 dev_put(ndev);
112 return err;
113}
114
115static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
116{
117 int len;
118 char intf[32];
119 struct rxe_dev *rxe;
120
121 len = sanitize_arg(val, intf, sizeof(intf));
122 if (!len) {
123		pr_err("rxe: remove: invalid interface name\n");
124 return -EINVAL;
125 }
126
127 if (strncmp("all", intf, len) == 0) {
128		pr_info("rxe_sys: remove all\n");
129 rxe_remove_all();
130 return 0;
131 }
132
133 rxe = get_rxe_by_name(intf);
134
135 if (!rxe) {
136 pr_err("rxe: not configured on %s\n", intf);
137 return -EINVAL;
138 }
139
140 list_del(&rxe->list);
141 rxe_remove(rxe);
142
143 return 0;
144}
145
146static const struct kernel_param_ops rxe_add_ops = {
147 .set = rxe_param_set_add,
148};
149
150static const struct kernel_param_ops rxe_remove_ops = {
151 .set = rxe_param_set_remove,
152};
153
154module_param_cb(add, &rxe_add_ops, NULL, 0200);
155MODULE_PARM_DESC(add, "Create RXE device over network interface");
156module_param_cb(remove, &rxe_remove_ops, NULL, 0200);
157MODULE_PARM_DESC(remove, "Remove RXE device over network interface");
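
A hedged userspace sketch of how the two module parameters declared above are driven, assuming the module is built as rdma_rxe so the parameters appear under /sys/module/rdma_rxe/parameters/; the helper name rxe_ctl is invented for illustration:

#include <stdio.h>

/* Write an interface name to the "add" or "remove" parameter file. */
static int rxe_ctl(const char *param, const char *ifname)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/module/rdma_rxe/parameters/%s", param);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", ifname);
	return fclose(f);
}

int main(void)
{
	/* Equivalent to: echo eth0 > /sys/module/rdma_rxe/parameters/add */
	return rxe_ctl("add", "eth0");
}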
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
new file mode 100644
index 000000000000..1e19bf828a6e
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -0,0 +1,154 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/kernel.h>
35#include <linux/interrupt.h>
36#include <linux/hardirq.h>
37
38#include "rxe_task.h"
39
40int __rxe_do_task(struct rxe_task *task)
41
42{
43 int ret;
44
45 while ((ret = task->func(task->arg)) == 0)
46 ;
47
48 task->ret = ret;
49
50 return ret;
51}
52
53/*
54 * This locking guards against a race where a second caller
55 * finds the task already running but checks just after
56 * the last call to func.
57 */
58void rxe_do_task(unsigned long data)
59{
60 int cont;
61 int ret;
62 unsigned long flags;
63 struct rxe_task *task = (struct rxe_task *)data;
64
65 spin_lock_irqsave(&task->state_lock, flags);
66 switch (task->state) {
67 case TASK_STATE_START:
68 task->state = TASK_STATE_BUSY;
69 spin_unlock_irqrestore(&task->state_lock, flags);
70 break;
71
72 case TASK_STATE_BUSY:
73 task->state = TASK_STATE_ARMED;
74		/* fall through */
75 case TASK_STATE_ARMED:
76 spin_unlock_irqrestore(&task->state_lock, flags);
77 return;
78
79 default:
80 spin_unlock_irqrestore(&task->state_lock, flags);
81 pr_warn("bad state = %d in rxe_do_task\n", task->state);
82 return;
83 }
84
85 do {
86 cont = 0;
87 ret = task->func(task->arg);
88
89 spin_lock_irqsave(&task->state_lock, flags);
90 switch (task->state) {
91 case TASK_STATE_BUSY:
92 if (ret)
93 task->state = TASK_STATE_START;
94 else
95 cont = 1;
96 break;
97
98		/* someone tried to run the task since the last time we called
99		 * func, so we will call func one more time regardless of the
100		 * return value
101 */
102 case TASK_STATE_ARMED:
103 task->state = TASK_STATE_BUSY;
104 cont = 1;
105 break;
106
107 default:
108 pr_warn("bad state = %d in rxe_do_task\n",
109 task->state);
110 }
111 spin_unlock_irqrestore(&task->state_lock, flags);
112 } while (cont);
113
114 task->ret = ret;
115}
116
117int rxe_init_task(void *obj, struct rxe_task *task,
118 void *arg, int (*func)(void *), char *name)
119{
120 task->obj = obj;
121 task->arg = arg;
122 task->func = func;
123 snprintf(task->name, sizeof(task->name), "%s", name);
124
125 tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
126
127 task->state = TASK_STATE_START;
128 spin_lock_init(&task->state_lock);
129
130 return 0;
131}
132
133void rxe_cleanup_task(struct rxe_task *task)
134{
135 tasklet_kill(&task->tasklet);
136}
137
138void rxe_run_task(struct rxe_task *task, int sched)
139{
140 if (sched)
141 tasklet_schedule(&task->tasklet);
142 else
143 rxe_do_task((unsigned long)task);
144}
145
146void rxe_disable_task(struct rxe_task *task)
147{
148 tasklet_disable(&task->tasklet);
149}
150
151void rxe_enable_task(struct rxe_task *task)
152{
153 tasklet_enable(&task->tasklet);
154}
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
new file mode 100644
index 000000000000..d14aa6daed05
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -0,0 +1,95 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_TASK_H
35#define RXE_TASK_H
36
37enum {
38 TASK_STATE_START = 0,
39 TASK_STATE_BUSY = 1,
40 TASK_STATE_ARMED = 2,
41};
42
43/*
44 * data structure to describe a 'task', which wraps a short
45 * function that returns 0 as long as it needs to be
46 * called again.
47 */
48struct rxe_task {
49 void *obj;
50 struct tasklet_struct tasklet;
51 int state;
52 spinlock_t state_lock; /* spinlock for task state */
53 void *arg;
54 int (*func)(void *arg);
55 int ret;
56 char name[16];
57};
58
59/*
60 * init rxe_task structure
61 * arg => parameter to pass to fcn
62 * fcn => function to call until it returns != 0
63 */
64int rxe_init_task(void *obj, struct rxe_task *task,
65 void *arg, int (*func)(void *), char *name);
66
67/* cleanup task */
68void rxe_cleanup_task(struct rxe_task *task);
69
70/*
71 * raw call to func in a loop without any state checking;
72 * can be called while tasklets are disabled
73 */
74int __rxe_do_task(struct rxe_task *task);
75
76/*
77 * common function called by any of the main tasklets
78 * If there is any chance that there is additional
79 * work to do someone must reschedule the task before
80 * leaving
81 */
82void rxe_do_task(unsigned long data);
83
84/* Run a task directly or schedule it to run as a tasklet. The decision
85 * to run or schedule the tasklet is based on the parameter sched.
86 */
87void rxe_run_task(struct rxe_task *task, int sched);
88
89/* keep a task from scheduling */
90void rxe_disable_task(struct rxe_task *task);
91
92/* allow task to run */
93void rxe_enable_task(struct rxe_task *task);
94
95#endif /* RXE_TASK_H */
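
A hypothetical consumer of the task interface above (a sketch only; struct demo_work, its budget field and demo_func are invented for illustration and are not part of rxe). The work function follows the contract in the comments: return 0 to be called again, nonzero when there is nothing left to do:

#include <linux/interrupt.h>
#include <linux/errno.h>

#include "rxe_task.h"

struct demo_work {
	int budget;
	struct rxe_task task;
};

/* Called repeatedly by rxe_do_task() until it returns nonzero. */
static int demo_func(void *arg)
{
	struct demo_work *w = arg;

	if (w->budget-- > 0)
		return 0;	/* more work: keep calling */
	return -EAGAIN;		/* done for now */
}

static void demo_start(struct demo_work *w)
{
	w->budget = 16;
	rxe_init_task(NULL, &w->task, w, demo_func, "demo");
	rxe_run_task(&w->task, 1);	/* sched != 0: defer to the tasklet */
}

static void demo_stop(struct demo_work *w)
{
	rxe_cleanup_task(&w->task);
}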
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
new file mode 100644
index 000000000000..4552be960c6a
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -0,0 +1,1330 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "rxe.h"
35#include "rxe_loc.h"
36#include "rxe_queue.h"
37
38static int rxe_query_device(struct ib_device *dev,
39 struct ib_device_attr *attr,
40 struct ib_udata *uhw)
41{
42 struct rxe_dev *rxe = to_rdev(dev);
43
44 if (uhw->inlen || uhw->outlen)
45 return -EINVAL;
46
47 *attr = rxe->attr;
48 return 0;
49}
50
51static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
52 u8 *active_width)
53{
54 if (speed <= 1000) {
55 *active_width = IB_WIDTH_1X;
56 *active_speed = IB_SPEED_SDR;
57 } else if (speed <= 10000) {
58 *active_width = IB_WIDTH_1X;
59 *active_speed = IB_SPEED_FDR10;
60 } else if (speed <= 20000) {
61 *active_width = IB_WIDTH_4X;
62 *active_speed = IB_SPEED_DDR;
63 } else if (speed <= 30000) {
64 *active_width = IB_WIDTH_4X;
65 *active_speed = IB_SPEED_QDR;
66 } else if (speed <= 40000) {
67 *active_width = IB_WIDTH_4X;
68 *active_speed = IB_SPEED_FDR10;
69 } else {
70 *active_width = IB_WIDTH_4X;
71 *active_speed = IB_SPEED_EDR;
72 }
73}
74
75static int rxe_query_port(struct ib_device *dev,
76 u8 port_num, struct ib_port_attr *attr)
77{
78 struct rxe_dev *rxe = to_rdev(dev);
79 struct rxe_port *port;
80 u32 speed;
81
82 if (unlikely(port_num != 1)) {
83 pr_warn("invalid port_number %d\n", port_num);
84 goto err1;
85 }
86
87 port = &rxe->port;
88
89 *attr = port->attr;
90
91 mutex_lock(&rxe->usdev_lock);
92 if (rxe->ndev->ethtool_ops->get_link_ksettings) {
93 struct ethtool_link_ksettings ks;
94
95 rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);
96 speed = ks.base.speed;
97 } else if (rxe->ndev->ethtool_ops->get_settings) {
98 struct ethtool_cmd cmd;
99
100 rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
101 speed = cmd.speed;
102 } else {
103 pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name);
104 speed = 1000;
105 }
106 rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width);
107 mutex_unlock(&rxe->usdev_lock);
108
109 return 0;
110
111err1:
112 return -EINVAL;
113}
114
115static int rxe_query_gid(struct ib_device *device,
116 u8 port_num, int index, union ib_gid *gid)
117{
118 int ret;
119
120 if (index > RXE_PORT_GID_TBL_LEN)
121 return -EINVAL;
122
123 ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
124 if (ret == -EAGAIN) {
125 memcpy(gid, &zgid, sizeof(*gid));
126 return 0;
127 }
128
129 return ret;
130}
131
132static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int
133 index, const union ib_gid *gid,
134 const struct ib_gid_attr *attr, void **context)
135{
136 if (index >= RXE_PORT_GID_TBL_LEN)
137 return -EINVAL;
138 return 0;
139}
140
141static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int
142 index, void **context)
143{
144 if (index >= RXE_PORT_GID_TBL_LEN)
145 return -EINVAL;
146 return 0;
147}
148
149static struct net_device *rxe_get_netdev(struct ib_device *device,
150 u8 port_num)
151{
152 struct rxe_dev *rxe = to_rdev(device);
153
154 if (rxe->ndev) {
155 dev_hold(rxe->ndev);
156 return rxe->ndev;
157 }
158
159 return NULL;
160}
161
162static int rxe_query_pkey(struct ib_device *device,
163 u8 port_num, u16 index, u16 *pkey)
164{
165 struct rxe_dev *rxe = to_rdev(device);
166 struct rxe_port *port;
167
168 if (unlikely(port_num != 1)) {
169 dev_warn(device->dma_device, "invalid port_num = %d\n",
170 port_num);
171 goto err1;
172 }
173
174 port = &rxe->port;
175
176 if (unlikely(index >= port->attr.pkey_tbl_len)) {
177 dev_warn(device->dma_device, "invalid index = %d\n",
178 index);
179 goto err1;
180 }
181
182 *pkey = port->pkey_tbl[index];
183 return 0;
184
185err1:
186 return -EINVAL;
187}
188
189static int rxe_modify_device(struct ib_device *dev,
190 int mask, struct ib_device_modify *attr)
191{
192 struct rxe_dev *rxe = to_rdev(dev);
193
194 if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
195 rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
196
197 if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
198 memcpy(rxe->ib_dev.node_desc,
199 attr->node_desc, sizeof(rxe->ib_dev.node_desc));
200 }
201
202 return 0;
203}
204
205static int rxe_modify_port(struct ib_device *dev,
206 u8 port_num, int mask, struct ib_port_modify *attr)
207{
208 struct rxe_dev *rxe = to_rdev(dev);
209 struct rxe_port *port;
210
211 if (unlikely(port_num != 1)) {
212 pr_warn("invalid port_num = %d\n", port_num);
213 goto err1;
214 }
215
216 port = &rxe->port;
217
218 port->attr.port_cap_flags |= attr->set_port_cap_mask;
219 port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;
220
221 if (mask & IB_PORT_RESET_QKEY_CNTR)
222 port->attr.qkey_viol_cntr = 0;
223
224 return 0;
225
226err1:
227 return -EINVAL;
228}
229
230static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
231 u8 port_num)
232{
233 struct rxe_dev *rxe = to_rdev(dev);
234
235 return rxe->ifc_ops->link_layer(rxe, port_num);
236}
237
238static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
239 struct ib_udata *udata)
240{
241 struct rxe_dev *rxe = to_rdev(dev);
242 struct rxe_ucontext *uc;
243
244 uc = rxe_alloc(&rxe->uc_pool);
245 return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
246}
247
248static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
249{
250 struct rxe_ucontext *uc = to_ruc(ibuc);
251
252 rxe_drop_ref(uc);
253 return 0;
254}
255
256static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
257 struct ib_port_immutable *immutable)
258{
259 int err;
260 struct ib_port_attr attr;
261
262 err = rxe_query_port(dev, port_num, &attr);
263 if (err)
264 return err;
265
266 immutable->pkey_tbl_len = attr.pkey_tbl_len;
267 immutable->gid_tbl_len = attr.gid_tbl_len;
268 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
269 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
270
271 return 0;
272}
273
274static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
275 struct ib_ucontext *context,
276 struct ib_udata *udata)
277{
278 struct rxe_dev *rxe = to_rdev(dev);
279 struct rxe_pd *pd;
280
281 pd = rxe_alloc(&rxe->pd_pool);
282 return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
283}
284
285static int rxe_dealloc_pd(struct ib_pd *ibpd)
286{
287 struct rxe_pd *pd = to_rpd(ibpd);
288
289 rxe_drop_ref(pd);
290 return 0;
291}
292
293static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
294 struct rxe_av *av)
295{
296 int err;
297 union ib_gid sgid;
298 struct ib_gid_attr sgid_attr;
299
300 err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,
301 attr->grh.sgid_index, &sgid,
302 &sgid_attr);
303 if (err) {
304 pr_err("Failed to query sgid. err = %d\n", err);
305 return err;
306 }
307
308 err = rxe_av_from_attr(rxe, attr->port_num, av, attr);
309 if (!err)
310 err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);
311
312 if (sgid_attr.ndev)
313 dev_put(sgid_attr.ndev);
314 return err;
315}
316
317static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
318{
319 int err;
320 struct rxe_dev *rxe = to_rdev(ibpd->device);
321 struct rxe_pd *pd = to_rpd(ibpd);
322 struct rxe_ah *ah;
323
324 err = rxe_av_chk_attr(rxe, attr);
325 if (err)
326 goto err1;
327
328 ah = rxe_alloc(&rxe->ah_pool);
329 if (!ah) {
330 err = -ENOMEM;
331 goto err1;
332 }
333
334 rxe_add_ref(pd);
335 ah->pd = pd;
336
337 err = rxe_init_av(rxe, attr, &ah->av);
338 if (err)
339 goto err2;
340
341 return &ah->ibah;
342
343err2:
344 rxe_drop_ref(pd);
345 rxe_drop_ref(ah);
346err1:
347 return ERR_PTR(err);
348}
349
350static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
351{
352 int err;
353 struct rxe_dev *rxe = to_rdev(ibah->device);
354 struct rxe_ah *ah = to_rah(ibah);
355
356 err = rxe_av_chk_attr(rxe, attr);
357 if (err)
358 return err;
359
360 err = rxe_init_av(rxe, attr, &ah->av);
361 if (err)
362 return err;
363
364 return 0;
365}
366
367static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
368{
369 struct rxe_dev *rxe = to_rdev(ibah->device);
370 struct rxe_ah *ah = to_rah(ibah);
371
372 rxe_av_to_attr(rxe, &ah->av, attr);
373 return 0;
374}
375
376static int rxe_destroy_ah(struct ib_ah *ibah)
377{
378 struct rxe_ah *ah = to_rah(ibah);
379
380 rxe_drop_ref(ah->pd);
381 rxe_drop_ref(ah);
382 return 0;
383}
384
385static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
386{
387 int err;
388 int i;
389 u32 length;
390 struct rxe_recv_wqe *recv_wqe;
391 int num_sge = ibwr->num_sge;
392
393 if (unlikely(queue_full(rq->queue))) {
394 err = -ENOMEM;
395 goto err1;
396 }
397
398 if (unlikely(num_sge > rq->max_sge)) {
399 err = -EINVAL;
400 goto err1;
401 }
402
403 length = 0;
404 for (i = 0; i < num_sge; i++)
405 length += ibwr->sg_list[i].length;
406
407 recv_wqe = producer_addr(rq->queue);
408 recv_wqe->wr_id = ibwr->wr_id;
409 recv_wqe->num_sge = num_sge;
410
411 memcpy(recv_wqe->dma.sge, ibwr->sg_list,
412 num_sge * sizeof(struct ib_sge));
413
414 recv_wqe->dma.length = length;
415 recv_wqe->dma.resid = length;
416 recv_wqe->dma.num_sge = num_sge;
417 recv_wqe->dma.cur_sge = 0;
418 recv_wqe->dma.sge_offset = 0;
419
420 /* make sure all changes to the work queue are written before we
421 * update the producer pointer
422 */
423 smp_wmb();
424
425 advance_producer(rq->queue);
426 return 0;
427
428err1:
429 return err;
430}
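
The smp_wmb() above orders the WQE writes before the producer-index update, so a consumer that observes the new index also observes a fully written entry. A userspace analog of that publish step using C11 release semantics (a sketch with invented ring/slot names, not the rxe queue implementation):

#include <stdatomic.h>
#include <stdint.h>

#define RING_SIZE 64

struct ring {
	uint64_t slot[RING_SIZE];
	_Atomic uint32_t producer;	/* advanced by the producer only */
	_Atomic uint32_t consumer;	/* advanced by the consumer only */
};

static int ring_post(struct ring *r, uint64_t wr_id)
{
	uint32_t prod = atomic_load_explicit(&r->producer, memory_order_relaxed);
	uint32_t cons = atomic_load_explicit(&r->consumer, memory_order_acquire);

	if (prod - cons == RING_SIZE)
		return -1;			/* queue full */

	r->slot[prod % RING_SIZE] = wr_id;	/* fill the entry first ... */
	atomic_store_explicit(&r->producer, prod + 1,
			      memory_order_release);	/* ... then publish it */
	return 0;
}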
431
432static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
433 struct ib_srq_init_attr *init,
434 struct ib_udata *udata)
435{
436 int err;
437 struct rxe_dev *rxe = to_rdev(ibpd->device);
438 struct rxe_pd *pd = to_rpd(ibpd);
439 struct rxe_srq *srq;
440 struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
441
442 err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
443 if (err)
444 goto err1;
445
446 srq = rxe_alloc(&rxe->srq_pool);
447 if (!srq) {
448 err = -ENOMEM;
449 goto err1;
450 }
451
452 rxe_add_index(srq);
453 rxe_add_ref(pd);
454 srq->pd = pd;
455
456 err = rxe_srq_from_init(rxe, srq, init, context, udata);
457 if (err)
458 goto err2;
459
460 return &srq->ibsrq;
461
462err2:
463 rxe_drop_ref(pd);
464 rxe_drop_index(srq);
465 rxe_drop_ref(srq);
466err1:
467 return ERR_PTR(err);
468}
469
470static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
471 enum ib_srq_attr_mask mask,
472 struct ib_udata *udata)
473{
474 int err;
475 struct rxe_srq *srq = to_rsrq(ibsrq);
476 struct rxe_dev *rxe = to_rdev(ibsrq->device);
477
478 err = rxe_srq_chk_attr(rxe, srq, attr, mask);
479 if (err)
480 goto err1;
481
482 err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);
483 if (err)
484 goto err1;
485
486 return 0;
487
488err1:
489 return err;
490}
491
492static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
493{
494 struct rxe_srq *srq = to_rsrq(ibsrq);
495
496 if (srq->error)
497 return -EINVAL;
498
499 attr->max_wr = srq->rq.queue->buf->index_mask;
500 attr->max_sge = srq->rq.max_sge;
501 attr->srq_limit = srq->limit;
502 return 0;
503}
504
505static int rxe_destroy_srq(struct ib_srq *ibsrq)
506{
507 struct rxe_srq *srq = to_rsrq(ibsrq);
508
509 if (srq->rq.queue)
510 rxe_queue_cleanup(srq->rq.queue);
511
512 rxe_drop_ref(srq->pd);
513 rxe_drop_index(srq);
514 rxe_drop_ref(srq);
515
516 return 0;
517}
518
519static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
520 struct ib_recv_wr **bad_wr)
521{
522 int err = 0;
523 unsigned long flags;
524 struct rxe_srq *srq = to_rsrq(ibsrq);
525
526 spin_lock_irqsave(&srq->rq.producer_lock, flags);
527
528 while (wr) {
529 err = post_one_recv(&srq->rq, wr);
530 if (unlikely(err))
531 break;
532 wr = wr->next;
533 }
534
535 spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
536
537 if (err)
538 *bad_wr = wr;
539
540 return err;
541}
542
543static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
544 struct ib_qp_init_attr *init,
545 struct ib_udata *udata)
546{
547 int err;
548 struct rxe_dev *rxe = to_rdev(ibpd->device);
549 struct rxe_pd *pd = to_rpd(ibpd);
550 struct rxe_qp *qp;
551
552 err = rxe_qp_chk_init(rxe, init);
553 if (err)
554 goto err1;
555
556 qp = rxe_alloc(&rxe->qp_pool);
557 if (!qp) {
558 err = -ENOMEM;
559 goto err1;
560 }
561
562 if (udata) {
563 if (udata->inlen) {
564 err = -EINVAL;
565 goto err1;
566 }
567 qp->is_user = 1;
568 }
569
570 rxe_add_index(qp);
571
572 err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
573 if (err)
574 goto err2;
575
576 return &qp->ibqp;
577
578err2:
579 rxe_drop_index(qp);
580 rxe_drop_ref(qp);
581err1:
582 return ERR_PTR(err);
583}
584
585static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
586 int mask, struct ib_udata *udata)
587{
588 int err;
589 struct rxe_dev *rxe = to_rdev(ibqp->device);
590 struct rxe_qp *qp = to_rqp(ibqp);
591
592 err = rxe_qp_chk_attr(rxe, qp, attr, mask);
593 if (err)
594 goto err1;
595
596 err = rxe_qp_from_attr(qp, attr, mask, udata);
597 if (err)
598 goto err1;
599
600 return 0;
601
602err1:
603 return err;
604}
605
606static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
607 int mask, struct ib_qp_init_attr *init)
608{
609 struct rxe_qp *qp = to_rqp(ibqp);
610
611 rxe_qp_to_init(qp, init);
612 rxe_qp_to_attr(qp, attr, mask);
613
614 return 0;
615}
616
617static int rxe_destroy_qp(struct ib_qp *ibqp)
618{
619 struct rxe_qp *qp = to_rqp(ibqp);
620
621 rxe_qp_destroy(qp);
622 rxe_drop_index(qp);
623 rxe_drop_ref(qp);
624 return 0;
625}
626
627static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
628 unsigned int mask, unsigned int length)
629{
630 int num_sge = ibwr->num_sge;
631 struct rxe_sq *sq = &qp->sq;
632
633 if (unlikely(num_sge > sq->max_sge))
634 goto err1;
635
636 if (unlikely(mask & WR_ATOMIC_MASK)) {
637 if (length < 8)
638 goto err1;
639
640 if (atomic_wr(ibwr)->remote_addr & 0x7)
641 goto err1;
642 }
643
644 if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
645 (length > sq->max_inline)))
646 goto err1;
647
648 return 0;
649
650err1:
651 return -EINVAL;
652}
653
654static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
655 struct ib_send_wr *ibwr)
656{
657 wr->wr_id = ibwr->wr_id;
658 wr->num_sge = ibwr->num_sge;
659 wr->opcode = ibwr->opcode;
660 wr->send_flags = ibwr->send_flags;
661
662 if (qp_type(qp) == IB_QPT_UD ||
663 qp_type(qp) == IB_QPT_SMI ||
664 qp_type(qp) == IB_QPT_GSI) {
665 wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
666 wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
667 if (qp_type(qp) == IB_QPT_GSI)
668 wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
669 if (wr->opcode == IB_WR_SEND_WITH_IMM)
670 wr->ex.imm_data = ibwr->ex.imm_data;
671 } else {
672 switch (wr->opcode) {
673 case IB_WR_RDMA_WRITE_WITH_IMM:
674 wr->ex.imm_data = ibwr->ex.imm_data;
675 case IB_WR_RDMA_READ:
676 case IB_WR_RDMA_WRITE:
677 wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
678 wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
679 break;
680 case IB_WR_SEND_WITH_IMM:
681 wr->ex.imm_data = ibwr->ex.imm_data;
682 break;
683 case IB_WR_SEND_WITH_INV:
684 wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
685 break;
686 case IB_WR_ATOMIC_CMP_AND_SWP:
687 case IB_WR_ATOMIC_FETCH_AND_ADD:
688 wr->wr.atomic.remote_addr =
689 atomic_wr(ibwr)->remote_addr;
690 wr->wr.atomic.compare_add =
691 atomic_wr(ibwr)->compare_add;
692 wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
693 wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
694 break;
695 case IB_WR_LOCAL_INV:
696 wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
697 break;
698 case IB_WR_REG_MR:
699 wr->wr.reg.mr = reg_wr(ibwr)->mr;
700 wr->wr.reg.key = reg_wr(ibwr)->key;
701 wr->wr.reg.access = reg_wr(ibwr)->access;
702 break;
703 default:
704 break;
705 }
706 }
707}
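
For reference, this is the caller-side shape of a work request that init_send_wr() above translates into rxe's internal rxe_send_wr. A hedged sketch of generic kernel verbs usage, not rxe-specific code; the helper name post_rdma_write is made up and the qp, mr and buffer are assumed to exist:

#include <rdma/ib_verbs.h>

static int post_rdma_write(struct ib_qp *qp, struct ib_mr *mr, void *buf,
			   u32 len, u64 remote_addr, u32 rkey)
{
	struct ib_sge sge = {
		.addr	= (u64)(uintptr_t)buf,
		.length	= len,
		.lkey	= mr->lkey,
	};
	struct ib_rdma_wr wr = {
		.wr = {
			.wr_id		= 1,
			.sg_list	= &sge,
			.num_sge	= 1,
			.opcode		= IB_WR_RDMA_WRITE,
			.send_flags	= IB_SEND_SIGNALED,
		},
		.remote_addr	= remote_addr,
		.rkey		= rkey,
	};
	struct ib_send_wr *bad_wr;

	return ib_post_send(qp, &wr.wr, &bad_wr);
}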
708
709static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
710 unsigned int mask, unsigned int length,
711 struct rxe_send_wqe *wqe)
712{
713 int num_sge = ibwr->num_sge;
714 struct ib_sge *sge;
715 int i;
716 u8 *p;
717
718 init_send_wr(qp, &wqe->wr, ibwr);
719
720 if (qp_type(qp) == IB_QPT_UD ||
721 qp_type(qp) == IB_QPT_SMI ||
722 qp_type(qp) == IB_QPT_GSI)
723 memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));
724
725 if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
726 p = wqe->dma.inline_data;
727
728 sge = ibwr->sg_list;
729 for (i = 0; i < num_sge; i++, sge++) {
730			if (qp->is_user && copy_from_user(p, (void __user *)
731 (uintptr_t)sge->addr, sge->length))
732 return -EFAULT;
733
734 else if (!qp->is_user)
735 memcpy(p, (void *)(uintptr_t)sge->addr,
736 sge->length);
737
738 p += sge->length;
739 }
740 } else if (mask & WR_REG_MASK) {
741 wqe->mask = mask;
742 wqe->state = wqe_state_posted;
743 return 0;
744 } else
745 memcpy(wqe->dma.sge, ibwr->sg_list,
746 num_sge * sizeof(struct ib_sge));
747
748 wqe->iova = (mask & WR_ATOMIC_MASK) ?
749 atomic_wr(ibwr)->remote_addr :
750 rdma_wr(ibwr)->remote_addr;
751 wqe->mask = mask;
752 wqe->dma.length = length;
753 wqe->dma.resid = length;
754 wqe->dma.num_sge = num_sge;
755 wqe->dma.cur_sge = 0;
756 wqe->dma.sge_offset = 0;
757 wqe->state = wqe_state_posted;
758 wqe->ssn = atomic_add_return(1, &qp->ssn);
759
760 return 0;
761}
762
763static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
764 unsigned mask, u32 length)
765{
766 int err;
767 struct rxe_sq *sq = &qp->sq;
768 struct rxe_send_wqe *send_wqe;
769 unsigned long flags;
770
771 err = validate_send_wr(qp, ibwr, mask, length);
772 if (err)
773 return err;
774
775 spin_lock_irqsave(&qp->sq.sq_lock, flags);
776
777 if (unlikely(queue_full(sq->queue))) {
778 err = -ENOMEM;
779 goto err1;
780 }
781
782 send_wqe = producer_addr(sq->queue);
783
784 err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
785 if (unlikely(err))
786 goto err1;
787
788 /*
789 * make sure all changes to the work queue are
790 * written before we update the producer pointer
791 */
792 smp_wmb();
793
794 advance_producer(sq->queue);
795 spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
796
797 return 0;
798
799err1:
800 spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
801 return err;
802}
803
804static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
805 struct ib_send_wr **bad_wr)
806{
807 int err = 0;
808 struct rxe_qp *qp = to_rqp(ibqp);
809 unsigned int mask;
810 unsigned int length = 0;
811 int i;
812 int must_sched;
813
814 if (unlikely(!qp->valid)) {
815 *bad_wr = wr;
816 return -EINVAL;
817 }
818
819 if (unlikely(qp->req.state < QP_STATE_READY)) {
820 *bad_wr = wr;
821 return -EINVAL;
822 }
823
824 while (wr) {
825 mask = wr_opcode_mask(wr->opcode, qp);
826 if (unlikely(!mask)) {
827 err = -EINVAL;
828 *bad_wr = wr;
829 break;
830 }
831
832 if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
833 !(mask & WR_INLINE_MASK))) {
834 err = -EINVAL;
835 *bad_wr = wr;
836 break;
837 }
838
839 length = 0;
840 for (i = 0; i < wr->num_sge; i++)
841 length += wr->sg_list[i].length;
842
843 err = post_one_send(qp, wr, mask, length);
844
845 if (err) {
846 *bad_wr = wr;
847 break;
848 }
849 wr = wr->next;
850 }
851
852 /*
853	 * Must schedule for a GSI QP because ib_send_mad() holds an irq lock
854	 * and the requester calls ip_local_out_sk(), which takes spin_lock_bh().
855 */
856 must_sched = (qp_type(qp) == IB_QPT_GSI) ||
857 (queue_count(qp->sq.queue) > 1);
858
859 rxe_run_task(&qp->req.task, must_sched);
860
861 return err;
862}
863
864static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
865 struct ib_recv_wr **bad_wr)
866{
867 int err = 0;
868 struct rxe_qp *qp = to_rqp(ibqp);
869 struct rxe_rq *rq = &qp->rq;
870 unsigned long flags;
871
872 if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
873 *bad_wr = wr;
874 err = -EINVAL;
875 goto err1;
876 }
877
878 if (unlikely(qp->srq)) {
879 *bad_wr = wr;
880 err = -EINVAL;
881 goto err1;
882 }
883
884 spin_lock_irqsave(&rq->producer_lock, flags);
885
886 while (wr) {
887 err = post_one_recv(rq, wr);
888 if (unlikely(err)) {
889 *bad_wr = wr;
890 break;
891 }
892 wr = wr->next;
893 }
894
895 spin_unlock_irqrestore(&rq->producer_lock, flags);
896
897err1:
898 return err;
899}
900
901static struct ib_cq *rxe_create_cq(struct ib_device *dev,
902 const struct ib_cq_init_attr *attr,
903 struct ib_ucontext *context,
904 struct ib_udata *udata)
905{
906 int err;
907 struct rxe_dev *rxe = to_rdev(dev);
908 struct rxe_cq *cq;
909
910 if (attr->flags)
911 return ERR_PTR(-EINVAL);
912
913 err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
914 if (err)
915 goto err1;
916
917 cq = rxe_alloc(&rxe->cq_pool);
918 if (!cq) {
919 err = -ENOMEM;
920 goto err1;
921 }
922
923 err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
924 context, udata);
925 if (err)
926 goto err2;
927
928 return &cq->ibcq;
929
930err2:
931 rxe_drop_ref(cq);
932err1:
933 return ERR_PTR(err);
934}
935
936static int rxe_destroy_cq(struct ib_cq *ibcq)
937{
938 struct rxe_cq *cq = to_rcq(ibcq);
939
940 rxe_drop_ref(cq);
941 return 0;
942}
943
944static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
945{
946 int err;
947 struct rxe_cq *cq = to_rcq(ibcq);
948 struct rxe_dev *rxe = to_rdev(ibcq->device);
949
950 err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
951 if (err)
952 goto err1;
953
954 err = rxe_cq_resize_queue(cq, cqe, udata);
955 if (err)
956 goto err1;
957
958 return 0;
959
960err1:
961 return err;
962}
963
964static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
965{
966 int i;
967 struct rxe_cq *cq = to_rcq(ibcq);
968 struct rxe_cqe *cqe;
969 unsigned long flags;
970
971 spin_lock_irqsave(&cq->cq_lock, flags);
972 for (i = 0; i < num_entries; i++) {
973 cqe = queue_head(cq->queue);
974 if (!cqe)
975 break;
976
977 memcpy(wc++, &cqe->ibwc, sizeof(*wc));
978 advance_consumer(cq->queue);
979 }
980 spin_unlock_irqrestore(&cq->cq_lock, flags);
981
982 return i;
983}
984
985static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
986{
987 struct rxe_cq *cq = to_rcq(ibcq);
988 int count = queue_count(cq->queue);
989
990 return (count > wc_cnt) ? wc_cnt : count;
991}
992
993static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
994{
995 struct rxe_cq *cq = to_rcq(ibcq);
996
997 if (cq->notify != IB_CQ_NEXT_COMP)
998 cq->notify = flags & IB_CQ_SOLICITED_MASK;
999
1000 return 0;
1001}
1002
1003static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
1004{
1005 struct rxe_dev *rxe = to_rdev(ibpd->device);
1006 struct rxe_pd *pd = to_rpd(ibpd);
1007 struct rxe_mem *mr;
1008 int err;
1009
1010 mr = rxe_alloc(&rxe->mr_pool);
1011 if (!mr) {
1012 err = -ENOMEM;
1013 goto err1;
1014 }
1015
1016 rxe_add_index(mr);
1017
1018 rxe_add_ref(pd);
1019
1020 err = rxe_mem_init_dma(rxe, pd, access, mr);
1021 if (err)
1022 goto err2;
1023
1024 return &mr->ibmr;
1025
1026err2:
1027 rxe_drop_ref(pd);
1028 rxe_drop_index(mr);
1029 rxe_drop_ref(mr);
1030err1:
1031 return ERR_PTR(err);
1032}
1033
1034static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
1035 u64 start,
1036 u64 length,
1037 u64 iova,
1038 int access, struct ib_udata *udata)
1039{
1040 int err;
1041 struct rxe_dev *rxe = to_rdev(ibpd->device);
1042 struct rxe_pd *pd = to_rpd(ibpd);
1043 struct rxe_mem *mr;
1044
1045 mr = rxe_alloc(&rxe->mr_pool);
1046 if (!mr) {
1047 err = -ENOMEM;
1048 goto err2;
1049 }
1050
1051 rxe_add_index(mr);
1052
1053 rxe_add_ref(pd);
1054
1055 err = rxe_mem_init_user(rxe, pd, start, length, iova,
1056 access, udata, mr);
1057 if (err)
1058 goto err3;
1059
1060 return &mr->ibmr;
1061
1062err3:
1063 rxe_drop_ref(pd);
1064 rxe_drop_index(mr);
1065 rxe_drop_ref(mr);
1066err2:
1067 return ERR_PTR(err);
1068}
1069
1070static int rxe_dereg_mr(struct ib_mr *ibmr)
1071{
1072 struct rxe_mem *mr = to_rmr(ibmr);
1073
1074 mr->state = RXE_MEM_STATE_ZOMBIE;
1075 rxe_drop_ref(mr->pd);
1076 rxe_drop_index(mr);
1077 rxe_drop_ref(mr);
1078 return 0;
1079}
1080
1081static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
1082 enum ib_mr_type mr_type,
1083 u32 max_num_sg)
1084{
1085 struct rxe_dev *rxe = to_rdev(ibpd->device);
1086 struct rxe_pd *pd = to_rpd(ibpd);
1087 struct rxe_mem *mr;
1088 int err;
1089
1090 if (mr_type != IB_MR_TYPE_MEM_REG)
1091 return ERR_PTR(-EINVAL);
1092
1093 mr = rxe_alloc(&rxe->mr_pool);
1094 if (!mr) {
1095 err = -ENOMEM;
1096 goto err1;
1097 }
1098
1099 rxe_add_index(mr);
1100
1101 rxe_add_ref(pd);
1102
1103 err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
1104 if (err)
1105 goto err2;
1106
1107 return &mr->ibmr;
1108
1109err2:
1110 rxe_drop_ref(pd);
1111 rxe_drop_index(mr);
1112 rxe_drop_ref(mr);
1113err1:
1114 return ERR_PTR(err);
1115}
1116
1117static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
1118{
1119 struct rxe_mem *mr = to_rmr(ibmr);
1120 struct rxe_map *map;
1121 struct rxe_phys_buf *buf;
1122
1123 if (unlikely(mr->nbuf == mr->num_buf))
1124 return -ENOMEM;
1125
1126 map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
1127 buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
1128
1129 buf->addr = addr;
1130 buf->size = ibmr->page_size;
1131 mr->nbuf++;
1132
1133 return 0;
1134}
1135
1136static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1137 unsigned int *sg_offset)
1138{
1139 struct rxe_mem *mr = to_rmr(ibmr);
1140 int n;
1141
1142 mr->nbuf = 0;
1143
1144 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
1145
1146 mr->va = ibmr->iova;
1147 mr->iova = ibmr->iova;
1148 mr->length = ibmr->length;
1149 mr->page_shift = ilog2(ibmr->page_size);
1150 mr->page_mask = ibmr->page_size - 1;
1151 mr->offset = mr->iova & mr->page_mask;
1152
1153 return n;
1154}
1155
1156static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
1157{
1158 int err;
1159 struct rxe_dev *rxe = to_rdev(ibqp->device);
1160 struct rxe_qp *qp = to_rqp(ibqp);
1161 struct rxe_mc_grp *grp;
1162
1163 /* takes a ref on grp if successful */
1164 err = rxe_mcast_get_grp(rxe, mgid, &grp);
1165 if (err)
1166 return err;
1167
1168 err = rxe_mcast_add_grp_elem(rxe, qp, grp);
1169
1170 rxe_drop_ref(grp);
1171 return err;
1172}
1173
1174static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
1175{
1176 struct rxe_dev *rxe = to_rdev(ibqp->device);
1177 struct rxe_qp *qp = to_rqp(ibqp);
1178
1179 return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
1180}
1181
1182static ssize_t rxe_show_parent(struct device *device,
1183 struct device_attribute *attr, char *buf)
1184{
1185 struct rxe_dev *rxe = container_of(device, struct rxe_dev,
1186 ib_dev.dev);
1187 char *name;
1188
1189 name = rxe->ifc_ops->parent_name(rxe, 1);
1190 return snprintf(buf, 16, "%s\n", name);
1191}
1192
1193static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);
1194
1195static struct device_attribute *rxe_dev_attributes[] = {
1196 &dev_attr_parent,
1197};
1198
1199int rxe_register_device(struct rxe_dev *rxe)
1200{
1201 int err;
1202 int i;
1203 struct ib_device *dev = &rxe->ib_dev;
1204
1205 strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
1206 strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
1207
1208 dev->owner = THIS_MODULE;
1209 dev->node_type = RDMA_NODE_IB_CA;
1210 dev->phys_port_cnt = 1;
1211 dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
1212 dev->dma_device = rxe->ifc_ops->dma_device(rxe);
1213 dev->local_dma_lkey = 0;
1214 dev->node_guid = rxe->ifc_ops->node_guid(rxe);
1215 dev->dma_ops = &rxe_dma_mapping_ops;
1216
1217 dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
1218 dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
1219 | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
1220 | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
1221 | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
1222 | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
1223 | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
1224 | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
1225 | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
1226 | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
1227 | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
1228 | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
1229 | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
1230 | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
1231 | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
1232 | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
1233 | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
1234 | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
1235 | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
1236 | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
1237 | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
1238 | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
1239 | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
1240 | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
1241 | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
1242 | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
1243 | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
1244 | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
1245 | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
1246 | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
1247 | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
1248 | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
1249 ;
1250
1251 dev->query_device = rxe_query_device;
1252 dev->modify_device = rxe_modify_device;
1253 dev->query_port = rxe_query_port;
1254 dev->modify_port = rxe_modify_port;
1255 dev->get_link_layer = rxe_get_link_layer;
1256 dev->query_gid = rxe_query_gid;
1257 dev->get_netdev = rxe_get_netdev;
1258 dev->add_gid = rxe_add_gid;
1259 dev->del_gid = rxe_del_gid;
1260 dev->query_pkey = rxe_query_pkey;
1261 dev->alloc_ucontext = rxe_alloc_ucontext;
1262 dev->dealloc_ucontext = rxe_dealloc_ucontext;
1263 dev->mmap = rxe_mmap;
1264 dev->get_port_immutable = rxe_port_immutable;
1265 dev->alloc_pd = rxe_alloc_pd;
1266 dev->dealloc_pd = rxe_dealloc_pd;
1267 dev->create_ah = rxe_create_ah;
1268 dev->modify_ah = rxe_modify_ah;
1269 dev->query_ah = rxe_query_ah;
1270 dev->destroy_ah = rxe_destroy_ah;
1271 dev->create_srq = rxe_create_srq;
1272 dev->modify_srq = rxe_modify_srq;
1273 dev->query_srq = rxe_query_srq;
1274 dev->destroy_srq = rxe_destroy_srq;
1275 dev->post_srq_recv = rxe_post_srq_recv;
1276 dev->create_qp = rxe_create_qp;
1277 dev->modify_qp = rxe_modify_qp;
1278 dev->query_qp = rxe_query_qp;
1279 dev->destroy_qp = rxe_destroy_qp;
1280 dev->post_send = rxe_post_send;
1281 dev->post_recv = rxe_post_recv;
1282 dev->create_cq = rxe_create_cq;
1283 dev->destroy_cq = rxe_destroy_cq;
1284 dev->resize_cq = rxe_resize_cq;
1285 dev->poll_cq = rxe_poll_cq;
1286 dev->peek_cq = rxe_peek_cq;
1287 dev->req_notify_cq = rxe_req_notify_cq;
1288 dev->get_dma_mr = rxe_get_dma_mr;
1289 dev->reg_user_mr = rxe_reg_user_mr;
1290 dev->dereg_mr = rxe_dereg_mr;
1291 dev->alloc_mr = rxe_alloc_mr;
1292 dev->map_mr_sg = rxe_map_mr_sg;
1293 dev->attach_mcast = rxe_attach_mcast;
1294 dev->detach_mcast = rxe_detach_mcast;
1295
1296 err = ib_register_device(dev, NULL);
1297 if (err) {
1298 pr_warn("rxe_register_device failed, err = %d\n", err);
1299 goto err1;
1300 }
1301
1302 for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
1303 err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
1304 if (err) {
1305 pr_warn("device_create_file failed, i = %d, err = %d\n",
1306 i, err);
1307 goto err2;
1308 }
1309 }
1310
1311 return 0;
1312
1313err2:
1314 ib_unregister_device(dev);
1315err1:
1316 return err;
1317}
1318
1319int rxe_unregister_device(struct rxe_dev *rxe)
1320{
1321 int i;
1322 struct ib_device *dev = &rxe->ib_dev;
1323
1324 for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
1325 device_remove_file(&dev->dev, rxe_dev_attributes[i]);
1326
1327 ib_unregister_device(dev);
1328
1329 return 0;
1330}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
new file mode 100644
index 000000000000..cac1d52a08f0
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -0,0 +1,480 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef RXE_VERBS_H
35#define RXE_VERBS_H
36
37#include <linux/interrupt.h>
38#include <rdma/rdma_user_rxe.h>
39#include "rxe_pool.h"
40#include "rxe_task.h"
41
42static inline int pkey_match(u16 key1, u16 key2)
43{
44 return (((key1 & 0x7fff) != 0) &&
45 ((key1 & 0x7fff) == (key2 & 0x7fff)) &&
46 ((key1 & 0x8000) || (key2 & 0x8000))) ? 1 : 0;
47}
48
49/* Return >0 if psn_a > psn_b
50 * 0 if psn_a == psn_b
51 * <0 if psn_a < psn_b
52 */
53static inline int psn_compare(u32 psn_a, u32 psn_b)
54{
55 s32 diff;
56
57 diff = (psn_a - psn_b) << 8;
58 return diff;
59}
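
To see why the shift by 8 gives correct wraparound behaviour for the 24-bit PSNs, and what the partition-key rule in pkey_match() amounts to, here is a small standalone check (a userspace restatement of the two helpers above, assuming only the C standard headers):

#include <assert.h>
#include <stdint.h>

static int pkey_match(uint16_t key1, uint16_t key2)
{
	return (((key1 & 0x7fff) != 0) &&
		((key1 & 0x7fff) == (key2 & 0x7fff)) &&
		((key1 & 0x8000) || (key2 & 0x8000))) ? 1 : 0;
}

static int psn_compare(uint32_t psn_a, uint32_t psn_b)
{
	/* PSNs are 24 bits; shifting the difference into the top byte makes
	 * the sign of the result follow the shorter way around the ring. */
	return (int32_t)((psn_a - psn_b) << 8);
}

int main(void)
{
	/* 0x000001 is one step past 0xffffff across the 24-bit wrap */
	assert(psn_compare(0x000001, 0xffffff) > 0);
	assert(psn_compare(0xffffff, 0x000001) < 0);
	assert(psn_compare(0x123456, 0x123456) == 0);

	/* same low 15 bits and at least one full-member bit required */
	assert(pkey_match(0x8001, 0x0001) == 1);
	assert(pkey_match(0x0001, 0x0001) == 0);	/* both limited members */
	assert(pkey_match(0x0000, 0xffff) == 0);	/* invalid pkey */
	return 0;
}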
60
61struct rxe_ucontext {
62 struct rxe_pool_entry pelem;
63 struct ib_ucontext ibuc;
64};
65
66struct rxe_pd {
67 struct rxe_pool_entry pelem;
68 struct ib_pd ibpd;
69};
70
71struct rxe_ah {
72 struct rxe_pool_entry pelem;
73 struct ib_ah ibah;
74 struct rxe_pd *pd;
75 struct rxe_av av;
76};
77
78struct rxe_cqe {
79 union {
80 struct ib_wc ibwc;
81 struct ib_uverbs_wc uibwc;
82 };
83};
84
85struct rxe_cq {
86 struct rxe_pool_entry pelem;
87 struct ib_cq ibcq;
88 struct rxe_queue *queue;
89 spinlock_t cq_lock;
90 u8 notify;
91 int is_user;
92 struct tasklet_struct comp_task;
93};
94
95enum wqe_state {
96 wqe_state_posted,
97 wqe_state_processing,
98 wqe_state_pending,
99 wqe_state_done,
100 wqe_state_error,
101};
102
103struct rxe_sq {
104 int max_wr;
105 int max_sge;
106 int max_inline;
107 spinlock_t sq_lock; /* guard queue */
108 struct rxe_queue *queue;
109};
110
111struct rxe_rq {
112 int max_wr;
113 int max_sge;
114 spinlock_t producer_lock; /* guard queue producer */
115 spinlock_t consumer_lock; /* guard queue consumer */
116 struct rxe_queue *queue;
117};
118
119struct rxe_srq {
120 struct rxe_pool_entry pelem;
121 struct ib_srq ibsrq;
122 struct rxe_pd *pd;
123 struct rxe_rq rq;
124 u32 srq_num;
125
126 int limit;
127 int error;
128};
129
130enum rxe_qp_state {
131 QP_STATE_RESET,
132 QP_STATE_INIT,
133 QP_STATE_READY,
134 QP_STATE_DRAIN, /* req only */
135 QP_STATE_DRAINED, /* req only */
136 QP_STATE_ERROR
137};
138
139extern char *rxe_qp_state_name[];
140
141struct rxe_req_info {
142 enum rxe_qp_state state;
143 int wqe_index;
144 u32 psn;
145 int opcode;
146 atomic_t rd_atomic;
147 int wait_fence;
148 int need_rd_atomic;
149 int wait_psn;
150 int need_retry;
151 int noack_pkts;
152 struct rxe_task task;
153};
154
155struct rxe_comp_info {
156 u32 psn;
157 int opcode;
158 int timeout;
159 int timeout_retry;
160 u32 retry_cnt;
161 u32 rnr_retry;
162 struct rxe_task task;
163};
164
165enum rdatm_res_state {
166 rdatm_res_state_next,
167 rdatm_res_state_new,
168 rdatm_res_state_replay,
169};
170
171struct resp_res {
172 int type;
173 u32 first_psn;
174 u32 last_psn;
175 u32 cur_psn;
176 enum rdatm_res_state state;
177
178 union {
179 struct {
180 struct sk_buff *skb;
181 } atomic;
182 struct {
183 struct rxe_mem *mr;
184 u64 va_org;
185 u32 rkey;
186 u32 length;
187 u64 va;
188 u32 resid;
189 } read;
190 };
191};
192
193struct rxe_resp_info {
194 enum rxe_qp_state state;
195 u32 msn;
196 u32 psn;
197 int opcode;
198 int drop_msg;
199 int goto_error;
200 int sent_psn_nak;
201 enum ib_wc_status status;
202 u8 aeth_syndrome;
203
204 /* Receive only */
205 struct rxe_recv_wqe *wqe;
206
207 /* RDMA read / atomic only */
208 u64 va;
209 struct rxe_mem *mr;
210 u32 resid;
211 u32 rkey;
212 u64 atomic_orig;
213
214 /* SRQ only */
215 struct {
216 struct rxe_recv_wqe wqe;
217 struct ib_sge sge[RXE_MAX_SGE];
218 } srq_wqe;
219
220 /* Responder resources. It's a circular list where the oldest
221 * resource is dropped first.
222 */
223 struct resp_res *resources;
224 unsigned int res_head;
225 unsigned int res_tail;
226 struct resp_res *res;
227 struct rxe_task task;
228};
229
230struct rxe_qp {
231 struct rxe_pool_entry pelem;
232 struct ib_qp ibqp;
233 struct ib_qp_attr attr;
234 unsigned int valid;
235 unsigned int mtu;
236 int is_user;
237
238 struct rxe_pd *pd;
239 struct rxe_srq *srq;
240 struct rxe_cq *scq;
241 struct rxe_cq *rcq;
242
243 enum ib_sig_type sq_sig_type;
244
245 struct rxe_sq sq;
246 struct rxe_rq rq;
247
248 struct socket *sk;
249
250 struct rxe_av pri_av;
251 struct rxe_av alt_av;
252
253 /* list of mcast groups qp has joined (for cleanup) */
254 struct list_head grp_list;
255 spinlock_t grp_lock; /* guard grp_list */
256
257 struct sk_buff_head req_pkts;
258 struct sk_buff_head resp_pkts;
259 struct sk_buff_head send_pkts;
260
261 struct rxe_req_info req;
262 struct rxe_comp_info comp;
263 struct rxe_resp_info resp;
264
265 atomic_t ssn;
266 atomic_t skb_out;
267 int need_req_skb;
268
 269	/* Timer for retransmitting a packet when ACKs have been lost. RC
270 * only. The requester sets it when it is not already
271 * started. The responder resets it whenever an ack is
272 * received.
273 */
274 struct timer_list retrans_timer;
275 u64 qp_timeout_jiffies;
276
277 /* Timer for handling RNR NAKS. */
278 struct timer_list rnr_nak_timer;
279
280 spinlock_t state_lock; /* guard requester and completer */
281};
282
283enum rxe_mem_state {
284 RXE_MEM_STATE_ZOMBIE,
285 RXE_MEM_STATE_INVALID,
286 RXE_MEM_STATE_FREE,
287 RXE_MEM_STATE_VALID,
288};
289
290enum rxe_mem_type {
291 RXE_MEM_TYPE_NONE,
292 RXE_MEM_TYPE_DMA,
293 RXE_MEM_TYPE_MR,
294 RXE_MEM_TYPE_FMR,
295 RXE_MEM_TYPE_MW,
296};
297
298#define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf))
299
300struct rxe_phys_buf {
301 u64 addr;
302 u64 size;
303};
304
305struct rxe_map {
306 struct rxe_phys_buf buf[RXE_BUF_PER_MAP];
307};
308
309struct rxe_mem {
310 struct rxe_pool_entry pelem;
311 union {
312 struct ib_mr ibmr;
313 struct ib_mw ibmw;
314 };
315
316 struct rxe_pd *pd;
317 struct ib_umem *umem;
318
319 u32 lkey;
320 u32 rkey;
321
322 enum rxe_mem_state state;
323 enum rxe_mem_type type;
324 u64 va;
325 u64 iova;
326 size_t length;
327 u32 offset;
328 int access;
329
330 int page_shift;
331 int page_mask;
332 int map_shift;
333 int map_mask;
334
335 u32 num_buf;
336 u32 nbuf;
337
338 u32 max_buf;
339 u32 num_map;
340
341 struct rxe_map **map;
342};
343
344struct rxe_mc_grp {
345 struct rxe_pool_entry pelem;
346 spinlock_t mcg_lock; /* guard group */
347 struct rxe_dev *rxe;
348 struct list_head qp_list;
349 union ib_gid mgid;
350 int num_qp;
351 u32 qkey;
352 u16 pkey;
353};
354
355struct rxe_mc_elem {
356 struct rxe_pool_entry pelem;
357 struct list_head qp_list;
358 struct list_head grp_list;
359 struct rxe_qp *qp;
360 struct rxe_mc_grp *grp;
361};
362
363struct rxe_port {
364 struct ib_port_attr attr;
365 u16 *pkey_tbl;
366 __be64 port_guid;
367 __be64 subnet_prefix;
368 spinlock_t port_lock; /* guard port */
369 unsigned int mtu_cap;
370 /* special QPs */
371 u32 qp_smi_index;
372 u32 qp_gsi_index;
373};
374
375/* callbacks from rdma_rxe to network interface layer */
376struct rxe_ifc_ops {
377 void (*release)(struct rxe_dev *rxe);
378 __be64 (*node_guid)(struct rxe_dev *rxe);
379 __be64 (*port_guid)(struct rxe_dev *rxe);
380 struct device *(*dma_device)(struct rxe_dev *rxe);
381 int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);
382 int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);
383 int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
384 struct sk_buff *skb, u32 *crc);
385 int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
386 struct sk_buff *skb);
387 int (*loopback)(struct sk_buff *skb);
388 struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,
389 int paylen, struct rxe_pkt_info *pkt);
390 char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);
391 enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,
392 unsigned int port_num);
393};
394
395struct rxe_dev {
396 struct ib_device ib_dev;
397 struct ib_device_attr attr;
398 int max_ucontext;
399 int max_inline_data;
400 struct kref ref_cnt;
401 struct mutex usdev_lock;
402
403 struct rxe_ifc_ops *ifc_ops;
404
405 struct net_device *ndev;
406
407 int xmit_errors;
408
409 struct rxe_pool uc_pool;
410 struct rxe_pool pd_pool;
411 struct rxe_pool ah_pool;
412 struct rxe_pool srq_pool;
413 struct rxe_pool qp_pool;
414 struct rxe_pool cq_pool;
415 struct rxe_pool mr_pool;
416 struct rxe_pool mw_pool;
417 struct rxe_pool mc_grp_pool;
418 struct rxe_pool mc_elem_pool;
419
420 spinlock_t pending_lock; /* guard pending_mmaps */
421 struct list_head pending_mmaps;
422
423 spinlock_t mmap_offset_lock; /* guard mmap_offset */
424 int mmap_offset;
425
426 struct rxe_port port;
427 struct list_head list;
428};
429
430static inline struct rxe_dev *to_rdev(struct ib_device *dev)
431{
432 return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
433}
434
435static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc)
436{
437 return uc ? container_of(uc, struct rxe_ucontext, ibuc) : NULL;
438}
439
440static inline struct rxe_pd *to_rpd(struct ib_pd *pd)
441{
442 return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL;
443}
444
445static inline struct rxe_ah *to_rah(struct ib_ah *ah)
446{
447 return ah ? container_of(ah, struct rxe_ah, ibah) : NULL;
448}
449
450static inline struct rxe_srq *to_rsrq(struct ib_srq *srq)
451{
452 return srq ? container_of(srq, struct rxe_srq, ibsrq) : NULL;
453}
454
455static inline struct rxe_qp *to_rqp(struct ib_qp *qp)
456{
457 return qp ? container_of(qp, struct rxe_qp, ibqp) : NULL;
458}
459
460static inline struct rxe_cq *to_rcq(struct ib_cq *cq)
461{
462 return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL;
463}
464
465static inline struct rxe_mem *to_rmr(struct ib_mr *mr)
466{
467 return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL;
468}
469
470static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
471{
472 return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
473}
474
475int rxe_register_device(struct rxe_dev *rxe);
476int rxe_unregister_device(struct rxe_dev *rxe);
477
478void rxe_mc_cleanup(void *arg);
479
480#endif /* RXE_VERBS_H */
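
The pkey_match() and psn_compare() helpers near the top of this header encode two small conventions: P_Keys match when their low 15 bits are equal and non-zero and at least one key has the full-member bit (bit 15) set, and PSNs are 24-bit serial numbers, so shifting the difference left by 8 moves bit 23 into the sign bit of an s32 and makes comparisons behave correctly across the 0xffffff wrap. A minimal standalone sketch of the PSN comparison (not part of the patch):

	#include <stdio.h>
	#include <stdint.h>

	/* Same arithmetic as rxe's psn_compare(), in plain C99. */
	static int psn_compare(uint32_t psn_a, uint32_t psn_b)
	{
		return (int32_t)((psn_a - psn_b) << 8);
	}

	int main(void)
	{
		/* 0x000005 is "ahead of" 0xfffffa even though it is numerically smaller. */
		printf("%d\n", psn_compare(0x000005, 0xfffffa) > 0);	/* 1 */
		printf("%d\n", psn_compare(0xfffffa, 0x000005) < 0);	/* 1 */
		printf("%d\n", psn_compare(0x000123, 0x000123) == 0);	/* 1 */
		return 0;
	}
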
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 1502199c8e56..7b6d40ff1acf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -62,10 +62,8 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
62{ 62{
63 struct ipoib_dev_priv *priv = netdev_priv(netdev); 63 struct ipoib_dev_priv *priv = netdev_priv(netdev);
64 64
65 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), 65 ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
66 "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32), 66 sizeof(drvinfo->fw_version));
67 (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
68 (int)priv->ca->attrs.fw_ver & 0xffff);
69 67
70 strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device), 68 strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
71 sizeof(drvinfo->bus_info)); 69 sizeof(drvinfo->bus_info));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 5f58c41ef787..74bcaa064226 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1967,8 +1967,7 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
1967 priv->hca_caps = hca->attrs.device_cap_flags; 1967 priv->hca_caps = hca->attrs.device_cap_flags;
1968 1968
1969 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { 1969 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
1970 priv->dev->hw_features = NETIF_F_SG | 1970 priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
1971 NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
1972 1971
1973 if (priv->hca_caps & IB_DEVICE_UD_TSO) 1972 if (priv->hca_caps & IB_DEVICE_UD_TSO)
1974 priv->dev->hw_features |= NETIF_F_TSO; 1973 priv->dev->hw_features |= NETIF_F_TSO;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 1e7cbbaa15bd..c55ecb2c3736 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -135,7 +135,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
135 .cap = { 135 .cap = {
136 .max_send_wr = ipoib_sendq_size, 136 .max_send_wr = ipoib_sendq_size,
137 .max_recv_wr = ipoib_recvq_size, 137 .max_recv_wr = ipoib_recvq_size,
138 .max_send_sge = 1, 138 .max_send_sge = min_t(u32, priv->ca->attrs.max_sge,
139 MAX_SKB_FRAGS + 1),
139 .max_recv_sge = IPOIB_UD_RX_SG 140 .max_recv_sge = IPOIB_UD_RX_SG
140 }, 141 },
141 .sq_sig_type = IB_SIGNAL_ALL_WR, 142 .sq_sig_type = IB_SIGNAL_ALL_WR,
@@ -205,10 +206,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
205 if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) 206 if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
206 init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; 207 init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
207 208
208 if (dev->features & NETIF_F_SG)
209 init_attr.cap.max_send_sge =
210 min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
211
212 priv->qp = ib_create_qp(priv->pd, &init_attr); 209 priv->qp = ib_create_qp(priv->pd, &init_attr);
213 if (IS_ERR(priv->qp)) { 210 if (IS_ERR(priv->qp)) {
214 printk(KERN_WARNING "%s: failed to create QP\n", ca->name); 211 printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
@@ -234,6 +231,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
234 priv->rx_wr.next = NULL; 231 priv->rx_wr.next = NULL;
235 priv->rx_wr.sg_list = priv->rx_sge; 232 priv->rx_wr.sg_list = priv->rx_sge;
236 233
234 if (init_attr.cap.max_send_sge > 1)
235 dev->features |= NETIF_F_SG;
236
237 priv->max_send_sge = init_attr.cap.max_send_sge; 237 priv->max_send_sge = init_attr.cap.max_send_sge;
238 238
239 return 0; 239 return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 4705e2dea423..e0ebe1378cb2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -104,6 +104,8 @@ enum {
104 104
105enum CPL_error { 105enum CPL_error {
106 CPL_ERR_NONE = 0, 106 CPL_ERR_NONE = 0,
107 CPL_ERR_TCAM_PARITY = 1,
108 CPL_ERR_TCAM_MISS = 2,
107 CPL_ERR_TCAM_FULL = 3, 109 CPL_ERR_TCAM_FULL = 3,
108 CPL_ERR_BAD_LENGTH = 15, 110 CPL_ERR_BAD_LENGTH = 15,
109 CPL_ERR_BAD_ROUTE = 18, 111 CPL_ERR_BAD_ROUTE = 18,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index f4497cf4d06d..d728704d0c7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -721,6 +721,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
721#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 721#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
722#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 722#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
723#define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c 723#define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c
724#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c
724#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d 725#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
725#define QUERY_DEV_CAP_VXLAN 0x9e 726#define QUERY_DEV_CAP_VXLAN 0x9e
726#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 727#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
@@ -935,6 +936,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
935 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; 936 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP;
936 if (field32 & (1 << 7)) 937 if (field32 & (1 << 7))
937 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; 938 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT;
939 MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT);
940 if (field32 & (1 << 17))
941 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT;
938 MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); 942 MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
939 if (field & 1<<6) 943 if (field & 1<<6)
940 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; 944 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
@@ -2457,6 +2461,42 @@ int mlx4_NOP(struct mlx4_dev *dev)
2457 MLX4_CMD_NATIVE); 2461 MLX4_CMD_NATIVE);
2458} 2462}
2459 2463
2464int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
2465 const u32 offset[],
2466 u32 value[], size_t array_len, u8 port)
2467{
2468 struct mlx4_cmd_mailbox *mailbox;
2469 u32 *outbox;
2470 size_t i;
2471 int ret;
2472
2473 mailbox = mlx4_alloc_cmd_mailbox(dev);
2474 if (IS_ERR(mailbox))
2475 return PTR_ERR(mailbox);
2476
2477 outbox = mailbox->buf;
2478
2479 ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier,
2480 MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A,
2481 MLX4_CMD_NATIVE);
2482 if (ret)
2483 goto out;
2484
2485 for (i = 0; i < array_len; i++) {
2486 if (offset[i] > MLX4_MAILBOX_SIZE) {
2487 ret = -EINVAL;
2488 goto out;
2489 }
2490
2491 MLX4_GET(value[i], outbox, offset[i]);
2492 }
2493
2494out:
2495 mlx4_free_cmd_mailbox(dev, mailbox);
2496 return ret;
2497}
2498EXPORT_SYMBOL(mlx4_query_diag_counters);
2499
2460int mlx4_get_phys_port_id(struct mlx4_dev *dev) 2500int mlx4_get_phys_port_id(struct mlx4_dev *dev)
2461{ 2501{
2462 u8 port; 2502 u8 port;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index 04bc522605a0..c07f4d01b70e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -63,12 +63,12 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
63 complete(&srq->free); 63 complete(&srq->free);
64} 64}
65 65
66static int get_pas_size(void *srqc) 66static int get_pas_size(struct mlx5_srq_attr *in)
67{ 67{
68 u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12; 68 u32 log_page_size = in->log_page_size + 12;
69 u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size); 69 u32 log_srq_size = in->log_size;
70 u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride); 70 u32 log_rq_stride = in->wqe_shift;
71 u32 page_offset = MLX5_GET(srqc, srqc, page_offset); 71 u32 page_offset = in->page_offset;
72 u32 po_quanta = 1 << (log_page_size - 6); 72 u32 po_quanta = 1 << (log_page_size - 6);
73 u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); 73 u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
74 u32 page_size = 1 << log_page_size; 74 u32 page_size = 1 << log_page_size;
@@ -78,57 +78,58 @@ static int get_pas_size(void *srqc)
78 return rq_num_pas * sizeof(u64); 78 return rq_num_pas * sizeof(u64);
79} 79}
80 80
81static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc) 81static void set_wq(void *wq, struct mlx5_srq_attr *in)
82{ 82{
83 void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq); 83 MLX5_SET(wq, wq, wq_signature, !!(in->flags
84 84 & MLX5_SRQ_FLAG_WQ_SIG));
85 if (srqc_to_rmpc) { 85 MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
86 switch (MLX5_GET(srqc, srqc, state)) { 86 MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
87 case MLX5_SRQC_STATE_GOOD: 87 MLX5_SET(wq, wq, log_wq_sz, in->log_size);
88 MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); 88 MLX5_SET(wq, wq, page_offset, in->page_offset);
89 break; 89 MLX5_SET(wq, wq, lwm, in->lwm);
90 case MLX5_SRQC_STATE_ERROR: 90 MLX5_SET(wq, wq, pd, in->pd);
91 MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR); 91 MLX5_SET64(wq, wq, dbr_addr, in->db_record);
92 break; 92}
93 default: 93
94 pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__, 94static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
95 __LINE__, MLX5_GET(srqc, srqc, state)); 95{
96 MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state)); 96 MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
97 } 97 & MLX5_SRQ_FLAG_WQ_SIG));
98 98 MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
99 MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature)); 99 MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
100 MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size)); 100 MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
101 MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4); 101 MLX5_SET(srqc, srqc, page_offset, in->page_offset);
102 MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size)); 102 MLX5_SET(srqc, srqc, lwm, in->lwm);
103 MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset)); 103 MLX5_SET(srqc, srqc, pd, in->pd);
104 MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm)); 104 MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
105 MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd)); 105 MLX5_SET(srqc, srqc, xrcd, in->xrcd);
106 MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr)); 106 MLX5_SET(srqc, srqc, cqn, in->cqn);
107 } else { 107}
108 switch (MLX5_GET(rmpc, rmpc, state)) { 108
109 case MLX5_RMPC_STATE_RDY: 109static void get_wq(void *wq, struct mlx5_srq_attr *in)
110 MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD); 110{
111 break; 111 if (MLX5_GET(wq, wq, wq_signature))
112 case MLX5_RMPC_STATE_ERR: 112 in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
113 MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR); 113 in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
114 break; 114 in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
115 default: 115 in->log_size = MLX5_GET(wq, wq, log_wq_sz);
116 pr_warn("%s: %d: Unknown rmp state = 0x%x\n", 116 in->page_offset = MLX5_GET(wq, wq, page_offset);
117 __func__, __LINE__, 117 in->lwm = MLX5_GET(wq, wq, lwm);
118 MLX5_GET(rmpc, rmpc, state)); 118 in->pd = MLX5_GET(wq, wq, pd);
119 MLX5_SET(srqc, srqc, state, 119 in->db_record = MLX5_GET64(wq, wq, dbr_addr);
120 MLX5_GET(rmpc, rmpc, state)); 120}
121 } 121
122 122static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
123 MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature)); 123{
124 MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz)); 124 if (MLX5_GET(srqc, srqc, wq_signature))
125 MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4); 125 in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
126 MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz)); 126 in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
127 MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset)); 127 in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
128 MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm)); 128 in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
129 MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd)); 129 in->page_offset = MLX5_GET(srqc, srqc, page_offset);
130 MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr)); 130 in->lwm = MLX5_GET(srqc, srqc, lwm);
131 } 131 in->pd = MLX5_GET(srqc, srqc, pd);
132 in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
132} 133}
133 134
134struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) 135struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
@@ -149,19 +150,36 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
149EXPORT_SYMBOL(mlx5_core_get_srq); 150EXPORT_SYMBOL(mlx5_core_get_srq);
150 151
151static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 152static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
152 struct mlx5_create_srq_mbox_in *in, int inlen) 153 struct mlx5_srq_attr *in)
153{ 154{
154 struct mlx5_create_srq_mbox_out out; 155 u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
156 void *create_in;
157 void *srqc;
158 void *pas;
159 int pas_size;
160 int inlen;
155 int err; 161 int err;
156 162
157 memset(&out, 0, sizeof(out)); 163 pas_size = get_pas_size(in);
164 inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
165 create_in = mlx5_vzalloc(inlen);
166 if (!create_in)
167 return -ENOMEM;
168
169 srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
170 pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
158 171
159 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); 172 set_srqc(srqc, in);
173 memcpy(pas, in->pas, pas_size);
160 174
161 err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out), 175 MLX5_SET(create_srq_in, create_in, opcode,
162 sizeof(out)); 176 MLX5_CMD_OP_CREATE_SRQ);
163 177
164 srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; 178 err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out,
179 sizeof(create_out));
180 kvfree(create_in);
181 if (!err)
182 srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
165 183
166 return err; 184 return err;
167} 185}
@@ -169,67 +187,75 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
169static int destroy_srq_cmd(struct mlx5_core_dev *dev, 187static int destroy_srq_cmd(struct mlx5_core_dev *dev,
170 struct mlx5_core_srq *srq) 188 struct mlx5_core_srq *srq)
171{ 189{
172 struct mlx5_destroy_srq_mbox_in in; 190 u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
173 struct mlx5_destroy_srq_mbox_out out; 191 u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
174 192
175 memset(&in, 0, sizeof(in)); 193 MLX5_SET(destroy_srq_in, srq_in, opcode,
176 memset(&out, 0, sizeof(out)); 194 MLX5_CMD_OP_DESTROY_SRQ);
177 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); 195 MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
178 in.srqn = cpu_to_be32(srq->srqn);
179 196
180 return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), 197 return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
181 (u32 *)(&out), sizeof(out)); 198 srq_out, sizeof(srq_out));
182} 199}
183 200
184static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 201static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
185 u16 lwm, int is_srq) 202 u16 lwm, int is_srq)
186{ 203{
 187	struct mlx5_arm_srq_mbox_in in; 204	/* arm_srq structs are missing; use the identical xrc ones */
188 struct mlx5_arm_srq_mbox_out out; 205 u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
189 206 u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
190 memset(&in, 0, sizeof(in));
191 memset(&out, 0, sizeof(out));
192 207
193 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); 208 MLX5_SET(arm_xrc_srq_in, srq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
194 in.hdr.opmod = cpu_to_be16(!!is_srq); 209 MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn);
195 in.srqn = cpu_to_be32(srq->srqn); 210 MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm);
196 in.lwm = cpu_to_be16(lwm);
197 211
198 return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), 212 return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
199 sizeof(in), (u32 *)(&out), 213 srq_out, sizeof(srq_out));
200 sizeof(out));
201} 214}
202 215
203static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 216static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
204 struct mlx5_query_srq_mbox_out *out) 217 struct mlx5_srq_attr *out)
205{ 218{
206 struct mlx5_query_srq_mbox_in in; 219 u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
220 u32 *srq_out;
221 void *srqc;
222 int err;
207 223
208 memset(&in, 0, sizeof(in)); 224 srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out));
225 if (!srq_out)
226 return -ENOMEM;
209 227
210 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); 228 MLX5_SET(query_srq_in, srq_in, opcode,
211 in.srqn = cpu_to_be32(srq->srqn); 229 MLX5_CMD_OP_QUERY_SRQ);
230 MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
231 err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
232 srq_out,
233 MLX5_ST_SZ_BYTES(query_srq_out));
234 if (err)
235 goto out;
212 236
213 return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), 237 srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
214 (u32 *)out, sizeof(*out)); 238 get_srqc(srqc, out);
239 if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
240 out->flags |= MLX5_SRQ_FLAG_ERR;
241out:
242 kvfree(srq_out);
243 return err;
215} 244}
216 245
217static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, 246static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
218 struct mlx5_core_srq *srq, 247 struct mlx5_core_srq *srq,
219 struct mlx5_create_srq_mbox_in *in, 248 struct mlx5_srq_attr *in)
220 int srq_inlen)
221{ 249{
222 u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; 250 u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
223 void *create_in; 251 void *create_in;
224 void *srqc;
225 void *xrc_srqc; 252 void *xrc_srqc;
226 void *pas; 253 void *pas;
227 int pas_size; 254 int pas_size;
228 int inlen; 255 int inlen;
229 int err; 256 int err;
230 257
231 srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); 258 pas_size = get_pas_size(in);
232 pas_size = get_pas_size(srqc);
233 inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; 259 inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
234 create_in = mlx5_vzalloc(inlen); 260 create_in = mlx5_vzalloc(inlen);
235 if (!create_in) 261 if (!create_in)
@@ -239,7 +265,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
239 xrc_srq_context_entry); 265 xrc_srq_context_entry);
240 pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); 266 pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
241 267
242 memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); 268 set_srqc(xrc_srqc, in);
269 MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
243 memcpy(pas, in->pas, pas_size); 270 memcpy(pas, in->pas, pas_size);
244 MLX5_SET(create_xrc_srq_in, create_in, opcode, 271 MLX5_SET(create_xrc_srq_in, create_in, opcode,
245 MLX5_CMD_OP_CREATE_XRC_SRQ); 272 MLX5_CMD_OP_CREATE_XRC_SRQ);
@@ -293,11 +320,10 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
293 320
294static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, 321static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
295 struct mlx5_core_srq *srq, 322 struct mlx5_core_srq *srq,
296 struct mlx5_query_srq_mbox_out *out) 323 struct mlx5_srq_attr *out)
297{ 324{
298 u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; 325 u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
299 u32 *xrcsrq_out; 326 u32 *xrcsrq_out;
300 void *srqc;
301 void *xrc_srqc; 327 void *xrc_srqc;
302 int err; 328 int err;
303 329
@@ -317,8 +343,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
317 343
318 xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, 344 xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
319 xrc_srq_context_entry); 345 xrc_srq_context_entry);
320 srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); 346 get_srqc(xrc_srqc, out);
321 memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); 347 if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
348 out->flags |= MLX5_SRQ_FLAG_ERR;
322 349
323out: 350out:
324 kvfree(xrcsrq_out); 351 kvfree(xrcsrq_out);
@@ -326,26 +353,27 @@ out:
326} 353}
327 354
328static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 355static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
329 struct mlx5_create_srq_mbox_in *in, int srq_inlen) 356 struct mlx5_srq_attr *in)
330{ 357{
331 void *create_in; 358 void *create_in;
332 void *rmpc; 359 void *rmpc;
333 void *srqc; 360 void *wq;
334 int pas_size; 361 int pas_size;
335 int inlen; 362 int inlen;
336 int err; 363 int err;
337 364
338 srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); 365 pas_size = get_pas_size(in);
339 pas_size = get_pas_size(srqc);
340 inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; 366 inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
341 create_in = mlx5_vzalloc(inlen); 367 create_in = mlx5_vzalloc(inlen);
342 if (!create_in) 368 if (!create_in)
343 return -ENOMEM; 369 return -ENOMEM;
344 370
345 rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); 371 rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
372 wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
346 373
374 MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
375 set_wq(wq, in);
347 memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); 376 memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
348 rmpc_srqc_reformat(srqc, rmpc, true);
349 377
350 err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); 378 err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
351 379
@@ -390,11 +418,10 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
390} 418}
391 419
392static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 420static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
393 struct mlx5_query_srq_mbox_out *out) 421 struct mlx5_srq_attr *out)
394{ 422{
395 u32 *rmp_out; 423 u32 *rmp_out;
396 void *rmpc; 424 void *rmpc;
397 void *srqc;
398 int err; 425 int err;
399 426
400 rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); 427 rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out));
@@ -405,9 +432,10 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
405 if (err) 432 if (err)
406 goto out; 433 goto out;
407 434
408 srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry);
409 rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); 435 rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
410 rmpc_srqc_reformat(srqc, rmpc, false); 436 get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
437 if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
438 out->flags |= MLX5_SRQ_FLAG_ERR;
411 439
412out: 440out:
413 kvfree(rmp_out); 441 kvfree(rmp_out);
@@ -416,15 +444,14 @@ out:
416 444
417static int create_srq_split(struct mlx5_core_dev *dev, 445static int create_srq_split(struct mlx5_core_dev *dev,
418 struct mlx5_core_srq *srq, 446 struct mlx5_core_srq *srq,
419 struct mlx5_create_srq_mbox_in *in, 447 struct mlx5_srq_attr *in)
420 int inlen, int is_xrc)
421{ 448{
422 if (!dev->issi) 449 if (!dev->issi)
423 return create_srq_cmd(dev, srq, in, inlen); 450 return create_srq_cmd(dev, srq, in);
424 else if (srq->common.res == MLX5_RES_XSRQ) 451 else if (srq->common.res == MLX5_RES_XSRQ)
425 return create_xrc_srq_cmd(dev, srq, in, inlen); 452 return create_xrc_srq_cmd(dev, srq, in);
426 else 453 else
427 return create_rmp_cmd(dev, srq, in, inlen); 454 return create_rmp_cmd(dev, srq, in);
428} 455}
429 456
430static int destroy_srq_split(struct mlx5_core_dev *dev, 457static int destroy_srq_split(struct mlx5_core_dev *dev,
@@ -439,15 +466,17 @@ static int destroy_srq_split(struct mlx5_core_dev *dev,
439} 466}
440 467
441int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 468int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
442 struct mlx5_create_srq_mbox_in *in, int inlen, 469 struct mlx5_srq_attr *in)
443 int is_xrc)
444{ 470{
445 int err; 471 int err;
446 struct mlx5_srq_table *table = &dev->priv.srq_table; 472 struct mlx5_srq_table *table = &dev->priv.srq_table;
447 473
448 srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ; 474 if (in->type == IB_SRQT_XRC)
475 srq->common.res = MLX5_RES_XSRQ;
476 else
477 srq->common.res = MLX5_RES_SRQ;
449 478
450 err = create_srq_split(dev, srq, in, inlen, is_xrc); 479 err = create_srq_split(dev, srq, in);
451 if (err) 480 if (err)
452 return err; 481 return err;
453 482
@@ -502,7 +531,7 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
502EXPORT_SYMBOL(mlx5_core_destroy_srq); 531EXPORT_SYMBOL(mlx5_core_destroy_srq);
503 532
504int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 533int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
505 struct mlx5_query_srq_mbox_out *out) 534 struct mlx5_srq_attr *out)
506{ 535{
507 if (!dev->issi) 536 if (!dev->issi)
508 return query_srq_cmd(dev, srq, out); 537 return query_srq_cmd(dev, srq, out);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index 03a5093ffeb7..28274a6fbafe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -85,6 +85,7 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
85 85
86 return err; 86 return err;
87} 87}
88EXPORT_SYMBOL(mlx5_core_create_rq);
88 89
89int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) 90int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
90{ 91{
@@ -110,6 +111,7 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
110 111
111 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); 112 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
112} 113}
114EXPORT_SYMBOL(mlx5_core_destroy_rq);
113 115
114int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) 116int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
115{ 117{
@@ -430,6 +432,7 @@ int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
430 432
431 return err; 433 return err;
432} 434}
435EXPORT_SYMBOL(mlx5_core_create_rqt);
433 436
434int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, 437int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
435 int inlen) 438 int inlen)
@@ -455,3 +458,4 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
455 458
456 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); 459 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
457} 460}
461EXPORT_SYMBOL(mlx5_core_destroy_rqt);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e6f6910278f3..42da3552f7cb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -220,6 +220,7 @@ enum {
220 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, 220 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
221 MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, 221 MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33,
222 MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, 222 MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34,
223 MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35,
223}; 224};
224 225
225enum { 226enum {
@@ -1342,6 +1343,9 @@ enum {
1342 VXLAN_STEER_BY_INNER_VLAN = 1 << 4, 1343 VXLAN_STEER_BY_INNER_VLAN = 1 << 4,
1343}; 1344};
1344 1345
1346enum {
1347 MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS = 0x2,
1348};
1345 1349
1346int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, 1350int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
1347 enum mlx4_net_trans_promisc_mode mode); 1351 enum mlx4_net_trans_promisc_mode mode);
@@ -1382,6 +1386,9 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
1382int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); 1386int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
1383int mlx4_SYNC_TPT(struct mlx4_dev *dev); 1387int mlx4_SYNC_TPT(struct mlx4_dev *dev);
1384int mlx4_test_interrupts(struct mlx4_dev *dev); 1388int mlx4_test_interrupts(struct mlx4_dev *dev);
1389int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
1390 const u32 offset[], u32 value[],
1391 size_t array_len, u8 port);
1385u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port); 1392u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port);
1386bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector); 1393bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector);
1387struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port); 1394struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port);
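
mlx4_query_diag_counters() is the low-level entry point behind the new mlx4 diagnostic hardware counters: it issues MLX4_CMD_DIAG_RPRT and copies the requested 32-bit counters out of the command mailbox by offset. A hedged usage sketch (the offsets, op modifier and port below are illustrative only; real callers take them from the mlx4_ib diagnostic counter tables):

	u32 offs[2] = { 0x10, 0x14 };	/* hypothetical counter offsets */
	u32 vals[ARRAY_SIZE(offs)];
	int err;

	err = mlx4_query_diag_counters(dev, MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
				       offs, vals, ARRAY_SIZE(offs), port);
	if (err)
		return err;
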
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 2be976dd4966..2566f6d6444f 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -58,6 +58,8 @@ struct mlx5_core_cq {
58 void (*comp)(struct mlx5_core_cq *); 58 void (*comp)(struct mlx5_core_cq *);
59 void *priv; 59 void *priv;
60 } tasklet_ctx; 60 } tasklet_ctx;
61 int reset_notify_added;
62 struct list_head reset_notify;
61}; 63};
62 64
63 65
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a041b99fceac..ccea6fb16482 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -46,6 +46,7 @@
46 46
47#include <linux/mlx5/device.h> 47#include <linux/mlx5/device.h>
48#include <linux/mlx5/doorbell.h> 48#include <linux/mlx5/doorbell.h>
49#include <linux/mlx5/srq.h>
49 50
50enum { 51enum {
51 MLX5_RQ_BITMASK_VSD = 1 << 1, 52 MLX5_RQ_BITMASK_VSD = 1 << 1,
@@ -798,11 +799,10 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
798void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, 799void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
799 struct mlx5_cmd_mailbox *head); 800 struct mlx5_cmd_mailbox *head);
800int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 801int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
801 struct mlx5_create_srq_mbox_in *in, int inlen, 802 struct mlx5_srq_attr *in);
802 int is_xrc);
803int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); 803int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
804int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 804int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
805 struct mlx5_query_srq_mbox_out *out); 805 struct mlx5_srq_attr *out);
806int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 806int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
807 u16 lwm, int is_srq); 807 u16 lwm, int is_srq);
808void mlx5_init_mkey_table(struct mlx5_core_dev *dev); 808void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index ab310819ac36..7879bf411891 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -556,9 +556,9 @@ struct mlx5_destroy_qp_mbox_out {
556struct mlx5_modify_qp_mbox_in { 556struct mlx5_modify_qp_mbox_in {
557 struct mlx5_inbox_hdr hdr; 557 struct mlx5_inbox_hdr hdr;
558 __be32 qpn; 558 __be32 qpn;
559 u8 rsvd1[4];
560 __be32 optparam;
561 u8 rsvd0[4]; 559 u8 rsvd0[4];
560 __be32 optparam;
561 u8 rsvd1[4];
562 struct mlx5_qp_context ctx; 562 struct mlx5_qp_context ctx;
563 u8 rsvd2[16]; 563 u8 rsvd2[16];
564}; 564};
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
index f43ed054a3e0..33c97dc900f8 100644
--- a/include/linux/mlx5/srq.h
+++ b/include/linux/mlx5/srq.h
@@ -35,6 +35,31 @@
35 35
36#include <linux/mlx5/driver.h> 36#include <linux/mlx5/driver.h>
37 37
38enum {
39 MLX5_SRQ_FLAG_ERR = (1 << 0),
40 MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
41};
42
43struct mlx5_srq_attr {
44 u32 type;
45 u32 flags;
46 u32 log_size;
47 u32 wqe_shift;
48 u32 log_page_size;
49 u32 wqe_cnt;
50 u32 srqn;
51 u32 xrcd;
52 u32 page_offset;
53 u32 cqn;
54 u32 pd;
55 u32 lwm;
56 u32 user_index;
57 u64 db_record;
58 u64 *pas;
59};
60
61struct mlx5_core_dev;
62
38void mlx5_init_srq_table(struct mlx5_core_dev *dev); 63void mlx5_init_srq_table(struct mlx5_core_dev *dev);
39void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev); 64void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);
40 65
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 384041669489..5ee7aab95eb8 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -94,6 +94,19 @@ enum ib_sa_selector {
94 IB_SA_BEST = 3 94 IB_SA_BEST = 3
95}; 95};
96 96
97/*
98 * There are 4 types of join states:
99 * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
100 * The order corresponds to JoinState bits in MCMemberRecord.
101 */
102enum ib_sa_mc_join_states {
103 FULLMEMBER_JOIN,
104 NONMEMBER_JOIN,
105 SENDONLY_NONMEBER_JOIN,
106 SENDONLY_FULLMEMBER_JOIN,
107 NUM_JOIN_MEMBERSHIP_TYPES,
108};
109
97#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12) 110#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12)
98 111
99/* 112/*
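
Because the enum order mirrors the JoinState bit positions in the MCMemberRecord, a membership request of type N is simply bit N of the 4-bit JoinState field. A sketch, assuming the BIT() mapping used by the cma/ucma changes in this series:

	u8 join_state;

	join_state = BIT(FULLMEMBER_JOIN);		/* 0x1: classic full-member join */
	join_state = BIT(SENDONLY_FULLMEMBER_JOIN);	/* 0x8: new send-only full-member join */
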
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a8137dcf5a00..94a0bc5b5bdd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -562,6 +562,7 @@ enum ib_event_type {
562 IB_EVENT_QP_LAST_WQE_REACHED, 562 IB_EVENT_QP_LAST_WQE_REACHED,
563 IB_EVENT_CLIENT_REREGISTER, 563 IB_EVENT_CLIENT_REREGISTER,
564 IB_EVENT_GID_CHANGE, 564 IB_EVENT_GID_CHANGE,
565 IB_EVENT_WQ_FATAL,
565}; 566};
566 567
567const char *__attribute_const__ ib_event_msg(enum ib_event_type event); 568const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
@@ -572,6 +573,7 @@ struct ib_event {
572 struct ib_cq *cq; 573 struct ib_cq *cq;
573 struct ib_qp *qp; 574 struct ib_qp *qp;
574 struct ib_srq *srq; 575 struct ib_srq *srq;
576 struct ib_wq *wq;
575 u8 port_num; 577 u8 port_num;
576 } element; 578 } element;
577 enum ib_event_type event; 579 enum ib_event_type event;
@@ -1015,6 +1017,7 @@ struct ib_qp_init_attr {
1015 * Only needed for special QP types, or when using the RW API. 1017 * Only needed for special QP types, or when using the RW API.
1016 */ 1018 */
1017 u8 port_num; 1019 u8 port_num;
1020 struct ib_rwq_ind_table *rwq_ind_tbl;
1018}; 1021};
1019 1022
1020struct ib_qp_open_attr { 1023struct ib_qp_open_attr {
@@ -1323,6 +1326,8 @@ struct ib_ucontext {
1323 struct list_head ah_list; 1326 struct list_head ah_list;
1324 struct list_head xrcd_list; 1327 struct list_head xrcd_list;
1325 struct list_head rule_list; 1328 struct list_head rule_list;
1329 struct list_head wq_list;
1330 struct list_head rwq_ind_tbl_list;
1326 int closing; 1331 int closing;
1327 1332
1328 struct pid *tgid; 1333 struct pid *tgid;
@@ -1428,6 +1433,63 @@ struct ib_srq {
1428 } ext; 1433 } ext;
1429}; 1434};
1430 1435
1436enum ib_wq_type {
1437 IB_WQT_RQ
1438};
1439
1440enum ib_wq_state {
1441 IB_WQS_RESET,
1442 IB_WQS_RDY,
1443 IB_WQS_ERR
1444};
1445
1446struct ib_wq {
1447 struct ib_device *device;
1448 struct ib_uobject *uobject;
1449 void *wq_context;
1450 void (*event_handler)(struct ib_event *, void *);
1451 struct ib_pd *pd;
1452 struct ib_cq *cq;
1453 u32 wq_num;
1454 enum ib_wq_state state;
1455 enum ib_wq_type wq_type;
1456 atomic_t usecnt;
1457};
1458
1459struct ib_wq_init_attr {
1460 void *wq_context;
1461 enum ib_wq_type wq_type;
1462 u32 max_wr;
1463 u32 max_sge;
1464 struct ib_cq *cq;
1465 void (*event_handler)(struct ib_event *, void *);
1466};
1467
1468enum ib_wq_attr_mask {
1469 IB_WQ_STATE = 1 << 0,
1470 IB_WQ_CUR_STATE = 1 << 1,
1471};
1472
1473struct ib_wq_attr {
1474 enum ib_wq_state wq_state;
1475 enum ib_wq_state curr_wq_state;
1476};
1477
1478struct ib_rwq_ind_table {
1479 struct ib_device *device;
1480 struct ib_uobject *uobject;
1481 atomic_t usecnt;
1482 u32 ind_tbl_num;
1483 u32 log_ind_tbl_size;
1484 struct ib_wq **ind_tbl;
1485};
1486
1487struct ib_rwq_ind_table_init_attr {
1488 u32 log_ind_tbl_size;
1489 /* Each entry is a pointer to Receive Work Queue */
1490 struct ib_wq **ind_tbl;
1491};
1492
1431struct ib_qp { 1493struct ib_qp {
1432 struct ib_device *device; 1494 struct ib_device *device;
1433 struct ib_pd *pd; 1495 struct ib_pd *pd;
@@ -1450,6 +1512,7 @@ struct ib_qp {
1450 void *qp_context; 1512 void *qp_context;
1451 u32 qp_num; 1513 u32 qp_num;
1452 enum ib_qp_type qp_type; 1514 enum ib_qp_type qp_type;
1515 struct ib_rwq_ind_table *rwq_ind_tbl;
1453}; 1516};
1454 1517
1455struct ib_mr { 1518struct ib_mr {
@@ -1506,6 +1569,7 @@ enum ib_flow_spec_type {
1506 IB_FLOW_SPEC_IB = 0x22, 1569 IB_FLOW_SPEC_IB = 0x22,
1507 /* L3 header*/ 1570 /* L3 header*/
1508 IB_FLOW_SPEC_IPV4 = 0x30, 1571 IB_FLOW_SPEC_IPV4 = 0x30,
1572 IB_FLOW_SPEC_IPV6 = 0x31,
1509 /* L4 headers*/ 1573 /* L4 headers*/
1510 IB_FLOW_SPEC_TCP = 0x40, 1574 IB_FLOW_SPEC_TCP = 0x40,
1511 IB_FLOW_SPEC_UDP = 0x41 1575 IB_FLOW_SPEC_UDP = 0x41
@@ -1567,6 +1631,18 @@ struct ib_flow_spec_ipv4 {
1567 struct ib_flow_ipv4_filter mask; 1631 struct ib_flow_ipv4_filter mask;
1568}; 1632};
1569 1633
1634struct ib_flow_ipv6_filter {
1635 u8 src_ip[16];
1636 u8 dst_ip[16];
1637};
1638
1639struct ib_flow_spec_ipv6 {
1640 enum ib_flow_spec_type type;
1641 u16 size;
1642 struct ib_flow_ipv6_filter val;
1643 struct ib_flow_ipv6_filter mask;
1644};
1645
1570struct ib_flow_tcp_udp_filter { 1646struct ib_flow_tcp_udp_filter {
1571 __be16 dst_port; 1647 __be16 dst_port;
1572 __be16 src_port; 1648 __be16 src_port;
@@ -1588,6 +1664,7 @@ union ib_flow_spec {
1588 struct ib_flow_spec_ib ib; 1664 struct ib_flow_spec_ib ib;
1589 struct ib_flow_spec_ipv4 ipv4; 1665 struct ib_flow_spec_ipv4 ipv4;
1590 struct ib_flow_spec_tcp_udp tcp_udp; 1666 struct ib_flow_spec_tcp_udp tcp_udp;
1667 struct ib_flow_spec_ipv6 ipv6;
1591}; 1668};
1592 1669
1593struct ib_flow_attr { 1670struct ib_flow_attr {
@@ -1921,7 +1998,18 @@ struct ib_device {
1921 struct ifla_vf_stats *stats); 1998 struct ifla_vf_stats *stats);
1922 int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, 1999 int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
1923 int type); 2000 int type);
1924 2001 struct ib_wq * (*create_wq)(struct ib_pd *pd,
2002 struct ib_wq_init_attr *init_attr,
2003 struct ib_udata *udata);
2004 int (*destroy_wq)(struct ib_wq *wq);
2005 int (*modify_wq)(struct ib_wq *wq,
2006 struct ib_wq_attr *attr,
2007 u32 wq_attr_mask,
2008 struct ib_udata *udata);
2009 struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device,
2010 struct ib_rwq_ind_table_init_attr *init_attr,
2011 struct ib_udata *udata);
2012 int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
1925 struct ib_dma_mapping_ops *dma_ops; 2013 struct ib_dma_mapping_ops *dma_ops;
1926 2014
1927 struct module *owner; 2015 struct module *owner;
@@ -1956,6 +2044,7 @@ struct ib_device {
1956 * in fast paths. 2044 * in fast paths.
1957 */ 2045 */
1958 int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); 2046 int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
2047 void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len);
1959}; 2048};
1960 2049
1961struct ib_client { 2050struct ib_client {
@@ -1991,6 +2080,8 @@ struct ib_client {
1991struct ib_device *ib_alloc_device(size_t size); 2080struct ib_device *ib_alloc_device(size_t size);
1992void ib_dealloc_device(struct ib_device *device); 2081void ib_dealloc_device(struct ib_device *device);
1993 2082
2083void ib_get_device_fw_str(struct ib_device *device, char *str, size_t str_len);
2084
1994int ib_register_device(struct ib_device *device, 2085int ib_register_device(struct ib_device *device,
1995 int (*port_callback)(struct ib_device *, 2086 int (*port_callback)(struct ib_device *,
1996 u8, struct kobject *)); 2087 u8, struct kobject *));
@@ -3168,6 +3259,15 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
3168struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, 3259struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
3169 u16 pkey, const union ib_gid *gid, 3260 u16 pkey, const union ib_gid *gid,
3170 const struct sockaddr *addr); 3261 const struct sockaddr *addr);
3262struct ib_wq *ib_create_wq(struct ib_pd *pd,
3263 struct ib_wq_init_attr *init_attr);
3264int ib_destroy_wq(struct ib_wq *wq);
3265int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
3266 u32 wq_attr_mask);
3267struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
3268 struct ib_rwq_ind_table_init_attr*
3269 wq_ind_table_init_attr);
3270int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
3171 3271
3172int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, 3272int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
3173 unsigned int *sg_offset, unsigned int page_size); 3273 unsigned int *sg_offset, unsigned int page_size);
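
The new WQ and RWQ indirection-table verbs are the kernel-side building blocks of the RSS API: a consumer creates one or more receive work queues, groups them into an indirection table, and references the table through ib_qp_init_attr.rwq_ind_tbl when creating the RSS QP. A minimal sketch with a single-entry table (error handling omitted; pd and cq are assumed to already exist):

	struct ib_wq_init_attr wq_attr = {
		.wq_type = IB_WQT_RQ,
		.max_wr  = 256,
		.max_sge = 1,
		.cq      = cq,
	};
	struct ib_wq *wq = ib_create_wq(pd, &wq_attr);

	struct ib_rwq_ind_table_init_attr tbl_attr = {
		.log_ind_tbl_size = 0,		/* 1 << 0 == one WQ in the table */
		.ind_tbl          = &wq,
	};
	struct ib_rwq_ind_table *ind_tbl =
		ib_create_rwq_ind_table(pd->device, &tbl_attr);

	/* later: qp_init_attr.rwq_ind_tbl = ind_tbl; */
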
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index afe44fde72a5..81fb1d15e8bb 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -333,11 +333,13 @@ int rdma_disconnect(struct rdma_cm_id *id);
333 * address. 333 * address.
334 * @id: Communication identifier associated with the request. 334 * @id: Communication identifier associated with the request.
335 * @addr: Multicast address identifying the group to join. 335 * @addr: Multicast address identifying the group to join.
336 * @join_state: Multicast JoinState bitmap requested by port.
337 * Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits.
336 * @context: User-defined context associated with the join request, returned 338 * @context: User-defined context associated with the join request, returned
337 * to the user through the private_data pointer in multicast events. 339 * to the user through the private_data pointer in multicast events.
338 */ 340 */
339int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 341int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
340 void *context); 342 u8 join_state, void *context);
341 343
342/** 344/**
343 * rdma_leave_multicast - Leave the multicast group specified by the given 345 * rdma_leave_multicast - Leave the multicast group specified by the given
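
With the updated signature the join state is passed explicitly; a send-only full-member join would look roughly like this (sketch only; a conventional join passes BIT(FULLMEMBER_JOIN) instead):

	ret = rdma_join_multicast(cm_id, (struct sockaddr *)&mcast_addr,
				  BIT(SENDONLY_FULLMEMBER_JOIN), mc_context);
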
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index 231901b08f6c..4edb0f2b4f9f 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -6,3 +6,4 @@ header-y += ib_user_verbs.h
6header-y += rdma_netlink.h 6header-y += rdma_netlink.h
7header-y += rdma_user_cm.h 7header-y += rdma_user_cm.h
8header-y += hfi/ 8header-y += hfi/
9header-y += rdma_user_rxe.h
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index b6543d73d20a..7f035f4b53b0 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -95,6 +95,11 @@ enum {
95 IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, 95 IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
96 IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, 96 IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
97 IB_USER_VERBS_EX_CMD_DESTROY_FLOW, 97 IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
98 IB_USER_VERBS_EX_CMD_CREATE_WQ,
99 IB_USER_VERBS_EX_CMD_MODIFY_WQ,
100 IB_USER_VERBS_EX_CMD_DESTROY_WQ,
101 IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
102 IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL
98}; 103};
99 104
100/* 105/*
@@ -518,6 +523,14 @@ struct ib_uverbs_create_qp {
518 __u64 driver_data[0]; 523 __u64 driver_data[0];
519}; 524};
520 525
526enum ib_uverbs_create_qp_mask {
527 IB_UVERBS_CREATE_QP_MASK_IND_TABLE = 1UL << 0,
528};
529
530enum {
531 IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
532};
533
521struct ib_uverbs_ex_create_qp { 534struct ib_uverbs_ex_create_qp {
522 __u64 user_handle; 535 __u64 user_handle;
523 __u32 pd_handle; 536 __u32 pd_handle;
@@ -535,6 +548,8 @@ struct ib_uverbs_ex_create_qp {
535 __u8 reserved; 548 __u8 reserved;
536 __u32 comp_mask; 549 __u32 comp_mask;
537 __u32 create_flags; 550 __u32 create_flags;
551 __u32 rwq_ind_tbl_handle;
552 __u32 reserved1;
538}; 553};
539 554
540struct ib_uverbs_open_qp { 555struct ib_uverbs_open_qp {
@@ -852,6 +867,24 @@ struct ib_uverbs_flow_spec_tcp_udp {
852 struct ib_uverbs_flow_tcp_udp_filter mask; 867 struct ib_uverbs_flow_tcp_udp_filter mask;
853}; 868};
854 869
870struct ib_uverbs_flow_ipv6_filter {
871 __u8 src_ip[16];
872 __u8 dst_ip[16];
873};
874
875struct ib_uverbs_flow_spec_ipv6 {
876 union {
877 struct ib_uverbs_flow_spec_hdr hdr;
878 struct {
879 __u32 type;
880 __u16 size;
881 __u16 reserved;
882 };
883 };
884 struct ib_uverbs_flow_ipv6_filter val;
885 struct ib_uverbs_flow_ipv6_filter mask;
886};
887
855struct ib_uverbs_flow_attr { 888struct ib_uverbs_flow_attr {
856 __u32 type; 889 __u32 type;
857 __u16 size; 890 __u16 size;
@@ -946,4 +979,66 @@ struct ib_uverbs_destroy_srq_resp {
946 __u32 events_reported; 979 __u32 events_reported;
947}; 980};
948 981
982struct ib_uverbs_ex_create_wq {
983 __u32 comp_mask;
984 __u32 wq_type;
985 __u64 user_handle;
986 __u32 pd_handle;
987 __u32 cq_handle;
988 __u32 max_wr;
989 __u32 max_sge;
990};
991
992struct ib_uverbs_ex_create_wq_resp {
993 __u32 comp_mask;
994 __u32 response_length;
995 __u32 wq_handle;
996 __u32 max_wr;
997 __u32 max_sge;
998 __u32 wqn;
999};
1000
1001struct ib_uverbs_ex_destroy_wq {
1002 __u32 comp_mask;
1003 __u32 wq_handle;
1004};
1005
1006struct ib_uverbs_ex_destroy_wq_resp {
1007 __u32 comp_mask;
1008 __u32 response_length;
1009 __u32 events_reported;
1010 __u32 reserved;
1011};
1012
1013struct ib_uverbs_ex_modify_wq {
1014 __u32 attr_mask;
1015 __u32 wq_handle;
1016 __u32 wq_state;
1017 __u32 curr_wq_state;
1018};
1019
 1020/* Limit chosen to bound the kernel allocation, not the maximum expected table size */
1021#define IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE 0x0d
1022struct ib_uverbs_ex_create_rwq_ind_table {
1023 __u32 comp_mask;
1024 __u32 log_ind_tbl_size;
1025 /* Following are the wq handles according to log_ind_tbl_size
1026 * wq_handle1
1027 * wq_handle2
1028 */
1029 __u32 wq_handles[0];
1030};
1031
1032struct ib_uverbs_ex_create_rwq_ind_table_resp {
1033 __u32 comp_mask;
1034 __u32 response_length;
1035 __u32 ind_tbl_handle;
1036 __u32 ind_tbl_num;
1037};
1038
1039struct ib_uverbs_ex_destroy_rwq_ind_table {
1040 __u32 comp_mask;
1041 __u32 ind_tbl_handle;
1042};
1043
949#endif /* IB_USER_VERBS_H */ 1044#endif /* IB_USER_VERBS_H */
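
ib_uverbs_ex_create_rwq_ind_table is a variable-length command: the fixed part is followed by 1 << log_ind_tbl_size WQ handles, and IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE caps that count so the kernel never has to allocate an oversized table. A sketch of the size arithmetic (not a quote of the uverbs handler):

	size_t nr_handles = 1UL << cmd.log_ind_tbl_size;
	size_t cmd_len    = sizeof(struct ib_uverbs_ex_create_rwq_ind_table) +
			    nr_handles * sizeof(__u32);
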
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 3066718eb120..01923d463673 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -244,12 +244,19 @@ struct rdma_ucm_join_ip_mcast {
244 __u32 id; 244 __u32 id;
245}; 245};
246 246
247/* Multicast join flags */
248enum {
249 RDMA_MC_JOIN_FLAG_FULLMEMBER,
250 RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER,
251 RDMA_MC_JOIN_FLAG_RESERVED,
252};
253
247struct rdma_ucm_join_mcast { 254struct rdma_ucm_join_mcast {
248 __u64 response; /* rdma_ucma_create_id_resp */ 255 __u64 response; /* rdma_ucma_create_id_resp */
249 __u64 uid; 256 __u64 uid;
250 __u32 id; 257 __u32 id;
251 __u16 addr_size; 258 __u16 addr_size;
252 __u16 reserved; 259 __u16 join_flags;
253 struct sockaddr_storage addr; 260 struct sockaddr_storage addr;
254}; 261};
255 262
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
new file mode 100644
index 000000000000..1de99cfdaf7d
--- /dev/null
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -0,0 +1,144 @@
1/*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef RDMA_USER_RXE_H
34#define RDMA_USER_RXE_H
35
36#include <linux/types.h>
37
38union rxe_gid {
39 __u8 raw[16];
40 struct {
41 __be64 subnet_prefix;
42 __be64 interface_id;
43 } global;
44};
45
46struct rxe_global_route {
47 union rxe_gid dgid;
48 __u32 flow_label;
49 __u8 sgid_index;
50 __u8 hop_limit;
51 __u8 traffic_class;
52};
53
54struct rxe_av {
55 __u8 port_num;
56 __u8 network_type;
57 struct rxe_global_route grh;
58 union {
59 struct sockaddr _sockaddr;
60 struct sockaddr_in _sockaddr_in;
61 struct sockaddr_in6 _sockaddr_in6;
62 } sgid_addr, dgid_addr;
63};
64
65struct rxe_send_wr {
66 __u64 wr_id;
67 __u32 num_sge;
68 __u32 opcode;
69 __u32 send_flags;
70 union {
71 __be32 imm_data;
72 __u32 invalidate_rkey;
73 } ex;
74 union {
75 struct {
76 __u64 remote_addr;
77 __u32 rkey;
78 } rdma;
79 struct {
80 __u64 remote_addr;
81 __u64 compare_add;
82 __u64 swap;
83 __u32 rkey;
84 } atomic;
85 struct {
86 __u32 remote_qpn;
87 __u32 remote_qkey;
88 __u16 pkey_index;
89 } ud;
90 struct {
91 struct ib_mr *mr;
92 __u32 key;
93 int access;
94 } reg;
95 } wr;
96};
97
98struct rxe_sge {
99 __u64 addr;
100 __u32 length;
101 __u32 lkey;
102};
103
104struct mminfo {
105 __u64 offset;
106 __u32 size;
107 __u32 pad;
108};
109
110struct rxe_dma_info {
111 __u32 length;
112 __u32 resid;
113 __u32 cur_sge;
114 __u32 num_sge;
115 __u32 sge_offset;
116 union {
117 __u8 inline_data[0];
118 struct rxe_sge sge[0];
119 };
120};
121
122struct rxe_send_wqe {
123 struct rxe_send_wr wr;
124 struct rxe_av av;
125 __u32 status;
126 __u32 state;
127 __u64 iova;
128 __u32 mask;
129 __u32 first_psn;
130 __u32 last_psn;
131 __u32 ack_length;
132 __u32 ssn;
133 __u32 has_rd_atomic;
134 struct rxe_dma_info dma;
135};
136
137struct rxe_recv_wqe {
138 __u64 wr_id;
139 __u32 num_sge;
140 __u32 padding;
141 struct rxe_dma_info dma;
142};
143
144#endif /* RDMA_USER_RXE_H */